nlparrot

natural language processing server
Log | Files | Refs | README | LICENSE

commit 108a43603bfb9165152896c81b771f96011de7fb
parent 55cd2b3271de4ee25a1fe5d984c83c6b2a1d41cc
Author: Stefan Koch <programming@stefan-koch.name>
Date:   Sun, 12 Nov 2023 15:16:04 +0100

add a command to download all models

Diffstat:
M Dockerfile | 15 ++++++---------
M pyproject.toml | 4 ++++
A src/nlparrot/cli.py | 29 +++++++++++++++++++++++++++++
M src/nlparrot/server.py | 2 +-
4 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/Dockerfile b/Dockerfile
@@ -12,19 +12,16 @@ COPY pyproject.toml /opt/nlparrot/
 COPY src/nlparrot /opt/nlparrot/src/nlparrot
 RUN pip install -e /opt/nlparrot
 
-# installs a Python package, thus before creation of user
-# Alternative installation approaches, maybe more suited to an installation
-# that also works well outside a Docker container are:
-# - installation via pip
-# - installation from within Python (would then be the same as stanza)
-# cf. https://spacy.io/usage/models#download-pip
+# TODO: hr_core_news_sm would also be installed by the "download-models" call below,
+# but without a virtualenv (which I'd like to avoid in the container) this requires root.
+# On the other hand, the stanza models must be installed as the user used to run
+# nlparrot later, not as root.
 RUN python -m spacy download hr_core_news_sm
 
 RUN useradd --create-home nlparrot
 WORKDIR /home/nlparrot
 USER nlparrot
 
-# after creation of user, so that it downloads into the user home directory
-RUN python -c 'import stanza; [stanza.download(model) for model in ["multilingual", "en", "es", "hr"]]'
+RUN python /opt/nlparrot/src/nlparrot/cli.py download-models
 
-CMD ["python", "/opt/nlparrot/src/nlparrot/server.py"]
+CMD ["python", "/opt/nlparrot/src/nlparrot/cli.py", "run"]
diff --git a/pyproject.toml b/pyproject.toml
@@ -3,6 +3,7 @@ name = "nlparrot"
 version = "0.0.6"
 dependencies = [
     'classla',
+    'click',
     'pyphen',
     'spacy',
     'stanza',
@@ -21,6 +22,9 @@ package-dir = {"" = "src"}
 [tool.setuptools.packages.find]
 where = ["src"]
 
+[project.scripts]
+nlparrot = "nlparrot.cli:cli"
+
 [tool.black]
 line-length = 120
 
diff --git a/src/nlparrot/cli.py b/src/nlparrot/cli.py
@@ -0,0 +1,29 @@
+import click
+import spacy
+import stanza
+
+from nlparrot.server import run_server
+
+
+@click.group()
+def cli():
+    pass
+
+
+@cli.command()
+def download_models():
+    click.echo('Downloading stanza models')
+    for model in ["multilingual", "en", "es", "hr"]:
+        stanza.download(model)
+
+    click.echo('Downloading spacy models')
+    spacy.cli.download('hr_core_news_sm')
+
+
+@cli.command()
+def run():
+    run_server()
+
+
+if __name__ == '__main__':
+    cli()
diff --git a/src/nlparrot/server.py b/src/nlparrot/server.py
@@ -18,7 +18,7 @@ def get_listen_address():
     return {"address": socket_path, "family": "AF_UNIX"}
 
 
-if __name__ == "__main__":
+def run_server():
     print("Starting server ...")
 
     current_language = None