commit 108a43603bfb9165152896c81b771f96011de7fb
parent 55cd2b3271de4ee25a1fe5d984c83c6b2a1d41cc
Author: Stefan Koch <programming@stefan-koch.name>
Date: Sun, 12 Nov 2023 15:16:04 +0100
add a command to download all models
Diffstat:
4 files changed, 40 insertions(+), 10 deletions(-)
diff --git a/Dockerfile b/Dockerfile
@@ -12,19 +12,16 @@ COPY pyproject.toml /opt/nlparrot/
COPY src/nlparrot /opt/nlparrot/src/nlparrot
RUN pip install -e /opt/nlparrot
-# installs a Python package, thus before creation of user
-# Alternative installation approaches, maybe more suited to an installation
-# that also works well outside a Docker container are:
-# - installation via pip
-# - installation from within Python (would then be the same as stanza)
-# cf. https://spacy.io/usage/models#download-pip
+# TODO: hr_core_news_sm would also be installed by the "download-models" call below,
+# but without a virtualenv (which I'd like to avoid in the container) installing it
+# requires root. The stanza models, on the other hand, must be downloaded as the user
+# that later runs nlparrot, not as root.
RUN python -m spacy download hr_core_news_sm
RUN useradd --create-home nlparrot
WORKDIR /home/nlparrot
USER nlparrot
-# after creation of user, so that it downloads into the user home directory
-RUN python -c 'import stanza; [stanza.download(model) for model in ["multilingual", "en", "es", "hr"]]'
+RUN python /opt/nlparrot/src/nlparrot/cli.py download-models
-CMD ["python", "/opt/nlparrot/src/nlparrot/server.py"]
+CMD ["python", "/opt/nlparrot/src/nlparrot/cli.py", "run"]
diff --git a/pyproject.toml b/pyproject.toml
@@ -3,6 +3,7 @@ name = "nlparrot"
version = "0.0.6"
dependencies = [
'classla',
+ 'click',
'pyphen',
'spacy',
'stanza',
@@ -21,6 +22,9 @@ package-dir = {"" = "src"}
[tool.setuptools.packages.find]
where = ["src"]
+[project.scripts]
+nlparrot = "nlparrot.cli:cli"
+
[tool.black]
line-length = 120
diff --git a/src/nlparrot/cli.py b/src/nlparrot/cli.py
@@ -0,0 +1,29 @@
+import click
+import spacy
+import stanza
+
+from nlparrot.server import run_server
+
+
+@click.group()
+def cli():
+    """Command line interface of nlparrot."""
+    # The docstring doubles as the description click prints for `--help`;
+    # the group itself does no work, subcommands are attached via @cli.command.
+
+
+# The command name is given explicitly because the Dockerfile invokes
+# "download-models"; relying on click deriving the dashed name from the
+# function name would couple the image build to click's naming rules.
+@cli.command("download-models")
+def download_models():
+    """Download the stanza and spaCy models."""
+    click.echo('Downloading stanza models')
+    for model in ["multilingual", "en", "es", "hr"]:
+        stanza.download(model)
+
+    click.echo('Downloading spacy models')
+    spacy.cli.download('hr_core_news_sm')
+
+
+@cli.command()
+def run():
+    """Start the nlparrot server."""
+    # Thin wrapper: all server logic lives in nlparrot.server.run_server.
+    run_server()
+
+
+# Allow executing this file directly by path (the Dockerfile runs
+# `python /opt/nlparrot/src/nlparrot/cli.py <command>`), in addition to the
+# `nlparrot` console script declared in pyproject.toml.
+if __name__ == '__main__':
+ cli()
diff --git a/src/nlparrot/server.py b/src/nlparrot/server.py
@@ -18,7 +18,7 @@ def get_listen_address():
return {"address": socket_path, "family": "AF_UNIX"}
-if __name__ == "__main__":
+def run_server():
print("Starting server ...")
current_language = None