Bump version to 4.0.0

Recompile model
Include file name of output in arguments
2022-12-24 12:18:51 -08:00 · 2022-12-24 12:18:25 -08:00 · 2022-12-24 12:17:44 -08:00
4 changed files with 20 additions and 8 deletions
--- a/README.md
+++ b/README.md
@@ -27,9 +27,10 @@ argument, and it treats the input as a single token or ngram.

 ### Compiling models

-    gptc compile [-n <max_ngram_length>] [-c <min_count>] <raw model file>
+    gptc compile [-n <max_ngram_length>] [-c <min_count>] <raw model file> <compiled model file>

-This will print the compiled model encoded in binary format to stdout.
+This will write the compiled model encoded in binary format to `<compiled model
+file>`.

 If `-c` is specified, words and ngrams used less than `min_count` times will be
 excluded from the compiled model.
--- a/gptc/main.py
+++ b/gptc/main.py
@@ -13,8 +13,13 @@ def main() -> None:
    )
    subparsers = parser.add_subparsers(dest="subparser_name", required=True)

-    compile_parser = subparsers.add_parser("compile", help="compile a raw model")
+    compile_parser = subparsers.add_parser(
+        "compile", help="compile a raw model"
+    )
    compile_parser.add_argument("model", help="raw model to compile")
+    compile_parser.add_argument(
+        "out", help="name of file to write compiled model to"
+    )
    compile_parser.add_argument(
        "--max-ngram-length",
        "-n",
@@ -53,11 +58,15 @@ def main() -> None:
        action="store_true",
    )

-    check_parser = subparsers.add_parser("check", help="check one word or ngram in model")
+    check_parser = subparsers.add_parser(
+        "check", help="check one word or ngram in model"
+    )
    check_parser.add_argument("model", help="compiled model to use")
    check_parser.add_argument("token", help="token or ngram to check")

-    pack_parser = subparsers.add_parser("pack", help="pack a model from a directory")
+    pack_parser = subparsers.add_parser(
+        "pack", help="pack a model from a directory"
+    )
    pack_parser.add_argument("model", help="directory containing model")

    args = parser.parse_args()
@@ -66,7 +75,10 @@ def main() -> None:
        with open(args.model, "r") as f:
            model = json.load(f)

-        gptc.compile(model, args.max_ngram_length, args.min_count).serialize(sys.stdout.buffer)
+        with open(args.out, "wb+") as f:
+            gptc.compile(
+                model, args.max_ngram_length, args.min_count
+            ).serialize(f)
    elif args.subparser_name == "classify":
        with open(args.model, "rb") as f:
            model = gptc.deserialize(f)
@@ -76,7 +88,6 @@ def main() -> None:
        else:
            text = sys.stdin.read()

-
        if args.category:
            classifier = gptc.Classifier(model, args.max_ngram_length)
            print(classifier.classify(text))
--- a/models/compiled.gptc
+++ b/models/compiled.gptc
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "gptc"
-version = "3.1.1"
+version = "4.0.0"
 description = "General-purpose text classifier"
 readme = "README.md"
 authors = [{ name = "Samuel Sloniker", email = "sam@kj7rrv.com"}]
Author	SHA1	Message	Date
Samuel Sloniker	822aa7d1fd	Bump version to 4.0.0	2022-12-24 12:18:51 -08:00
Samuel Sloniker	8417c8acda	Recompile model	2022-12-24 12:18:25 -08:00
Samuel Sloniker	ec7f4116fc	Include file name of output in arguments	2022-12-24 12:17:44 -08:00