Add CLI for Model.get()

2022-11-26 18:26:52 -08:00 · 2022-11-26 18:26:52 -08:00 · b1228edd9c
commit b1228edd9c
parent 25192ffddf
2 changed files with 15 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -25,6 +25,13 @@ Alternatively, if you only need the most likely category, you can use this:
 This will prompt for a string and classify it, outputting the category on
 stdout (or "None" if it cannot determine anything).

+### Checking individual words or ngrams
+
+    gptc check <compiled model file> <token or ngram>
+
+This is very similar to `gptc classify`, except it takes the input as an
+argument, and it treats the input as a single token or ngram.
+
 ### Compiling models

    gptc compile [-n <max_ngram_length>] [-c <min_count>] <raw model file>
--- a/gptc/main.py
+++ b/gptc/main.py
@@ -53,6 +53,10 @@ def main() -> None:
        action="store_true",
    )

+    check_parser = subparsers.add_parser("check", help="check one word or ngram in model")
+    check_parser.add_argument("model", help="compiled model to use")
+    check_parser.add_argument("token", help="token or ngram to check")
+
    pack_parser = subparsers.add_parser("pack", help="pack a model from a directory")
    pack_parser.add_argument("model", help="directory containing model")

@@ -81,6 +85,10 @@ def main() -> None:
        else:
            probabilities = model.confidence(text, args.max_ngram_length)
            print(json.dumps(probabilities))
+    elif args.subparser_name == "check":
+        with open(args.model, "rb") as f:
+            model = gptc.deserialize(f.read())
+        print(json.dumps(model.get(args.token)))
    else:
        print(json.dumps(gptc.pack(args.model, True)[0]))