diff --git a/README.md b/README.md index a22febc..798e97b 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,13 @@ Alternatively, if you only need the most likely category, you can use this: This will prompt for a string and classify it, outputting the category on stdout (or "None" if it cannot determine anything). +### Checking individual words or ngrams + + gptc check <model> <token> + +This is very similar to `gptc classify`, except it takes the input as an +argument, and it treats the input as a single token or ngram. + ### Compiling models gptc compile [-n ] [-c ] diff --git a/gptc/__main__.py b/gptc/__main__.py index 11cd795..32ec754 100644 --- a/gptc/__main__.py +++ b/gptc/__main__.py @@ -53,6 +53,10 @@ def main() -> None: action="store_true", ) + check_parser = subparsers.add_parser("check", help="check one word or ngram in model") + check_parser.add_argument("model", help="compiled model to use") + check_parser.add_argument("token", help="token or ngram to check") + pack_parser = subparsers.add_parser("pack", help="pack a model from a directory") pack_parser.add_argument("model", help="directory containing model") @@ -81,6 +85,10 @@ def main() -> None: else: probabilities = model.confidence(text, args.max_ngram_length) print(json.dumps(probabilities)) + elif args.subparser_name == "check": + with open(args.model, "rb") as f: + model = gptc.deserialize(f.read()) + print(json.dumps(model.get(args.token))) else: print(json.dumps(gptc.pack(args.model, True)[0]))