Compare commits

..

No commits in common. "822aa7d1fdd06da7475938d8b8edaf38a3ea84f0" and "f8dbc78b8231ae551f6ec61514644518190e5e21" have entirely different histories.

4 changed files with 8 additions and 20 deletions

View File

@@ -27,10 +27,9 @@ argument, and it treats the input as a single token or ngram.
### Compiling models ### Compiling models
gptc compile [-n <max_ngram_length>] [-c <min_count>] <raw model file> <compiled model file> gptc compile [-n <max_ngram_length>] [-c <min_count>] <raw model file>
This will write the compiled model encoded in binary format to `<compiled model This will print the compiled model encoded in binary format to stdout.
file>`.
If `-c` is specified, words and ngrams used less than `min_count` times will be If `-c` is specified, words and ngrams used less than `min_count` times will be
excluded from the compiled model. excluded from the compiled model.

View File

@@ -13,13 +13,8 @@ def main() -> None:
) )
subparsers = parser.add_subparsers(dest="subparser_name", required=True) subparsers = parser.add_subparsers(dest="subparser_name", required=True)
compile_parser = subparsers.add_parser( compile_parser = subparsers.add_parser("compile", help="compile a raw model")
"compile", help="compile a raw model"
)
compile_parser.add_argument("model", help="raw model to compile") compile_parser.add_argument("model", help="raw model to compile")
compile_parser.add_argument(
"out", help="name of file to write compiled model to"
)
compile_parser.add_argument( compile_parser.add_argument(
"--max-ngram-length", "--max-ngram-length",
"-n", "-n",
@@ -58,15 +53,11 @@ def main() -> None:
action="store_true", action="store_true",
) )
check_parser = subparsers.add_parser( check_parser = subparsers.add_parser("check", help="check one word or ngram in model")
"check", help="check one word or ngram in model"
)
check_parser.add_argument("model", help="compiled model to use") check_parser.add_argument("model", help="compiled model to use")
check_parser.add_argument("token", help="token or ngram to check") check_parser.add_argument("token", help="token or ngram to check")
pack_parser = subparsers.add_parser( pack_parser = subparsers.add_parser("pack", help="pack a model from a directory")
"pack", help="pack a model from a directory"
)
pack_parser.add_argument("model", help="directory containing model") pack_parser.add_argument("model", help="directory containing model")
args = parser.parse_args() args = parser.parse_args()
@@ -75,10 +66,7 @@ def main() -> None:
with open(args.model, "r") as f: with open(args.model, "r") as f:
model = json.load(f) model = json.load(f)
with open(args.out, "wb+") as f: gptc.compile(model, args.max_ngram_length, args.min_count).serialize(sys.stdout.buffer)
gptc.compile(
model, args.max_ngram_length, args.min_count
).serialize(f)
elif args.subparser_name == "classify": elif args.subparser_name == "classify":
with open(args.model, "rb") as f: with open(args.model, "rb") as f:
model = gptc.deserialize(f) model = gptc.deserialize(f)
@@ -88,6 +76,7 @@ def main() -> None:
else: else:
text = sys.stdin.read() text = sys.stdin.read()
if args.category: if args.category:
classifier = gptc.Classifier(model, args.max_ngram_length) classifier = gptc.Classifier(model, args.max_ngram_length)
print(classifier.classify(text)) print(classifier.classify(text))

Binary file not shown.

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "gptc" name = "gptc"
version = "4.0.0" version = "3.1.1"
description = "General-purpose text classifier" description = "General-purpose text classifier"
readme = "README.md" readme = "README.md"
authors = [{ name = "Samuel Sloniker", email = "sam@kj7rrv.com"}] authors = [{ name = "Samuel Sloniker", email = "sam@kj7rrv.com"}]