Compare commits
3 Commits
f8dbc78b82
...
822aa7d1fd
Author | SHA1 | Date | |
---|---|---|---|
822aa7d1fd | |||
8417c8acda | |||
ec7f4116fc |
|
@ -27,9 +27,10 @@ argument, and it treats the input as a single token or ngram.
|
||||||
|
|
||||||
### Compiling models
|
### Compiling models
|
||||||
|
|
||||||
gptc compile [-n <max_ngram_length>] [-c <min_count>] <raw model file>
|
gptc compile [-n <max_ngram_length>] [-c <min_count>] <raw model file> <compiled model file>
|
||||||
|
|
||||||
This will print the compiled model encoded in binary format to stdout.
|
This will write the compiled model encoded in binary format to `<compiled model
|
||||||
|
file>`.
|
||||||
|
|
||||||
If `-c` is specified, words and ngrams used fewer than `min_count` times will be
|
If `-c` is specified, words and ngrams used fewer than `min_count` times will be
|
||||||
excluded from the compiled model.
|
excluded from the compiled model.
|
||||||
|
|
|
@ -13,8 +13,13 @@ def main() -> None:
|
||||||
)
|
)
|
||||||
subparsers = parser.add_subparsers(dest="subparser_name", required=True)
|
subparsers = parser.add_subparsers(dest="subparser_name", required=True)
|
||||||
|
|
||||||
compile_parser = subparsers.add_parser("compile", help="compile a raw model")
|
compile_parser = subparsers.add_parser(
|
||||||
|
"compile", help="compile a raw model"
|
||||||
|
)
|
||||||
compile_parser.add_argument("model", help="raw model to compile")
|
compile_parser.add_argument("model", help="raw model to compile")
|
||||||
|
compile_parser.add_argument(
|
||||||
|
"out", help="name of file to write compiled model to"
|
||||||
|
)
|
||||||
compile_parser.add_argument(
|
compile_parser.add_argument(
|
||||||
"--max-ngram-length",
|
"--max-ngram-length",
|
||||||
"-n",
|
"-n",
|
||||||
|
@ -53,11 +58,15 @@ def main() -> None:
|
||||||
action="store_true",
|
action="store_true",
|
||||||
)
|
)
|
||||||
|
|
||||||
check_parser = subparsers.add_parser("check", help="check one word or ngram in model")
|
check_parser = subparsers.add_parser(
|
||||||
|
"check", help="check one word or ngram in model"
|
||||||
|
)
|
||||||
check_parser.add_argument("model", help="compiled model to use")
|
check_parser.add_argument("model", help="compiled model to use")
|
||||||
check_parser.add_argument("token", help="token or ngram to check")
|
check_parser.add_argument("token", help="token or ngram to check")
|
||||||
|
|
||||||
pack_parser = subparsers.add_parser("pack", help="pack a model from a directory")
|
pack_parser = subparsers.add_parser(
|
||||||
|
"pack", help="pack a model from a directory"
|
||||||
|
)
|
||||||
pack_parser.add_argument("model", help="directory containing model")
|
pack_parser.add_argument("model", help="directory containing model")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
@ -66,7 +75,10 @@ def main() -> None:
|
||||||
with open(args.model, "r") as f:
|
with open(args.model, "r") as f:
|
||||||
model = json.load(f)
|
model = json.load(f)
|
||||||
|
|
||||||
gptc.compile(model, args.max_ngram_length, args.min_count).serialize(sys.stdout.buffer)
|
with open(args.out, "wb+") as f:
|
||||||
|
gptc.compile(
|
||||||
|
model, args.max_ngram_length, args.min_count
|
||||||
|
).serialize(f)
|
||||||
elif args.subparser_name == "classify":
|
elif args.subparser_name == "classify":
|
||||||
with open(args.model, "rb") as f:
|
with open(args.model, "rb") as f:
|
||||||
model = gptc.deserialize(f)
|
model = gptc.deserialize(f)
|
||||||
|
@ -76,7 +88,6 @@ def main() -> None:
|
||||||
else:
|
else:
|
||||||
text = sys.stdin.read()
|
text = sys.stdin.read()
|
||||||
|
|
||||||
|
|
||||||
if args.category:
|
if args.category:
|
||||||
classifier = gptc.Classifier(model, args.max_ngram_length)
|
classifier = gptc.Classifier(model, args.max_ngram_length)
|
||||||
print(classifier.classify(text))
|
print(classifier.classify(text))
|
||||||
|
|
Binary file not shown.
|
@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "gptc"
|
name = "gptc"
|
||||||
version = "3.1.1"
|
version = "4.0.0"
|
||||||
description = "General-purpose text classifier"
|
description = "General-purpose text classifier"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
authors = [{ name = "Samuel Sloniker", email = "sam@kj7rrv.com"}]
|
authors = [{ name = "Samuel Sloniker", email = "sam@kj7rrv.com"}]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user