Update project, add model and converter

This commit is contained in:
Samuel Sloniker 2022-07-06 13:07:37 -07:00
parent 9ed9b82bf7
commit fc03c8c866
9 changed files with 46955 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
__pycache__

53
convert_raw_model.py Normal file
View File

@@ -0,0 +1,53 @@
import gptc.tokenizer
import json
def assemble_text(category_1, category_2, raw_model):
    """Collect the tokenized words of two categories from a raw model.

    Args:
        category_1: Category name whose words form the first result.
        category_2: Category name whose words form the second result.
        raw_model: Iterable of mappings, each providing a "category" key
            and a "text" key. Entries whose category matches neither
            name are ignored.

    Returns:
        A 2-tuple of strings ``(category 1 words, category 2 words)`` with
        one word per line. Each string ends with a newline, unless no
        words were collected for that category, in which case it is empty.
    """
    category_1_words = []
    category_2_words = []

    for text_dict in raw_model:
        category = text_dict["category"]
        # category_1 is tested first, so it wins if both names are equal.
        if category == category_1:
            words = category_1_words
        elif category == category_2:
            words = category_2_words
        else:
            continue
        words.extend(gptc.tokenizer.tokenize(text_dict["text"]))

    # A trailing empty entry makes the joined text end with a newline.
    category_1_words.append("")
    category_2_words.append("")

    return "\n".join(category_1_words), "\n".join(category_2_words)
if __name__ == "__main__":
    import argparse

    # Command-line entry point: load a raw JSON model, split its tokenized
    # words by category, and write each category's words to its own file.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "cat_1_name", help="the name of category 1 in the model"
    )
    parser.add_argument(
        "cat_2_name", help="the name of category 2 in the model"
    )
    parser.add_argument("model_path", help="path to raw model in JSON format")
    parser.add_argument(
        "cat_1_file", help="path to file to write category 1 words to"
    )
    parser.add_argument(
        "cat_2_file", help="path to file to write category 2 words to"
    )
    args = parser.parse_args()

    # JSON text is UTF-8 by specification; do not depend on the locale's
    # default encoding when reading it.
    with open(args.model_path, encoding="utf-8") as f:
        raw_model = json.load(f)

    cat1, cat2 = assemble_text(args.cat_1_name, args.cat_2_name, raw_model)

    # The output files are only written, never read back, so plain "w" is
    # enough. UTF-8 keeps non-ASCII words intact on every platform.
    with open(args.cat_1_file, "w", encoding="utf-8") as f:
        f.write(cat1)
    with open(args.cat_2_file, "w", encoding="utf-8") as f:
        f.write(cat2)

11
convert_text.py Normal file
View File

@@ -0,0 +1,11 @@
import gptc.tokenizer
# Read text from standard input until end-of-file, tokenize each line, and
# print the collected tokens one per line.
words = []
while True:
    # Only input() is guarded: an EOFError from it marks the end of input,
    # while an error raised by the tokenizer should surface instead of
    # silently ending the loop.
    try:
        line = input()
    except EOFError:
        break
    words += gptc.tokenizer.tokenize(line)
print("\n".join(words))

Binary file not shown.

5235
model/compiled/All words.txt Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff