Samuel Sloniker
2 years ago
9 changed files with 46955 additions and 0 deletions
@ -0,0 +1,53 @@
|
||||
import gptc.tokenizer |
||||
import json |
||||
|
||||
|
||||
def assemble_text(category_1, category_2, raw_model):
    """Collect the tokens of two categories as newline-separated text.

    Each entry of ``raw_model`` is indexed by ``"category"``; entries whose
    category equals ``category_1`` or ``category_2`` have their ``"text"``
    passed through ``gptc.tokenizer.tokenize`` and the resulting tokens are
    accumulated for that category. Entries of any other category are
    skipped. ``category_1`` is checked first, so it wins if both names are
    equal.

    Returns a ``(category_1_text, category_2_text)`` tuple of strings with
    one token per line. A category with no tokens yields an empty string;
    otherwise the string ends with a newline.
    """
    first_tokens = []
    second_tokens = []

    for entry in raw_model:
        label = entry["category"]
        if label == category_1:
            bucket = first_tokens
        elif label == category_2:
            bucket = second_tokens
        else:
            # Not one of the two requested categories.
            continue

        bucket.extend(gptc.tokenizer.tokenize(entry["text"]))

    # The extra empty item makes a non-empty result end with "\n" while
    # leaving an empty token list as the empty string.
    first_text = "\n".join([*first_tokens, ""])
    second_text = "\n".join([*second_tokens, ""])
    return first_text, second_text
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: load a raw model from JSON, split the tokens
    # of two named categories with assemble_text(), and write each
    # category's tokens to its own file.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "cat_1_name", help="the name of category 1 in the model"
    )
    parser.add_argument(
        "cat_2_name", help="the name of category 2 in the model"
    )
    parser.add_argument("model_path", help="path to raw model in JSON format")
    parser.add_argument(
        "cat_1_file", help="path to file to write category 1 words to"
    )
    parser.add_argument(
        "cat_2_file", help="path to file to write category 2 words to"
    )
    args = parser.parse_args()

    # JSON is UTF-8 by specification, so do not rely on the locale's
    # default encoding when reading the model.
    with open(args.model_path, encoding="utf-8") as f:
        raw_model = json.load(f)

    cat1, cat2 = assemble_text(args.cat_1_name, args.cat_2_name, raw_model)

    # Write with an explicit encoding so non-ASCII tokens cannot fail on
    # platforms whose default encoding is not UTF-8. The files are only
    # written, never read back, so plain "w" is sufficient.
    with open(args.cat_1_file, "w", encoding="utf-8") as f:
        f.write(cat1)

    with open(args.cat_2_file, "w", encoding="utf-8") as f:
        f.write(cat2)
@ -0,0 +1,11 @@
|
||||
import gptc.tokenizer |
||||
|
||||
# Tokenize every line read from standard input and print the tokens one per
# line once input is exhausted.
words = []

try:
    while True:
        words.extend(gptc.tokenizer.tokenize(input()))
except EOFError:
    # End of input: stop reading and fall through to the output step.
    pass

print("\n".join(words))
Binary file not shown.
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue