Update project, add model and converter
This commit is contained in:
parent
9ed9b82bf7
commit
fc03c8c866
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
__pycache__
|
53
convert_raw_model.py
Normal file
53
convert_raw_model.py
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
import gptc.tokenizer
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
def assemble_text(category_1, category_2, raw_model):
    """Collect the tokenized text of two categories from a raw model.

    Each entry of ``raw_model`` is indexed by ``"category"`` and ``"text"``.
    Entries whose category equals ``category_1`` or ``category_2`` have
    their text passed through ``gptc.tokenizer.tokenize`` and the resulting
    tokens accumulated for that category; all other entries are skipped.

    Returns a 2-tuple of strings ``(category_1_text, category_2_text)``
    with one token per line. An empty element is added after the last
    token before joining, so non-empty output ends with a newline and a
    category without tokens yields ``""``.
    """
    first_tokens = []
    second_tokens = []

    for entry in raw_model:
        label = entry["category"]
        if label == category_1:
            bucket = first_tokens
        elif label == category_2:
            bucket = second_tokens
        else:
            # Not one of the two requested categories.
            continue
        bucket.extend(gptc.tokenizer.tokenize(entry["text"]))

    return "\n".join(first_tokens + [""]), "\n".join(second_tokens + [""])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import argparse

    # Command-line entry point: load a raw model from a JSON file, split it
    # into the two named categories, and write each category's words to its
    # own output file.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "cat_1_name", help="the name of category 1 in the model"
    )
    parser.add_argument(
        "cat_2_name", help="the name of category 2 in the model"
    )
    parser.add_argument("model_path", help="path to raw model in JSON format")
    parser.add_argument(
        "cat_1_file", help="path to file to write category 1 words to"
    )
    parser.add_argument(
        "cat_2_file", help="path to file to write category 2 words to"
    )
    args = parser.parse_args()

    # Decode explicitly as UTF-8 instead of the locale default, so the model
    # loads the same way on every platform.
    with open(args.model_path, encoding="utf-8") as f:
        raw_model = json.load(f)

    cat1, cat2 = assemble_text(args.cat_1_name, args.cat_2_name, raw_model)

    # Write-only mode is enough (nothing is read back); UTF-8 keeps
    # non-ASCII words from failing under a narrower locale codec.
    with open(args.cat_1_file, "w", encoding="utf-8") as f:
        f.write(cat1)

    with open(args.cat_2_file, "w", encoding="utf-8") as f:
        f.write(cat2)
|
11
convert_text.py
Normal file
11
convert_text.py
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
import gptc.tokenizer
|
||||||
|
|
||||||
|
# Tokenize standard input line by line and print one token per line.
words = []

try:
    # input() raises EOFError once standard input is exhausted, which is
    # what ends this otherwise endless loop.
    while True:
        words.extend(gptc.tokenizer.tokenize(input()))
except EOFError:
    pass

print("\n".join(words))
|
BIN
gptc_scratch.sb3
BIN
gptc_scratch.sb3
Binary file not shown.
5235
model/compiled/All words.txt
Normal file
5235
model/compiled/All words.txt
Normal file
File diff suppressed because it is too large
Load Diff
5235
model/compiled/Word scores in Category 1.txt
Normal file
5235
model/compiled/Word scores in Category 1.txt
Normal file
File diff suppressed because it is too large
Load Diff
5235
model/compiled/Word scores in Category 2.txt
Normal file
5235
model/compiled/Word scores in Category 2.txt
Normal file
File diff suppressed because it is too large
Load Diff
15624
model/raw/category_1_words_twain.txt
Normal file
15624
model/raw/category_1_words_twain.txt
Normal file
File diff suppressed because it is too large
Load Diff
15561
model/raw/category_2_words_shakespeare.txt
Normal file
15561
model/raw/category_2_words_shakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user