Browse Source

Update project, add model and converter

master
Samuel Sloniker 2 years ago
parent
commit
fc03c8c866
  1. 1
      .gitignore
  2. 53
      convert_raw_model.py
  3. 11
      convert_text.py
  4. BIN
      gptc_scratch.sb3
  5. 5235
      model/compiled/All words.txt
  6. 5235
      model/compiled/Word scores in Category 1.txt
  7. 5235
      model/compiled/Word scores in Category 2.txt
  8. 15624
      model/raw/category_1_words_twain.txt
  9. 15561
      model/raw/category_2_words_shakespeare.txt

1
.gitignore vendored

@ -0,0 +1 @@
__pycache__

53
convert_raw_model.py

@ -0,0 +1,53 @@
import gptc.tokenizer
import json
def assemble_text(category_1, category_2, raw_model):
    """Collect the tokenized text of two categories from a raw model.

    Args:
        category_1: Value compared against each entry's ``"category"`` key
            to select text for the first result.
        category_2: Value compared against each entry's ``"category"`` key
            to select text for the second result. Only consulted when the
            entry did not match ``category_1``.
        raw_model: Iterable of mappings, each providing ``"category"`` and
            ``"text"`` keys. Entries matching neither category are skipped.

    Returns:
        A ``(category_1_text, category_2_text)`` tuple of strings. Each
        string holds that category's tokens one per line; it ends with a
        newline when at least one token was collected and is empty
        otherwise.
    """
    category_1_words = []
    category_2_words = []

    for text_dict in raw_model:
        # Pick the list this entry contributes to; unknown categories are
        # ignored without tokenizing their text.
        if text_dict["category"] == category_1:
            words = category_1_words
        elif text_dict["category"] == category_2:
            words = category_2_words
        else:
            continue
        words += gptc.tokenizer.tokenize(text_dict["text"])

    # A trailing empty entry makes the joined output end with a newline
    # whenever there is at least one token before it.
    category_1_words.append("")
    category_2_words.append("")

    return "\n".join(category_1_words), "\n".join(category_2_words)
if __name__ == "__main__":
    # Command-line entry point: read a raw JSON model and write one word
    # file per category.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "cat_1_name", help="the name of category 1 in the model"
    )
    parser.add_argument(
        "cat_2_name", help="the name of category 2 in the model"
    )
    parser.add_argument("model_path", help="path to raw model in JSON format")
    parser.add_argument(
        "cat_1_file", help="path to file to write category 1 words to"
    )
    parser.add_argument(
        "cat_2_file", help="path to file to write category 2 words to"
    )
    args = parser.parse_args()

    # Open with an explicit encoding so the result does not depend on the
    # platform's locale; JSON text is exchanged as UTF-8.
    with open(args.model_path, encoding="utf-8") as f:
        raw_model = json.load(f)

    cat1, cat2 = assemble_text(args.cat_1_name, args.cat_2_name, raw_model)

    # The outputs are only written, never read back, so plain "w" suffices.
    with open(args.cat_1_file, "w", encoding="utf-8") as f:
        f.write(cat1)
    with open(args.cat_2_file, "w", encoding="utf-8") as f:
        f.write(cat2)

11
convert_text.py

@ -0,0 +1,11 @@
import gptc.tokenizer
# Tokenize every line read from standard input and print the tokens one per
# line.
words = []
while True:
    # Only input() is guarded: end of input stops the loop, while an error
    # raised during tokenizing is left to propagate instead of being
    # mistaken for end of input.
    try:
        line = input()
    except EOFError:
        break
    words += gptc.tokenizer.tokenize(line)
print("\n".join(words))

BIN
gptc_scratch.sb3

Binary file not shown.

5235
model/compiled/All words.txt

File diff suppressed because it is too large Load Diff

5235
model/compiled/Word scores in Category 1.txt

File diff suppressed because it is too large Load Diff

5235
model/compiled/Word scores in Category 2.txt

File diff suppressed because it is too large Load Diff

15624
model/raw/category_1_words_twain.txt

File diff suppressed because it is too large Load Diff

15561
model/raw/category_2_words_shakespeare.txt

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save