# gptc_scratch/convert_raw_model.py

import gptc.tokenizer
import json


def assemble_text(category_1, category_2, raw_model):
    """Collect the tokenized text of two categories from a raw model.

    Every entry of ``raw_model`` is looked up by its ``"category"`` key.
    Entries matching ``category_1`` or ``category_2`` have their ``"text"``
    value passed through ``gptc.tokenizer.tokenize`` and the result is
    appended to that category's token list; all other entries are skipped.
    If both category names are equal, matching entries count towards
    ``category_1`` only.

    Args:
        category_1: Category value selecting entries for the first result.
        category_2: Category value selecting entries for the second result.
        raw_model: Iterable of mappings with a ``"category"`` key and, for
            matching entries, a ``"text"`` key.

    Returns:
        A 2-tuple of strings ``(category_1_text, category_2_text)``. Each
        string holds the collected tokens joined by newlines, followed by
        one trailing newline; it is empty when no tokens were collected.
    """
    first_tokens = []
    second_tokens = []

    for entry in raw_model:
        label = entry["category"]
        if label == category_1:
            bucket = first_tokens
        elif label == category_2:
            bucket = second_tokens
        else:
            # Entry belongs to neither requested category.
            continue

        # presumably tokenize() yields string tokens -- required by the
        # str.join calls below; verify against gptc.tokenizer.
        bucket.extend(gptc.tokenizer.tokenize(entry["text"]))

    # The extra empty element makes a non-empty result end with "\n".
    first_text = "\n".join(first_tokens + [""])
    second_text = "\n".join(second_tokens + [""])
    return first_text, second_text


if __name__ == "__main__":
    # Command-line entry point: read a raw model from a JSON file, split the
    # tokenized text of two named categories, and write each category's
    # tokens to its own output file.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "cat_1_name", help="the name of category 1 in the model"
    )
    parser.add_argument(
        "cat_2_name", help="the name of category 2 in the model"
    )
    parser.add_argument("model_path", help="path to raw model in JSON format")
    parser.add_argument(
        "cat_1_file", help="path to file to write category 1 words to"
    )
    parser.add_argument(
        "cat_2_file", help="path to file to write category 2 words to"
    )
    args = parser.parse_args()

    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's locale encoding, which may differ (e.g. on Windows).
    with open(args.model_path, encoding="utf-8") as f:
        raw_model = json.load(f)

    cat1, cat2 = assemble_text(args.cat_1_name, args.cat_2_name, raw_model)

    # Write-only access is sufficient ("w" rather than "w+"), and an explicit
    # UTF-8 encoding keeps non-ASCII tokens from failing to encode under a
    # narrower locale encoding.
    with open(args.cat_1_file, "w", encoding="utf-8") as f:
        f.write(cat1)

    with open(args.cat_2_file, "w", encoding="utf-8") as f:
        f.write(cat2)
Update project, add model and converter 2 years ago			`import gptc.tokenizer`
			`import json`


			`def assemble_text(category_1, category_2, raw_model):`
			`category_1_words = []`
			`category_2_words = []`

			`for text_dict in raw_model:`
			`if text_dict["category"] == category_1:`
			`words = category_1_words`
			`elif text_dict["category"] == category_2:`
			`words = category_2_words`
			`else:`
			`continue`

			`words += gptc.tokenizer.tokenize(text_dict["text"])`

			`category_1_words.append("")`
			`category_2_words.append("")`

			`return "\n".join(category_1_words), "\n".join(category_2_words)`


			`if __name__ == "__main__":`
			`import argparse`

			`parser = argparse.ArgumentParser()`
			`parser.add_argument(`
			`"cat_1_name", help="the name of category 1 in the model"`
			`)`
			`parser.add_argument(`
			`"cat_2_name", help="the name of category 2 in the model"`
			`)`
			`parser.add_argument("model_path", help="path to raw model in JSON format")`
			`parser.add_argument(`
			`"cat_1_file", help="path to file to write category 1 words to"`
			`)`
			`parser.add_argument(`
			`"cat_2_file", help="path to file to write category 2 words to"`
			`)`
			`args = parser.parse_args()`

			`with open(args.model_path) as f:`
			`raw_model = json.load(f)`

			`cat1, cat2 = assemble_text(args.cat_1_name, args.cat_2_name, raw_model)`

			`with open(args.cat_1_file, "w+") as f:`
			`f.write(cat1)`

			`with open(args.cat_2_file, "w+") as f:`
			`f.write(cat2)`