import sys
import os
import json


def pack(directory, print_exceptions=True):
    """Collect the files found one level below *directory* into flat records.

    Every entry of *directory* is treated as a category; every file inside a
    category is read as text and turned into one record.

    Args:
        directory: Path of the directory whose entries are the categories.
        print_exceptions: When true, each recorded error is also printed to
            standard error.

    Returns:
        A ``(raw_model, exceptions)`` tuple. ``raw_model`` is a list of
        ``{"category": <entry name>, "text": <file contents>}`` dicts, in
        ``os.listdir`` order. ``exceptions`` is a list of 1-tuples ``(error,)``
        for every category or file that could not be read.

    Raises:
        OSError: If *directory* itself cannot be listed; only failures below
            it are collected instead of raised.
    """
    exceptions = []

    def record(error):
        # Keep the 1-tuple shape so existing consumers of the list still work.
        exceptions.append((error,))
        if print_exceptions:
            print(error, file=sys.stderr)

    raw_model = []

    for category in os.listdir(directory):
        # Resolve against the *directory* argument, not sys.argv[1], so the
        # function works for any caller and not only for the command line.
        category_dir = os.path.join(directory, category)
        try:
            file_names = os.listdir(category_dir)
        except Exception as e:
            # Best effort: an unreadable or non-directory entry is recorded
            # and contributes no records.
            record(e)
            continue

        for file_name in file_names:
            try:
                with open(os.path.join(category_dir, file_name)) as f:
                    raw_model.append({"category": category, "text": f.read()})
            except Exception as e:
                # Best effort: skip files that cannot be opened or decoded.
                record(e)

    return raw_model, exceptions


# Only run the command-line behaviour when executed as a script, so that
# importing this module to reuse pack() has no side effects.
if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("usage: pack.py ", file=sys.stderr)
        sys.exit(1)

    print(json.dumps(pack(sys.argv[1])[0]))