gptc/utils/pack.py

42 lines
1.1 KiB
Python
Raw Normal View History

2022-03-05 10:17:17 -08:00
# SPDX-License-Identifier: LGPL-3.0-or-later
2021-10-27 19:26:32 -07:00
import sys
import os
import json
2022-05-21 13:09:53 -07:00
def pack(directory, print_exceptions=True):
    """Build a raw model from a directory of per-category text files.

    Every entry directly inside *directory* is treated as a category name,
    and every file inside that entry is read as one text sample belonging
    to the category.

    Args:
        directory: Path of the directory whose entries are the categories.
        print_exceptions: When true, each exception caught while listing or
            reading is also printed to standard error.

    Returns:
        A ``(raw_model, exceptions)`` tuple. ``raw_model`` is a list of
        ``{"category": <entry name>, "text": <file contents>}`` dicts, one
        per file that was read successfully. ``exceptions`` is a list of
        one-element tuples ``(exception,)``, one per entry or file that
        could not be processed.

    Raises:
        OSError: If *directory* itself cannot be listed; only failures
            below the top level are collected instead of raised.
    """
    exceptions = []

    def record(error):
        # Packing is best-effort: remember the failure and keep going.
        exceptions.append((error,))
        if print_exceptions:
            print(error, file=sys.stderr)

    raw_model = []
    for category in os.listdir(directory):
        # Resolve against the ``directory`` argument, not ``sys.argv``, so
        # the function works for any caller-supplied path.
        category_dir = os.path.join(directory, category)
        try:
            filenames = os.listdir(category_dir)
        except Exception as e:
            # E.g. a stray regular file sitting next to the category
            # directories; it contributes no texts.
            record(e)
            continue

        for filename in filenames:
            try:
                with open(os.path.join(category_dir, filename)) as f:
                    raw_model.append({"category": category, "text": f.read()})
            except Exception as e:
                record(e)

    return raw_model, exceptions
if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the directory
    # to pack, and writes the resulting raw model to stdout as JSON.
    # Guarded so that importing this module does not run the CLI.
    if len(sys.argv) != 2:
        print("usage: pack.py <path>", file=sys.stderr)
        # sys.exit rather than the site-provided exit() helper, which is
        # not available when the interpreter runs without the site module.
        sys.exit(1)

    # Only the model is emitted; exceptions were already reported on stderr
    # by pack() itself.
    print(json.dumps(pack(sys.argv[1])[0]))