2022-03-05 10:17:17 -08:00
|
|
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
|
|
|
|
2021-10-27 19:26:32 -07:00
|
|
|
import sys
|
|
|
|
import os
|
|
|
|
import json
|
|
|
|
|
|
|
|
# Require exactly one command-line argument: the path handed to os.listdir
# further down. Anything else prints the usage line and stops with status 1.
if len(sys.argv) != 2:
    print("usage: pack.py <path>", file=sys.stderr)
    # Use sys.exit() rather than the bare exit(): the latter is a helper that
    # the `site` module injects for interactive sessions and it does not exist
    # when the interpreter runs with -S or in some frozen/embedded builds.
    sys.exit(1)
|
|
|
|
|
|
|
|
# Every entry directly under the root is used as a category name, and each
# file listed inside it is read in full as one text of that category.
root = sys.argv[1]
paths = os.listdir(root)
texts = {path: [] for path in paths}

for path, samples in texts.items():
    try:
        category_dir = os.path.join(root, path)
        for name in os.listdir(category_dir):
            # Best effort per file: a file that cannot be opened or read is
            # reported on stderr and skipped, the rest are still collected.
            try:
                with open(os.path.join(category_dir, name)) as handle:
                    samples.append(handle.read())
            except Exception as err:
                print(err, file=sys.stderr)
    except Exception as err:
        # The category could not be listed; it keeps whatever was gathered
        # so far (an empty list if the listing itself failed).
        print(err, file=sys.stderr)
|
|
|
|
|
|
|
|
# Flatten the per-category lists into a single list of
# {"category": ..., "text": ...} records, preserving the order of `texts`
# and of the texts within each category.
raw_model = [
    {"category": category, "text": text}
    for category, cat_texts in texts.items()
    for text in cat_texts
]
|
2021-10-27 19:26:32 -07:00
|
|
|
|
|
|
|
# Serialize all records as JSON and write the result to stdout.
serialized = json.dumps(raw_model)
print(serialized)
|