2022-11-25 09:29:39 -08:00
|
|
|
#!/usr/bin/env python3
|
2022-11-25 09:35:27 -08:00
|
|
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
|
|
|
|
# Copyright (c) 2022 Samuel L Sloniker
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify it under
|
|
|
|
# the terms of the GNU General Public License as published by the Free Software
|
|
|
|
# Foundation, either version 3 of the License, or (at your option) any later
|
|
|
|
# version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
|
|
|
# details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License along with
|
2022-11-26 13:05:37 -08:00
|
|
|
# this program. If not, see <https://www.gnu.org/licenses/>.
|
2022-11-25 09:29:39 -08:00
|
|
|
|
2022-11-24 20:46:05 -08:00
|
|
|
import sqlite3
|
2022-11-26 13:05:37 -08:00
|
|
|
import tomli
|
2022-11-24 20:46:05 -08:00
|
|
|
import gptc
|
|
|
|
|
2022-11-26 13:05:37 -08:00
|
|
|
# Load the compiler settings from compiler.toml. The file is opened in binary
# mode because tomli.load() expects a binary file object.
with open("compiler.toml", "rb") as config_file:
    config = tomli.load(config_file)
|
|
|
|
|
2022-11-24 20:46:05 -08:00
|
|
|
# Read every stored article out of articles.db as the raw training data.
# The connection is only needed while the rows are fetched, so it is closed
# in a `finally` clause: an error here no longer leaks the connection.
con = sqlite3.connect("articles.db")
try:
    # Create the table if it is missing so the SELECT below cannot fail on a
    # database that has not been populated yet.
    con.execute(
        "CREATE TABLE IF NOT EXISTS articles(source, category, url, text);"
    )

    # One {"text", "category"} dict per row, passed to gptc.compile() below.
    raw_model = [
        {"text": text, "category": category}
        for text, category in con.execute(
            "SELECT text, category FROM articles;"
        )
    ]

    con.commit()
finally:
    con.close()

# Compile BEFORE opening the output file: opening with "w+b" truncates
# model.gptc, so a failure during compilation would otherwise destroy the
# previously compiled model.
model = gptc.compile(
    raw_model,
    max_ngram_length=config["max_ngram_length"],
    min_count=config["min_count"],
)

with open("model.gptc", "w+b") as f:
    model.serialize(f)
|