You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
42 lines
1.3 KiB
42 lines
1.3 KiB
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-3.0-or-later

# Copyright (c) 2022 Samuel L Sloniker
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
import contextlib
import sqlite3

import gptc
import tomli
|
|
|
# Load the compiler settings from compiler.toml in the working directory.
# The file is opened in binary mode, which is what tomli.load() expects.
with open("compiler.toml", "rb") as config_file:
    config = tomli.load(config_file)
|
|
|
# Read every stored article out of articles.db as a list of
# {"text": ..., "category": ...} records, the input passed to gptc.compile().
#
# contextlib.closing() guarantees the connection is closed even when a
# statement raises; sqlite3's own context-manager protocol only manages
# transactions and does not close the connection.
with contextlib.closing(sqlite3.connect("articles.db")) as con:
    # Make sure the table exists so the SELECT below works on a fresh
    # database (it then simply yields no rows).
    con.execute(
        "CREATE TABLE IF NOT EXISTS articles(source, category, url, text);"
    )
    con.commit()

    # The rows are fully materialized here, so the connection can be
    # released before the compile step runs.
    raw_model = [
        {"text": text, "category": category}
        for text, category in con.execute(
            "SELECT text, category FROM articles;"
        )
    ]

# Compile BEFORE opening the output file: opening with "w+b" truncates
# model.gptc, so compiling first means a failed compile cannot wipe out a
# previously written model.
model = gptc.compile(
    raw_model,
    max_ngram_length=config["max_ngram_length"],
    min_count=config["min_count"],
)

with open("model.gptc", "w+b") as f:
    model.serialize(f)
|
|
|