#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-3.0-or-later
# Copyright (c) 2022 Samuel L Sloniker
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see https://www.gnu.org/licenses/.

"""Compile the articles stored in ``articles.db`` into ``model.gptc``.

Reads the compiler settings from ``compiler.toml``, loads every
``(text, category)`` row of the ``articles`` table, passes the rows to
``gptc.compile`` and serializes the result to ``model.gptc``.
"""

import sqlite3

import tomli

import gptc

# tomli only accepts binary file objects, hence "rb".
with open("compiler.toml", "rb") as f:
    config = tomli.load(f)

con = sqlite3.connect("articles.db")
try:
    # Create the table on first run so the SELECT below cannot fail on a
    # fresh database; the result is then simply an empty model input.
    con.execute(
        "CREATE TABLE IF NOT EXISTS articles(source, category, url, text);"
    )
    raw_model = [
        {"text": text, "category": category}
        for text, category in con.execute(
            "SELECT text, category FROM articles;"
        )
    ]
    con.commit()
finally:
    # Release the database even if the query fails; nothing below needs it.
    con.close()

# Build the model before touching the output file: opening "model.gptc" for
# writing truncates it, so a failed compile must not destroy the old model.
model = gptc.compile(
    raw_model,
    max_ngram_length=config["max_ngram_length"],
    min_count=config["min_count"],
)

with open("model.gptc", "w+b") as f:
    model.serialize(f)