Split model compilation out of download.py into a standalone compile.py.

compile.py (new file) opens articles.db, creates the articles table if it
does not exist, reads every (text, category) row, compiles the rows with
gptc.compile(max_ngram_length=3, min_count=3) and writes the serialized
model to model.gptc.

download.py drops the hashlib and gptc imports and the "Compiling model..."
step that previously ran after the final commit.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Line structure, headers and hunk counts are
restored exactly; the leading whitespace of the download.py context and
removed lines (the print(...) line in particular) is a best-effort
reconstruction. Confirm against the real file, or apply with
`git apply --ignore-whitespace`.

diff --git a/compile.py b/compile.py
new file mode 100644
index 0000000..9bdd158
--- /dev/null
+++ b/compile.py
@@ -0,0 +1,18 @@
+import sqlite3
+import gptc
+
+con = sqlite3.connect("articles.db")
+con.execute("CREATE TABLE IF NOT EXISTS articles(source, category, url, text);")
+
+raw_model = [
+    {"text": i[0], "category": i[1]}
+    for i in con.execute("SELECT text, category FROM articles;")
+]
+
+with open("model.gptc", "w+b") as f:
+    f.write(
+        gptc.compile(raw_model, max_ngram_length=3, min_count=3).serialize()
+    )
+
+con.commit()
+con.close()
diff --git a/download.py b/download.py
index 619159d..1f68e52 100644
--- a/download.py
+++ b/download.py
@@ -1,9 +1,7 @@
 import feedparser
-import hashlib
 import sqlite3
 import goose3
 import tomli
-import gptc
 import bs4
 
 
@@ -85,17 +83,4 @@ try:
                 print("Not enough information. Skipping.")
 finally:
     con.commit()
-
-print("Compiling model...")
-
-raw_model = [
-    {"text": i[0], "category": i[1]}
-    for i in con.execute("SELECT text, category FROM articles;")
-]
-
-with open("model.gptc", "w+b") as f:
-    f.write(
-        gptc.compile(raw_model, max_ngram_length=3, min_count=3).serialize()
-    )
-
 con.close()