Split download into download and compile

This commit is contained in:
Samuel Sloniker 2022-11-24 20:46:05 -08:00
parent a96d474e37
commit af5f3c3df1
Signed by: kj7rrv
GPG Key ID: 1BB4029E66285A62
2 changed files with 18 additions and 15 deletions

18
compile.py Normal file
View File

@ -0,0 +1,18 @@
import sqlite3
import gptc
# Build the GPTC model from every article stored in articles.db and write
# the serialized result to model.gptc.
con = sqlite3.connect("articles.db")
try:
    # Create the table if it is missing so the SELECT below cannot fail on
    # a fresh database.
    con.execute(
        "CREATE TABLE IF NOT EXISTS articles(source, category, url, text);"
    )
    rows = con.execute("SELECT text, category FROM articles;")
    raw_model = [
        {"text": text, "category": category} for text, category in rows
    ]
    con.commit()
finally:
    # Release the database handle even if reading the articles fails.
    con.close()

# Compile *before* opening the output file: opening in write mode truncates
# model.gptc immediately, so a failure during compilation would otherwise
# destroy the previously compiled model.
serialized_model = gptc.compile(
    raw_model, max_ngram_length=3, min_count=3
).serialize()

# Write-only binary mode is sufficient; the file is never read back here.
with open("model.gptc", "wb") as f:
    f.write(serialized_model)

View File

@ -1,9 +1,7 @@
import feedparser
import hashlib
import sqlite3
import goose3
import tomli
import gptc
import bs4
@ -85,17 +83,4 @@ try:
print("Not enough information. Skipping.")
finally:
con.commit()
print("Compiling model...")
raw_model = [
{"text": i[0], "category": i[1]}
for i in con.execute("SELECT text, category FROM articles;")
]
with open("model.gptc", "w+b") as f:
f.write(
gptc.compile(raw_model, max_ngram_length=3, min_count=3).serialize()
)
con.close()