Split download into download and compile
This commit is contained in:
parent
a96d474e37
commit
af5f3c3df1
18
compile.py
Normal file
18
compile.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
import sqlite3
|
||||||
|
import gptc
|
||||||
|
|
||||||
|
# Build the classifier model from the articles stored in articles.db and
# write its serialized form to model.gptc.
con = sqlite3.connect("articles.db")
try:
    # Make sure the table exists so the SELECT below also works on a
    # database that has not been populated yet.
    con.execute("CREATE TABLE IF NOT EXISTS articles(source, category, url, text);")

    # One {"text", "category"} record per stored article, in the form
    # passed to gptc.compile below.
    raw_model = [
        {"text": text, "category": category}
        for text, category in con.execute("SELECT text, category FROM articles;")
    ]

    # Compile before opening the output file: opening for writing truncates
    # model.gptc, so a failure during compilation leaves any existing model
    # file untouched.
    serialized_model = gptc.compile(
        raw_model, max_ngram_length=3, min_count=3
    ).serialize()

    # The file is only written, never read back, so plain "wb" is enough.
    with open("model.gptc", "wb") as f:
        f.write(serialized_model)

    con.commit()
finally:
    # Release the database connection even if querying, compiling, or
    # writing the model raised.
    con.close()
|
15
download.py
15
download.py
|
@ -1,9 +1,7 @@
|
||||||
import feedparser
|
import feedparser
|
||||||
import hashlib
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import goose3
|
import goose3
|
||||||
import tomli
|
import tomli
|
||||||
import gptc
|
|
||||||
import bs4
|
import bs4
|
||||||
|
|
||||||
|
|
||||||
|
@ -85,17 +83,4 @@ try:
|
||||||
print("Not enough information. Skipping.")
|
print("Not enough information. Skipping.")
|
||||||
finally:
|
finally:
|
||||||
con.commit()
|
con.commit()
|
||||||
|
|
||||||
print("Compiling model...")
|
|
||||||
|
|
||||||
raw_model = [
|
|
||||||
{"text": i[0], "category": i[1]}
|
|
||||||
for i in con.execute("SELECT text, category FROM articles;")
|
|
||||||
]
|
|
||||||
|
|
||||||
with open("model.gptc", "w+b") as f:
|
|
||||||
f.write(
|
|
||||||
gptc.compile(raw_model, max_ngram_length=3, min_count=3).serialize()
|
|
||||||
)
|
|
||||||
|
|
||||||
con.close()
|
con.close()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user