Compare commits

...

4 Commits

Author SHA1 Message Date
af5f3c3df1
Split download into download and compile 2022-11-24 20:46:05 -08:00
a96d474e37
Update gitignore 2022-11-24 20:23:43 -08:00
ee8189d476
Update stats script 2022-11-24 19:48:54 -08:00
1d2cfab68c
More sources 2022-11-24 19:41:40 -08:00
6 changed files with 40 additions and 16 deletions

1
.gitignore vendored
View File

@ -139,4 +139,5 @@ cython_debug/
# Model # Model
*.db *.db
*.db-journal
*.gptc *.gptc

View File

@ -9,6 +9,7 @@ Inclusion of a site in this model is not an endorsement of the site.
### Left ### Left
* ABC News
* The Atlantic * The Atlantic
* CBS News * CBS News
* CNBC * CNBC
@ -21,6 +22,7 @@ Inclusion of a site in this model is not an endorsement of the site.
* American Thinker * American Thinker
* Breitbart * Breitbart
* Daily Caller
* Epoch Times * Epoch Times
* The Federalist * The Federalist
* Fox News * Fox News

18
compile.py Normal file
View File

@ -0,0 +1,18 @@
import sqlite3
import gptc
# Build the GPTC model from every article stored in articles.db and write the
# serialized result to model.gptc.
con = sqlite3.connect("articles.db")
try:
    # Create the table when it is missing so the SELECT below also works
    # against a fresh, empty database.
    con.execute(
        "CREATE TABLE IF NOT EXISTS articles(source, category, url, text);"
    )
    # Materialize all rows now so the connection can be released before the
    # compilation step runs.
    raw_model = [
        {"text": text, "category": category}
        for text, category in con.execute(
            "SELECT text, category FROM articles;"
        )
    ]
    con.commit()
finally:
    # Always release the database handle, even if a query fails.
    con.close()

# Compile and serialize BEFORE opening the output file: opening for writing
# truncates model.gptc, so doing it first would destroy the previous model
# whenever compilation raised.
serialized_model = gptc.compile(
    raw_model, max_ngram_length=3, min_count=3
).serialize()

# The file is only written, never read back, so plain "wb" is sufficient.
with open("model.gptc", "wb") as f:
    f.write(serialized_model)

View File

@ -1,9 +1,7 @@
import feedparser import feedparser
import hashlib
import sqlite3 import sqlite3
import goose3 import goose3
import tomli import tomli
import gptc
import bs4 import bs4
@ -85,17 +83,4 @@ try:
print("Not enough information. Skipping.") print("Not enough information. Skipping.")
finally: finally:
con.commit() con.commit()
print("Compiling model...")
raw_model = [
{"text": i[0], "category": i[1]}
for i in con.execute("SELECT text, category FROM articles;")
]
with open("model.gptc", "w+b") as f:
f.write(
gptc.compile(raw_model, max_ngram_length=3, min_count=3).serialize()
)
con.close() con.close()

View File

@ -40,6 +40,11 @@ feed="https://www.theatlantic.com/feed/all/"
category="left" category="left"
contains_articles=true contains_articles=true
[abc_news]
feed="https://abcnews.go.com/abcnews/usheadlines"
category="left"
contains_articles=false
#[bloomberg] #[bloomberg]
#feed="https://www.bloomberg.com/politics/feeds/site.xml" #feed="https://www.bloomberg.com/politics/feeds/site.xml"
#category="left" #category="left"
@ -94,6 +99,11 @@ feed="https://notthebee.com/feed"
category="right" category="right"
contains_articles=false contains_articles=false
[daily_caller]
feed="https://feeds.feedburner.com/dailycaller"
category="right"
contains_articles=false
#[newsmax] #[newsmax]
#feed="https://www.newsmax.com/rss/Newsfront/16/" #feed="https://www.newsmax.com/rss/Newsfront/16/"
#category="right" #category="right"

View File

@ -7,7 +7,11 @@ right=$(sqlite3 articles.db "SELECT url FROM articles WHERE category = 'right'"
left_sources=$(sqlite3 articles.db "SELECT source FROM articles WHERE category = 'left'" | sort | uniq) left_sources=$(sqlite3 articles.db "SELECT source FROM articles WHERE category = 'left'" | sort | uniq)
right_sources=$(sqlite3 articles.db "SELECT source FROM articles WHERE category = 'right'" | sort | uniq) right_sources=$(sqlite3 articles.db "SELECT source FROM articles WHERE category = 'right'" | sort | uniq)
echo "This model contains a total of $total articles ($left left, $right right)." total_source_count=$(sqlite3 articles.db "SELECT source FROM articles" | sort | uniq | wc -l)
left_source_count=$(echo $left_sources | wc -w)
right_source_count=$(echo $right_sources | wc -w)
echo "This model contains a total of $total articles from $total_source_count sources."
echo "" echo ""
echo "## Left" echo "## Left"
echo "" echo ""
@ -15,8 +19,12 @@ for i in $left_sources; do
echo "* $i: $(sqlite3 articles.db "SELECT url FROM articles WHERE source = '$i'" | wc -l)" echo "* $i: $(sqlite3 articles.db "SELECT url FROM articles WHERE source = '$i'" | wc -l)"
done done
echo "" echo ""
echo "Left total: $left articles from $left_source_count sources"
echo ""
echo "## Right" echo "## Right"
echo "" echo ""
for i in $right_sources; do for i in $right_sources; do
echo "* $i: $(sqlite3 articles.db "SELECT url FROM articles WHERE source = '$i'" | wc -l)" echo "* $i: $(sqlite3 articles.db "SELECT url FROM articles WHERE source = '$i'" | wc -l)"
done done
echo ""
echo "Right total: $right articles from $right_source_count sources"