Compare commits
4 Commits
c5cc6d78f9
...
af5f3c3df1
Author | SHA1 | Date | |
---|---|---|---|
af5f3c3df1 | |||
a96d474e37 | |||
ee8189d476 | |||
1d2cfab68c |
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -139,4 +139,5 @@ cython_debug/
|
||||||
|
|
||||||
# Model
|
# Model
|
||||||
*.db
|
*.db
|
||||||
|
*.db-journal
|
||||||
*.gptc
|
*.gptc
|
||||||
|
|
|
@ -9,6 +9,7 @@ Inclusion of a site in this model is not an endorsement of the site.
|
||||||
|
|
||||||
### Left
|
### Left
|
||||||
|
|
||||||
|
* ABC News
|
||||||
* The Atlantic
|
* The Atlantic
|
||||||
* CBS News
|
* CBS News
|
||||||
* CNBC
|
* CNBC
|
||||||
|
@ -21,6 +22,7 @@ Inclusion of a site in this model is not an endorsement of the site.
|
||||||
|
|
||||||
* American Thinker
|
* American Thinker
|
||||||
* Breitbart
|
* Breitbart
|
||||||
|
* Daily Caller
|
||||||
* Epoch Times
|
* Epoch Times
|
||||||
* The Federalist
|
* The Federalist
|
||||||
* Fox News
|
* Fox News
|
||||||
|
|
18
compile.py
Normal file
18
compile.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
import sqlite3
|
||||||
|
import gptc
|
||||||
|
|
||||||
|
# Build the GPTC model from every article stored in the local SQLite database
# and write the serialized result to model.gptc.
con = sqlite3.connect("articles.db")
try:
    # Create the table if it does not exist yet, so the SELECT below cannot
    # fail on a freshly created database file.
    con.execute(
        "CREATE TABLE IF NOT EXISTS articles(source, category, url, text);"
    )
    con.commit()

    # One training sample per stored article: its text plus its category.
    raw_model = [
        {"text": text, "category": category}
        for text, category in con.execute(
            "SELECT text, category FROM articles;"
        )
    ]
finally:
    # Release the database handle even if the query above fails.
    con.close()

# Compile before opening the output file: opening it for writing truncates
# it, so a failed compile would otherwise wipe the previously built model.
serialized_model = gptc.compile(
    raw_model, max_ngram_length=3, min_count=3
).serialize()

# Plain binary write mode is enough; the file is never read back here.
with open("model.gptc", "wb") as f:
    f.write(serialized_model)
|
15
download.py
15
download.py
|
@ -1,9 +1,7 @@
|
||||||
import feedparser
|
import feedparser
|
||||||
import hashlib
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import goose3
|
import goose3
|
||||||
import tomli
|
import tomli
|
||||||
import gptc
|
|
||||||
import bs4
|
import bs4
|
||||||
|
|
||||||
|
|
||||||
|
@ -85,17 +83,4 @@ try:
|
||||||
print("Not enough information. Skipping.")
|
print("Not enough information. Skipping.")
|
||||||
finally:
|
finally:
|
||||||
con.commit()
|
con.commit()
|
||||||
|
|
||||||
print("Compiling model...")
|
|
||||||
|
|
||||||
raw_model = [
|
|
||||||
{"text": i[0], "category": i[1]}
|
|
||||||
for i in con.execute("SELECT text, category FROM articles;")
|
|
||||||
]
|
|
||||||
|
|
||||||
with open("model.gptc", "w+b") as f:
|
|
||||||
f.write(
|
|
||||||
gptc.compile(raw_model, max_ngram_length=3, min_count=3).serialize()
|
|
||||||
)
|
|
||||||
|
|
||||||
con.close()
|
con.close()
|
||||||
|
|
10
sources.toml
10
sources.toml
|
@ -40,6 +40,11 @@ feed="https://www.theatlantic.com/feed/all/"
|
||||||
category="left"
|
category="left"
|
||||||
contains_articles=true
|
contains_articles=true
|
||||||
|
|
||||||
|
[abc_news]
|
||||||
|
feed="https://abcnews.go.com/abcnews/usheadlines"
|
||||||
|
category="left"
|
||||||
|
contains_articles=false
|
||||||
|
|
||||||
#[bloomberg]
|
#[bloomberg]
|
||||||
#feed="https://www.bloomberg.com/politics/feeds/site.xml"
|
#feed="https://www.bloomberg.com/politics/feeds/site.xml"
|
||||||
#category="left"
|
#category="left"
|
||||||
|
@ -94,6 +99,11 @@ feed="https://notthebee.com/feed"
|
||||||
category="right"
|
category="right"
|
||||||
contains_articles=false
|
contains_articles=false
|
||||||
|
|
||||||
|
[daily_caller]
|
||||||
|
feed="https://feeds.feedburner.com/dailycaller"
|
||||||
|
category="right"
|
||||||
|
contains_articles=false
|
||||||
|
|
||||||
#[newsmax]
|
#[newsmax]
|
||||||
#feed="https://www.newsmax.com/rss/Newsfront/16/"
|
#feed="https://www.newsmax.com/rss/Newsfront/16/"
|
||||||
#category="right"
|
#category="right"
|
||||||
|
|
10
stats.sh
10
stats.sh
|
@ -7,7 +7,11 @@ right=$(sqlite3 articles.db "SELECT url FROM articles WHERE category = 'right'"
|
||||||
left_sources=$(sqlite3 articles.db "SELECT source FROM articles WHERE category = 'left'" | sort | uniq)
|
left_sources=$(sqlite3 articles.db "SELECT source FROM articles WHERE category = 'left'" | sort | uniq)
|
||||||
right_sources=$(sqlite3 articles.db "SELECT source FROM articles WHERE category = 'right'" | sort | uniq)
|
right_sources=$(sqlite3 articles.db "SELECT source FROM articles WHERE category = 'right'" | sort | uniq)
|
||||||
|
|
||||||
echo "This model contains a total of $total articles ($left left, $right right)."
|
total_source_count=$(sqlite3 articles.db "SELECT source FROM articles" | sort | uniq | wc -l)
|
||||||
|
left_source_count=$(echo $left_sources | wc -w)
|
||||||
|
right_source_count=$(echo $right_sources | wc -w)
|
||||||
|
|
||||||
|
echo "This model contains a total of $total articles from $total_source_count sources."
|
||||||
echo ""
|
echo ""
|
||||||
echo "## Left"
|
echo "## Left"
|
||||||
echo ""
|
echo ""
|
||||||
|
@ -15,8 +19,12 @@ for i in $left_sources; do
|
||||||
echo "* $i: $(sqlite3 articles.db "SELECT url FROM articles WHERE source = '$i'" | wc -l)"
|
echo "* $i: $(sqlite3 articles.db "SELECT url FROM articles WHERE source = '$i'" | wc -l)"
|
||||||
done
|
done
|
||||||
echo ""
|
echo ""
|
||||||
|
echo "Left total: $left articles from $left_source_count sources"
|
||||||
|
echo ""
|
||||||
echo "## Right"
|
echo "## Right"
|
||||||
echo ""
|
echo ""
|
||||||
for i in $right_sources; do
|
for i in $right_sources; do
|
||||||
echo "* $i: $(sqlite3 articles.db "SELECT url FROM articles WHERE source = '$i'" | wc -l)"
|
echo "* $i: $(sqlite3 articles.db "SELECT url FROM articles WHERE source = '$i'" | wc -l)"
|
||||||
done
|
done
|
||||||
|
echo ""
|
||||||
|
echo "Right total: $right articles from $right_source_count sources"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user