Compare commits
6 Commits
9f3abd8641
...
5f3a2977f1
Author | SHA1 | Date | |
---|---|---|---|
5f3a2977f1 | |||
314bdef1c5 | |||
28f81c9a63 | |||
06190f5101 | |||
68c8949005 | |||
29d77b5393 |
|
@ -25,7 +25,9 @@ Inclusion of a site in this model is not an endorsement of the site.
|
||||||
* CNN
|
* CNN
|
||||||
* HuffPost (formerly Huffington Post)
|
* HuffPost (formerly Huffington Post)
|
||||||
* Los Angeles Times
|
* Los Angeles Times
|
||||||
|
* Slate
|
||||||
* PBS NewsHour
|
* PBS NewsHour
|
||||||
|
* The Washington Post
|
||||||
|
|
||||||
### Right
|
### Right
|
||||||
|
|
||||||
|
|
10
compile.py
10
compile.py
|
@ -17,8 +17,12 @@
|
||||||
# this program. If not, see <https://www.gnu.org/licenses/>.
|
# this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
import tomli
|
||||||
import gptc
|
import gptc
|
||||||
|
|
||||||
|
with open("compiler.toml", "rb") as f:
|
||||||
|
config = tomli.load(f)
|
||||||
|
|
||||||
con = sqlite3.connect("articles.db")
|
con = sqlite3.connect("articles.db")
|
||||||
con.execute("CREATE TABLE IF NOT EXISTS articles(source, category, url, text);")
|
con.execute("CREATE TABLE IF NOT EXISTS articles(source, category, url, text);")
|
||||||
|
|
||||||
|
@ -29,7 +33,11 @@ raw_model = [
|
||||||
|
|
||||||
with open("model.gptc", "w+b") as f:
|
with open("model.gptc", "w+b") as f:
|
||||||
f.write(
|
f.write(
|
||||||
gptc.compile(raw_model, max_ngram_length=3, min_count=3).serialize()
|
gptc.compile(
|
||||||
|
raw_model,
|
||||||
|
max_ngram_length=config["max_ngram_length"],
|
||||||
|
min_count=config["min_count"],
|
||||||
|
).serialize()
|
||||||
)
|
)
|
||||||
|
|
||||||
con.commit()
|
con.commit()
|
||||||
|
|
2
compiler.toml
Normal file
2
compiler.toml
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
max_ngram_length=3
|
||||||
|
min_count=5
|
161
sources.toml
161
sources.toml
|
@ -5,6 +5,31 @@
|
||||||
# ? Newsmax (read timeout errors)
|
# ? Newsmax (read timeout errors)
|
||||||
# ? Bloomberg (CAPTCHA on RSS feed?)
|
# ? Bloomberg (CAPTCHA on RSS feed?)
|
||||||
|
|
||||||
|
[abc_news]
|
||||||
|
feed="https://abcnews.go.com/abcnews/usheadlines"
|
||||||
|
category="left"
|
||||||
|
contains_articles=false
|
||||||
|
name="ABC News"
|
||||||
|
|
||||||
|
[atlantic]
|
||||||
|
feed="https://www.theatlantic.com/feed/all/"
|
||||||
|
category="left"
|
||||||
|
contains_articles=true
|
||||||
|
name="The Atlantic"
|
||||||
|
sort="Atlantic"
|
||||||
|
|
||||||
|
[cbs_news]
|
||||||
|
feed="https://www.cbsnews.com/latest/rss/main"
|
||||||
|
category="left"
|
||||||
|
contains_articles=false
|
||||||
|
name="CBS News"
|
||||||
|
|
||||||
|
[cnbc]
|
||||||
|
feed="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=100003114"
|
||||||
|
category="left"
|
||||||
|
contains_articles=false
|
||||||
|
name="CNBC"
|
||||||
|
|
||||||
[cnn]
|
[cnn]
|
||||||
feed="http://rss.cnn.com/rss/cnn_latest.rss"
|
feed="http://rss.cnn.com/rss/cnn_latest.rss"
|
||||||
category="left"
|
category="left"
|
||||||
|
@ -17,43 +42,6 @@ category="left"
|
||||||
contains_articles=false
|
contains_articles=false
|
||||||
name="HuffPost"
|
name="HuffPost"
|
||||||
|
|
||||||
[cnbc]
|
|
||||||
feed="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=100003114"
|
|
||||||
category="left"
|
|
||||||
contains_articles=false
|
|
||||||
name="CNBC"
|
|
||||||
|
|
||||||
[pbs_newshour]
|
|
||||||
feed="https://www.pbs.org/newshour/feeds/rss/headlines"
|
|
||||||
category="left"
|
|
||||||
contains_articles=false
|
|
||||||
name="PBS NewsHour"
|
|
||||||
|
|
||||||
[latimes]
|
|
||||||
feed="https://www.latimes.com/local/rss2.0.xml"
|
|
||||||
category="left"
|
|
||||||
contains_articles=false
|
|
||||||
name="Los Angeles Times"
|
|
||||||
|
|
||||||
[cbs_news]
|
|
||||||
feed="https://www.cbsnews.com/latest/rss/main"
|
|
||||||
category="left"
|
|
||||||
contains_articles=false
|
|
||||||
name="CBS News"
|
|
||||||
|
|
||||||
[atlantic]
|
|
||||||
feed="https://www.theatlantic.com/feed/all/"
|
|
||||||
category="left"
|
|
||||||
contains_articles=true
|
|
||||||
name="The Atlantic"
|
|
||||||
sort="Atlantic"
|
|
||||||
|
|
||||||
[abc_news]
|
|
||||||
feed="https://abcnews.go.com/abcnews/usheadlines"
|
|
||||||
category="left"
|
|
||||||
contains_articles=false
|
|
||||||
name="ABC News"
|
|
||||||
|
|
||||||
[intercept]
|
[intercept]
|
||||||
feed="https://theintercept.com/feed/?lang=en"
|
feed="https://theintercept.com/feed/?lang=en"
|
||||||
category="left"
|
category="left"
|
||||||
|
@ -61,45 +49,42 @@ contains_articles=true
|
||||||
name="The Intercept"
|
name="The Intercept"
|
||||||
sort="Intercept"
|
sort="Intercept"
|
||||||
|
|
||||||
|
[latimes]
|
||||||
|
feed="https://www.latimes.com/local/rss2.0.xml"
|
||||||
|
category="left"
|
||||||
|
contains_articles=false
|
||||||
|
name="Los Angeles Times"
|
||||||
|
|
||||||
|
[pbs_newshour]
|
||||||
|
feed="https://www.pbs.org/newshour/feeds/rss/headlines"
|
||||||
|
category="left"
|
||||||
|
contains_articles=false
|
||||||
|
name="PBS NewsHour"
|
||||||
|
|
||||||
|
[slate]
|
||||||
|
feed="http://www.slate.com/articles/news_and_politics.fulltext.all.10.rss"
|
||||||
|
category="left"
|
||||||
|
contains_articles=false
|
||||||
|
name="Slate"
|
||||||
|
|
||||||
|
[washington_post]
|
||||||
|
feed="https://feeds.washingtonpost.com/rss/national"
|
||||||
|
category="left"
|
||||||
|
contains_articles=false
|
||||||
|
name="The Washington Post"
|
||||||
|
sort="Washington Post"
|
||||||
|
|
||||||
#[bloomberg]
|
#[bloomberg]
|
||||||
#feed="https://www.bloomberg.com/politics/feeds/site.xml"
|
#feed="https://www.bloomberg.com/politics/feeds/site.xml"
|
||||||
#category="left"
|
#category="left"
|
||||||
#contains_articles=false
|
#contains_articles=false
|
||||||
|
|
||||||
[fox_news]
|
[american_conservative]
|
||||||
feed="https://moxie.foxnews.com/google-publisher/latest.xml"
|
feed="https://theamericanconservative.com/articles/feed/"
|
||||||
category="right"
|
category="right"
|
||||||
contains_articles=true
|
contains_articles=true
|
||||||
name="Fox News"
|
name="The American Conservative"
|
||||||
|
sort="American Conservative"
|
||||||
[oann]
|
|
||||||
feed="https://www.oann.com/category/newsroom/feed"
|
|
||||||
category="right"
|
|
||||||
contains_articles=true
|
|
||||||
name="One America News Network"
|
|
||||||
|
|
||||||
[nypost]
|
|
||||||
feed="https://nypost.com/feed"
|
|
||||||
category="right"
|
|
||||||
contains_articles=false
|
|
||||||
exclude=[
|
|
||||||
{ type="startswith", pattern="https://pagesix.com" },
|
|
||||||
{ type="startswith", pattern="https://decider.com" },
|
|
||||||
]
|
|
||||||
name="New York Post"
|
|
||||||
|
|
||||||
[federalist]
|
|
||||||
feed="https://thefederalist.com/feed"
|
|
||||||
category="right"
|
|
||||||
contains_articles=false
|
|
||||||
name="The Federalist"
|
|
||||||
sort="Federalist"
|
|
||||||
|
|
||||||
[washington_examiner]
|
|
||||||
feed="https://feeds.feedburner.com/dcexaminer/Politics"
|
|
||||||
category="right"
|
|
||||||
contains_articles=true
|
|
||||||
name="Washington Examiner"
|
|
||||||
|
|
||||||
[american_thinker]
|
[american_thinker]
|
||||||
feed="https://feeds.feedburner.com/americanthinker_articles"
|
feed="https://feeds.feedburner.com/americanthinker_articles"
|
||||||
|
@ -113,30 +98,54 @@ category="right"
|
||||||
contains_articles=false
|
contains_articles=false
|
||||||
name="Breitbart"
|
name="Breitbart"
|
||||||
|
|
||||||
|
[daily_caller]
|
||||||
|
feed="https://feeds.feedburner.com/dailycaller"
|
||||||
|
category="right"
|
||||||
|
contains_articles=false
|
||||||
|
name="Daily Caller"
|
||||||
|
|
||||||
[epoch_times]
|
[epoch_times]
|
||||||
feed="https://www.theepochtimes.com/feed/"
|
feed="https://www.theepochtimes.com/feed/"
|
||||||
category="right"
|
category="right"
|
||||||
contains_articles=false
|
contains_articles=false
|
||||||
name="Epoch Times"
|
name="Epoch Times"
|
||||||
|
|
||||||
|
[federalist]
|
||||||
|
feed="https://thefederalist.com/feed"
|
||||||
|
category="right"
|
||||||
|
contains_articles=false
|
||||||
|
name="The Federalist"
|
||||||
|
sort="Federalist"
|
||||||
|
|
||||||
|
[fox_news]
|
||||||
|
feed="https://moxie.foxnews.com/google-publisher/latest.xml"
|
||||||
|
category="right"
|
||||||
|
contains_articles=true
|
||||||
|
name="Fox News"
|
||||||
|
|
||||||
[not_the_bee]
|
[not_the_bee]
|
||||||
feed="https://notthebee.com/feed"
|
feed="https://notthebee.com/feed"
|
||||||
category="right"
|
category="right"
|
||||||
contains_articles=false
|
contains_articles=false
|
||||||
name="Not the Bee"
|
name="Not the Bee"
|
||||||
|
|
||||||
[daily_caller]
|
[nypost]
|
||||||
feed="https://feeds.feedburner.com/dailycaller"
|
feed="https://nypost.com/news/feed"
|
||||||
category="right"
|
category="right"
|
||||||
contains_articles=false
|
contains_articles=false
|
||||||
name="Daily Caller"
|
name="New York Post"
|
||||||
|
|
||||||
[american_conservative]
|
[oann]
|
||||||
feed="https://theamericanconservative.com/articles/feed/"
|
feed="https://www.oann.com/category/newsroom/feed"
|
||||||
category="right"
|
category="right"
|
||||||
contains_articles=true
|
contains_articles=true
|
||||||
name="The American Conservative"
|
name="One America News Network"
|
||||||
sort="American Conservative"
|
|
||||||
|
[washington_examiner]
|
||||||
|
feed="https://feeds.feedburner.com/dcexaminer/Politics"
|
||||||
|
category="right"
|
||||||
|
contains_articles=true
|
||||||
|
name="Washington Examiner"
|
||||||
|
|
||||||
#[newsmax]
|
#[newsmax]
|
||||||
#feed="https://www.newsmax.com/rss/Newsfront/16/"
|
#feed="https://www.newsmax.com/rss/Newsfront/16/"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user