Compare commits

..

No commits in common. "5f3a2977f1e4793edfecd03836c084da2a1ea163" and "9f3abd864163a658feda8a2201410858e7f7afa0" have entirely different histories.

4 changed files with 70 additions and 91 deletions

View File

@ -25,9 +25,7 @@ Inclusion of a site in this model is not an endorsement of the site.
* CNN * CNN
* HuffPost (formerly Huffington Post) * HuffPost (formerly Huffington Post)
* Los Angeles Times * Los Angeles Times
* Slate
* PBS NewsHour * PBS NewsHour
* The Washington Post
### Right ### Right

View File

@ -17,12 +17,8 @@
# this program. If not, see <https://www.gnu.org/licenses/>. # this program. If not, see <https://www.gnu.org/licenses/>.
import sqlite3 import sqlite3
import tomli
import gptc import gptc
with open("compiler.toml", "rb") as f:
config = tomli.load(f)
con = sqlite3.connect("articles.db") con = sqlite3.connect("articles.db")
con.execute("CREATE TABLE IF NOT EXISTS articles(source, category, url, text);") con.execute("CREATE TABLE IF NOT EXISTS articles(source, category, url, text);")
@ -33,11 +29,7 @@ raw_model = [
with open("model.gptc", "w+b") as f: with open("model.gptc", "w+b") as f:
f.write( f.write(
gptc.compile( gptc.compile(raw_model, max_ngram_length=3, min_count=3).serialize()
raw_model,
max_ngram_length=config["max_ngram_length"],
min_count=config["min_count"],
).serialize()
) )
con.commit() con.commit()

View File

@ -1,2 +0,0 @@
max_ngram_length=3
min_count=5

View File

@ -5,31 +5,6 @@
# ? Newsmax (read timeout errors) # ? Newsmax (read timeout errors)
# ? Bloomberg (CAPTCHA on RSS feed?) # ? Bloomberg (CAPTCHA on RSS feed?)
[abc_news]
feed="https://abcnews.go.com/abcnews/usheadlines"
category="left"
contains_articles=false
name="ABC News"
[atlantic]
feed="https://www.theatlantic.com/feed/all/"
category="left"
contains_articles=true
name="The Atlantic"
sort="Atlantic"
[cbs_news]
feed="https://www.cbsnews.com/latest/rss/main"
category="left"
contains_articles=false
name="CBS News"
[cnbc]
feed="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=100003114"
category="left"
contains_articles=false
name="CNBC"
[cnn] [cnn]
feed="http://rss.cnn.com/rss/cnn_latest.rss" feed="http://rss.cnn.com/rss/cnn_latest.rss"
category="left" category="left"
@ -42,18 +17,11 @@ category="left"
contains_articles=false contains_articles=false
name="HuffPost" name="HuffPost"
[intercept] [cnbc]
feed="https://theintercept.com/feed/?lang=en" feed="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=100003114"
category="left"
contains_articles=true
name="The Intercept"
sort="Intercept"
[latimes]
feed="https://www.latimes.com/local/rss2.0.xml"
category="left" category="left"
contains_articles=false contains_articles=false
name="Los Angeles Times" name="CNBC"
[pbs_newshour] [pbs_newshour]
feed="https://www.pbs.org/newshour/feeds/rss/headlines" feed="https://www.pbs.org/newshour/feeds/rss/headlines"
@ -61,30 +29,77 @@ category="left"
contains_articles=false contains_articles=false
name="PBS NewsHour" name="PBS NewsHour"
[slate] [latimes]
feed="http://www.slate.com/articles/news_and_politics.fulltext.all.10.rss" feed="https://www.latimes.com/local/rss2.0.xml"
category="left" category="left"
contains_articles=false contains_articles=false
name="Slate" name="Los Angeles Times"
[washington_post] [cbs_news]
feed="https://feeds.washingtonpost.com/rss/national" feed="https://www.cbsnews.com/latest/rss/main"
category="left" category="left"
contains_articles=false contains_articles=false
name="The Washington Post" name="CBS News"
sort="Washington Post"
[atlantic]
feed="https://www.theatlantic.com/feed/all/"
category="left"
contains_articles=true
name="The Atlantic"
sort="Atlantic"
[abc_news]
feed="https://abcnews.go.com/abcnews/usheadlines"
category="left"
contains_articles=false
name="ABC News"
[intercept]
feed="https://theintercept.com/feed/?lang=en"
category="left"
contains_articles=true
name="The Intercept"
sort="Intercept"
#[bloomberg] #[bloomberg]
#feed="https://www.bloomberg.com/politics/feeds/site.xml" #feed="https://www.bloomberg.com/politics/feeds/site.xml"
#category="left" #category="left"
#contains_articles=false #contains_articles=false
[american_conservative] [fox_news]
feed="https://theamericanconservative.com/articles/feed/" feed="https://moxie.foxnews.com/google-publisher/latest.xml"
category="right" category="right"
contains_articles=true contains_articles=true
name="The American Conservative" name="Fox News"
sort="American Conservative"
[oann]
feed="https://www.oann.com/category/newsroom/feed"
category="right"
contains_articles=true
name="One America News Network"
[nypost]
feed="https://nypost.com/feed"
category="right"
contains_articles=false
exclude=[
{ type="startswith", pattern="https://pagesix.com" },
{ type="startswith", pattern="https://decider.com" },
]
name="New York Post"
[federalist]
feed="https://thefederalist.com/feed"
category="right"
contains_articles=false
name="The Federalist"
sort="Federalist"
[washington_examiner]
feed="https://feeds.feedburner.com/dcexaminer/Politics"
category="right"
contains_articles=true
name="Washington Examiner"
[american_thinker] [american_thinker]
feed="https://feeds.feedburner.com/americanthinker_articles" feed="https://feeds.feedburner.com/americanthinker_articles"
@ -98,54 +113,30 @@ category="right"
contains_articles=false contains_articles=false
name="Breitbart" name="Breitbart"
[daily_caller]
feed="https://feeds.feedburner.com/dailycaller"
category="right"
contains_articles=false
name="Daily Caller"
[epoch_times] [epoch_times]
feed="https://www.theepochtimes.com/feed/" feed="https://www.theepochtimes.com/feed/"
category="right" category="right"
contains_articles=false contains_articles=false
name="Epoch Times" name="Epoch Times"
[federalist]
feed="https://thefederalist.com/feed"
category="right"
contains_articles=false
name="The Federalist"
sort="Federalist"
[fox_news]
feed="https://moxie.foxnews.com/google-publisher/latest.xml"
category="right"
contains_articles=true
name="Fox News"
[not_the_bee] [not_the_bee]
feed="https://notthebee.com/feed" feed="https://notthebee.com/feed"
category="right" category="right"
contains_articles=false contains_articles=false
name="Not the Bee" name="Not the Bee"
[nypost] [daily_caller]
feed="https://nypost.com/news/feed" feed="https://feeds.feedburner.com/dailycaller"
category="right" category="right"
contains_articles=false contains_articles=false
name="New York Post" name="Daily Caller"
[oann] [american_conservative]
feed="https://www.oann.com/category/newsroom/feed" feed="https://theamericanconservative.com/articles/feed/"
category="right" category="right"
contains_articles=true contains_articles=true
name="One America News Network" name="The American Conservative"
sort="American Conservative"
[washington_examiner]
feed="https://feeds.feedburner.com/dcexaminer/Politics"
category="right"
contains_articles=true
name="Washington Examiner"
#[newsmax] #[newsmax]
#feed="https://www.newsmax.com/rss/Newsfront/16/" #feed="https://www.newsmax.com/rss/Newsfront/16/"