Compare commits
3 Commits
af5f3c3df1
...
487b087910
Author | SHA1 | Date | |
---|---|---|---|
487b087910 | |||
43faa6139a | |||
5f7fd0ccb5 |
|
@ -7,7 +7,9 @@ import bs4
|
|||
|
||||
def matches(string, checks):
    """Return True if *string* satisfies at least one entry in *checks*.

    Each check is a mapping with a "type" key. Only the "startswith" type is
    recognised: it matches when *string* begins with the check's "pattern"
    value. Checks of any other type never match, and their "pattern" key is
    not read. An empty *checks* sequence yields False.
    """
    return any(
        check["type"] == "startswith" and string.startswith(check["pattern"])
        for check in checks
    )
|
||||
|
||||
|
@ -44,7 +46,8 @@ try:
|
|||
entries = [
|
||||
entry
|
||||
for entry in feedparser.parse(url)["entries"]
|
||||
if not entry["link"] in known and not matches(entry["link"], config.get("exclude", []))
|
||||
if not entry["link"] in known
|
||||
and not matches(entry["link"], config.get("exclude", []))
|
||||
]
|
||||
print(f"Fetched feed. Found {len(entries)} new articles.")
|
||||
|
||||
|
|
20
sources.toml
20
sources.toml
|
@ -9,41 +9,50 @@
|
|||
feed="http://rss.cnn.com/rss/cnn_latest.rss"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="CNN"
|
||||
|
||||
[huffpost]
|
||||
feed="https://chaski.huffpost.com/us/auto"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="HuffPost"
|
||||
|
||||
[cnbc]
|
||||
feed="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=100003114"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="CNBC"
|
||||
|
||||
[pbs_newshour]
|
||||
feed="https://www.pbs.org/newshour/feeds/rss/headlines"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="PBS NewsHour"
|
||||
|
||||
[latimes]
|
||||
feed="https://www.latimes.com/local/rss2.0.xml"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="Los Angeles Times"
|
||||
|
||||
[cbs_news]
|
||||
feed="https://www.cbsnews.com/latest/rss/main"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="CBS News"
|
||||
|
||||
[atlantic]
|
||||
feed="https://www.theatlantic.com/feed/all/"
|
||||
category="left"
|
||||
contains_articles=true
|
||||
name="The Atlantic"
|
||||
sort="Atlantic"
|
||||
|
||||
[abc_news]
|
||||
feed="https://abcnews.go.com/abcnews/usheadlines"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="ABC News"
|
||||
|
||||
#[bloomberg]
|
||||
#feed="https://www.bloomberg.com/politics/feeds/site.xml"
|
||||
|
@ -54,11 +63,13 @@ contains_articles=false
|
|||
feed="https://moxie.foxnews.com/google-publisher/latest.xml"
|
||||
category="right"
|
||||
contains_articles=true
|
||||
name="Fox News"
|
||||
|
||||
[oann]
|
||||
feed="https://www.oann.com/category/newsroom/feed"
|
||||
category="right"
|
||||
contains_articles=true
|
||||
name="One America News Network"
|
||||
|
||||
[nypost]
|
||||
feed="https://nypost.com/feed"
|
||||
|
@ -68,41 +79,50 @@ exclude=[
|
|||
{ type="startswith", pattern="https://pagesix.com" },
|
||||
{ type="startswith", pattern="https://decider.com" },
|
||||
]
|
||||
name="New York Post"
|
||||
|
||||
[federalist]
|
||||
feed="https://thefederalist.com/feed"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="The Federalist"
|
||||
sort="Federalist"
|
||||
|
||||
[washington_examiner]
|
||||
feed="https://feeds.feedburner.com/dcexaminer/Politics"
|
||||
category="right"
|
||||
contains_articles=true
|
||||
name="Washington Examiner"
|
||||
|
||||
[american_thinker]
|
||||
feed="https://feeds.feedburner.com/americanthinker_articles"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="American Thinker"
|
||||
|
||||
[breitbart]
|
||||
feed="https://feeds.feedburner.com/breitbart/"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="Breitbart"
|
||||
|
||||
[epoch_times]
|
||||
feed="https://www.theepochtimes.com/feed/"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="Epoch Times"
|
||||
|
||||
[not_the_bee]
|
||||
feed="https://notthebee.com/feed"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="Not the Bee"
|
||||
|
||||
[daily_caller]
|
||||
feed="https://feeds.feedburner.com/dailycaller"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="Daily Caller"
|
||||
|
||||
#[newsmax]
|
||||
#feed="https://www.newsmax.com/rss/Newsfront/16/"
|
||||
|
|
60
stats.py
Normal file
60
stats.py
Normal file
|
@ -0,0 +1,60 @@
|
|||
import sqlite3
|
||||
import tomli
|
||||
|
||||
# Source definitions: one TOML table per feed, keyed by source id. Each table
# is read below for its "category", "name" and optional "sort" entries.
with open("sources.toml", "rb") as f:
    sources = tomli.load(f)


def _scalar(con, sql, params=()):
    """Run a query that yields a single value and return that value."""
    return con.execute(sql, params).fetchone()[0]


def _breakdown(entries):
    """Render one Markdown bullet per source, ordered by its "sort" key."""
    ordered = sorted(entries, key=lambda entry: entry["sort"])
    return "\n".join(f"* {entry['name']}: {entry['count']}" for entry in ordered)


left_sources = []
right_sources = []

con = sqlite3.connect("articles.db")
try:
    # Make sure the table exists so the counts below work on a fresh database.
    con.execute("CREATE TABLE IF NOT EXISTS articles(source, category, url, text);")

    # Let SQLite do the counting instead of fetching every row into Python.
    article_count = _scalar(con, "SELECT COUNT(*) FROM articles")
    left_article_count = _scalar(
        con, "SELECT COUNT(*) FROM articles WHERE category = 'left'"
    )
    right_article_count = _scalar(
        con, "SELECT COUNT(*) FROM articles WHERE category = 'right'"
    )

    for source_id, source_info in sources.items():
        # Any category other than "left" is reported under "Right".
        if source_info["category"] == "left":
            source_list = left_sources
        else:
            source_list = right_sources

        source_list.append({
            "name": source_info["name"],
            # Fall back to the display name when no explicit sort key is set.
            "sort": source_info.get("sort", source_info["name"]),
            "count": _scalar(
                con, "SELECT COUNT(*) FROM articles WHERE source = ?", (source_id,)
            ),
        })

    con.commit()
finally:
    # Release the database handle even if one of the queries fails.
    con.close()

source_count = len(sources)
left_source_count = len(left_sources)
right_source_count = len(right_sources)

left_breakdown = _breakdown(left_sources)
right_breakdown = _breakdown(right_sources)


print(f"""\
This model contains a total of {article_count} articles from {source_count} sources.

## Left

{left_breakdown}

Left total: {left_article_count} articles from {left_source_count} sources

## Right

{right_breakdown}

Right total: {right_article_count} articles from {right_source_count} sources
""")
|
30
stats.sh
30
stats.sh
|
@ -1,30 +0,0 @@
|
|||
#!/bin/bash
#
# Print a Markdown summary of articles.db: the overall article and source
# totals, followed by a per-source breakdown for the "left" and "right"
# categories.

db="articles.db"

# Run one SQL statement against the database and print its result.
# stdin is detached so the call never consumes input meant for a caller's loop.
query() {
    sqlite3 "$db" "$1" < /dev/null
}

# Print a "* source: count" bullet for every distinct source in category $1.
# Sources are listed in SQLite's default (byte-wise) order. The count covers
# all rows carrying that source name, whatever their category.
print_breakdown() {
    local category=$1 source escaped quote="'"
    while IFS= read -r source; do
        # Double any single quote so the name stays a valid SQL string literal.
        escaped=${source//$quote/$quote$quote}
        echo "* $source: $(query "SELECT COUNT(*) FROM articles WHERE source = '$escaped'")"
    done < <(query "SELECT DISTINCT source FROM articles WHERE category = '$category' AND source IS NOT NULL ORDER BY source")
}

# Let SQLite count rows instead of dumping them into wc.
total=$(query "SELECT COUNT(*) FROM articles")
left=$(query "SELECT COUNT(*) FROM articles WHERE category = 'left'")
right=$(query "SELECT COUNT(*) FROM articles WHERE category = 'right'")

total_source_count=$(query "SELECT COUNT(DISTINCT source) FROM articles")
left_source_count=$(query "SELECT COUNT(DISTINCT source) FROM articles WHERE category = 'left'")
right_source_count=$(query "SELECT COUNT(DISTINCT source) FROM articles WHERE category = 'right'")

echo "This model contains a total of $total articles from $total_source_count sources."
echo ""
echo "## Left"
echo ""
print_breakdown left
echo ""
echo "Left total: $left articles from $left_source_count sources"
echo ""
echo "## Right"
echo ""
print_breakdown right
echo ""
echo "Right total: $right articles from $right_source_count sources"
|
Loading…
Reference in New Issue
Block a user