Compare commits
32 Commits
2022-11-24
...
master
Author | SHA1 | Date | |
---|---|---|---|
9c66b18cfe | |||
fe088822e1 | |||
1daab919ea | |||
4eeb8d2d17 | |||
f1ccaaabab | |||
9d82b07f17 | |||
f25594d771 | |||
af9e5e92a3 | |||
b485780738 | |||
82846f39ba | |||
5f3a2977f1 | |||
314bdef1c5 | |||
28f81c9a63 | |||
06190f5101 | |||
68c8949005 | |||
29d77b5393 | |||
9f3abd8641 | |||
17bb8a4f3f | |||
f0b93cd2f6 | |||
aa8fa31195 | |||
e5e046b70d | |||
54d97a3a16 | |||
487b087910 | |||
43faa6139a | |||
5f7fd0ccb5 | |||
af5f3c3df1 | |||
a96d474e37 | |||
ee8189d476 | |||
1d2cfab68c | |||
c5cc6d78f9 | |||
e206210ec5 | |||
7bf17b150e |
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -139,4 +139,6 @@ cython_debug/
|
|||
|
||||
# Model
|
||||
*.db
|
||||
*.db-journal
|
||||
*.gptc
|
||||
build/
|
||||
|
|
39
README.md
39
README.md
|
@ -3,19 +3,46 @@
|
|||
A [GPTC](https://git.kj7rrv.com/kj7rrv/gptc) model to classify American news as
|
||||
right- or left-leaning
|
||||
|
||||
## Scripts
|
||||
|
||||
No scripts take any arguments.
|
||||
|
||||
* `./download.py`: download new articles and add them to the database
|
||||
* `./compile.py`: compile GPTC model
|
||||
* `./export.py`: create `build/` directory with files for release
|
||||
* `./stats.py`: print statistics on article and source counts
|
||||
|
||||
## Sources
|
||||
|
||||
Inclusion of a site in this model is not an endorsement of the site.
|
||||
|
||||
### Left
|
||||
|
||||
* CNN
|
||||
* HuffPost (formerly Huffington Post)
|
||||
* ABC News
|
||||
* The Atlantic
|
||||
* CBS News
|
||||
* CNBC
|
||||
* PBS NewsHour
|
||||
* CNN
|
||||
* Democracy Now!
|
||||
* HuffPost (formerly Huffington Post)
|
||||
* The Intercept
|
||||
* Los Angeles Times
|
||||
* PBS NewsHour
|
||||
* Slate
|
||||
* The Washington Post
|
||||
|
||||
### Right
|
||||
|
||||
* Fox News
|
||||
* One America News Network
|
||||
* New York Post
|
||||
* The American Conservative
|
||||
* American Thinker
|
||||
* Breitbart
|
||||
* Daily Caller
|
||||
* Epoch Times
|
||||
* The Federalist
|
||||
* Fox News
|
||||
* LifeSiteNews
|
||||
* New York Post
|
||||
* Not the Bee
|
||||
* One America News Network
|
||||
* RedState
|
||||
* Washington Examiner
|
||||
|
|
70
analyses/constitutional_amendments.py
Normal file
70
analyses/constitutional_amendments.py
Normal file
|
@ -0,0 +1,70 @@
|
|||
import gptc
|
||||
|
||||
# (ordinal abbreviation, spelled-out ordinal) for each amendment; both
# spellings are looked up in the model as "<spelling> Amendment".
amendments = [
    ("1st", "First"),
    ("2nd", "Second"),
    ("3rd", "Third"),
    ("4th", "Fourth"),
    ("5th", "Fifth"),
    ("6th", "Sixth"),
    ("7th", "Seventh"),
    ("8th", "Eighth"),
    ("9th", "Ninth"),
    ("10th", "Tenth"),
    ("11th", "Eleventh"),
    ("12th", "Twelfth"),
    ("13th", "Thirteenth"),
    ("14th", "Fourteenth"),
    ("15th", "Fifteenth"),
    ("16th", "Sixteenth"),
    ("17th", "Seventeenth"),
    ("18th", "Eighteenth"),
    ("19th", "Nineteenth"),
    ("20th", "Twentieth"),
    ("21st", "Twenty-first"),
    ("22nd", "Twenty-second"),
    ("23rd", "Twenty-third"),
    ("24th", "Twenty-fourth"),
    ("25th", "Twenty-fifth"),
    ("26th", "Twenty-sixth"),
    ("27th", "Twenty-seventh"),
]

# Width of the name column; "Twenty-seventh" (14 characters) is the longest
# amendment name in the list above.
NAME_WIDTH = 14
# Width of each percentage column, e.g. "12.3%".
PERCENT_WIDTH = 5


def combine_confidences(number_data, name_data):
    """Merge the model's results for the two spellings of one amendment.

    If only one spelling produced a result, that result is returned
    unchanged.  If both did, the values are averaged key by key (the keys of
    ``number_data`` are used, so both results are assumed to share the same
    keys -- TODO confirm against gptc).  Returns ``None`` when neither
    spelling produced a result.
    """
    if number_data and name_data:
        return {
            key: (number_data[key] + name_data[key]) / 2
            for key in number_data
        }
    return number_data or name_data or None


def classify_amendments(model):
    """Return ``(name, confidences)`` pairs sorted by ascending "left" value.

    ``model`` only needs a ``get(text)`` method.  Amendments for which the
    model returns nothing under either spelling are left out.
    """
    confidences = {}
    for number, name in amendments:
        combined = combine_confidences(
            model.get(number + " Amendment"),
            model.get(name + " Amendment"),
        )
        if combined:
            confidences[name] = combined
    return sorted(confidences.items(), key=lambda item: item[1]["left"])


def format_percent(value):
    """Format a 0-1 fraction as a percentage with one decimal, e.g. " 5.3%"."""
    return f"{value * 100:>4.1f}%"


def format_table(rows):
    """Return the lines of the result table for ``(name, confidences)`` rows.

    The header is repeated at the bottom.  Header cells are padded to the
    same widths as the data cells so that every line has the same length.
    """
    header = (
        f"| {'Amendment':<{NAME_WIDTH}} "
        f"| {'Left':<{PERCENT_WIDTH}} "
        f"| {'Right':<{PERCENT_WIDTH}} |"
    )
    rule = (
        "+"
        + "+".join(
            "-" * (width + 2)
            for width in (NAME_WIDTH, PERCENT_WIDTH, PERCENT_WIDTH)
        )
        + "+"
    )
    body = [
        f"| {name:<{NAME_WIDTH}} "
        f"| {format_percent(values['left'])} "
        f"| {format_percent(values['right'])} |"
        for name, values in rows
    ]
    return [header, rule, *body, rule, header]


def main():
    """Load model.gptc and print the amendment analysis report."""
    with open("model.gptc", "rb") as f:
        model = gptc.deserialize(f)

    print("# Constitutional Amendment Analysis")
    print()
    print("""This is an analysis of which amendments to the U.S. Constitution are mentioned
more in right- or left-leaning American news sources. Data do not necessarily
correlate with support or opposition for the amendment among right- or
left-leaning Americans.""")
    print()
    print("\n".join(format_table(classify_amendments(model))))


if __name__ == "__main__":
    main()
|
85
analyses/states.py
Normal file
85
analyses/states.py
Normal file
|
@ -0,0 +1,85 @@
|
|||
import gptc
|
||||
|
||||
# The fifty U.S. states, looked up in the model by name.
states = [
    "Alabama",
    "Alaska",
    "Arizona",
    "Arkansas",
    "California",
    "Colorado",
    "Connecticut",
    "Delaware",
    "Florida",
    "Georgia",
    "Hawaii",
    "Idaho",
    "Illinois",
    "Indiana",
    "Iowa",
    "Kansas",
    "Kentucky",
    "Louisiana",
    "Maine",
    "Maryland",
    "Massachusetts",
    "Michigan",
    "Minnesota",
    "Mississippi",
    "Missouri",
    "Montana",
    "Nebraska",
    "Nevada",
    "New Hampshire",
    "New Jersey",
    "New Mexico",
    "New York",
    "North Carolina",
    "North Dakota",
    "Ohio",
    "Oklahoma",
    "Oregon",
    "Pennsylvania",
    "Rhode Island",
    "South Carolina",
    "South Dakota",
    "Tennessee",
    "Texas",
    "Utah",
    "Vermont",
    "Virginia",
    "Washington",
    "West Virginia",
    "Wisconsin",
    "Wyoming",
]

# Width of each percentage column, e.g. "12.3%".
PERCENT_WIDTH = 5


def classify_states(model, names):
    """Return ``(state, confidences)`` pairs sorted by ascending "left" value.

    ``model`` only needs a ``get(text)`` method.  States for which the model
    returns nothing are skipped; otherwise sorting on ``["left"]`` would fail
    for them.
    """
    classified = []
    for state in names:
        confidences = model.get(state)
        if confidences:
            classified.append((state, confidences))
    classified.sort(key=lambda item: item[1]["left"])
    return classified


def format_percent(value):
    """Format a 0-1 fraction as a percentage with one decimal, e.g. " 5.3%".

    The fixed minimum width keeps the table columns aligned for values
    below 10%.
    """
    return f"{value * 100:>4.1f}%"


def format_table(rows, name_width):
    """Return the lines of the result table for ``(state, confidences)`` rows.

    ``name_width`` is the width of the state column.  The header and rule are
    derived from the same widths as the data rows, and the header is repeated
    at the bottom.
    """
    header = (
        f"| {'State':<{name_width}} "
        f"| {'Left':<{PERCENT_WIDTH}} "
        f"| {'Right':<{PERCENT_WIDTH}} |"
    )
    rule = (
        "+"
        + "+".join(
            "-" * (width + 2)
            for width in (name_width, PERCENT_WIDTH, PERCENT_WIDTH)
        )
        + "+"
    )
    body = [
        f"| {state:<{name_width}} "
        f"| {format_percent(values['left'])} "
        f"| {format_percent(values['right'])} |"
        for state, values in rows
    ]
    return [header, rule, *body, rule, header]


def main():
    """Load model.gptc and print the state analysis report."""
    with open("model.gptc", "rb") as f:
        model = gptc.deserialize(f)

    longest = max(len(state) for state in states)

    print("# State Analysis")
    print()
    print("""This is an analysis of which states are mentioned more in right- or left-
leaning American news sources. Results do not necessarily correlate with the
political views of residents of the states; for example, the predominantly
liberal state of Oregon is mentioned more in right-leaning sources than in
left-leaning ones.""")
    print()
    print("\n".join(format_table(classify_states(model, states), longest)))


if __name__ == "__main__":
    main()
|
42
compile.py
Executable file
42
compile.py
Executable file
|
@ -0,0 +1,42 @@
|
|||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
# Copyright (c) 2022 Samuel L Sloniker
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation, either version 3 of the License, or (at your option) any later
|
||||
# version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
# details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
import sqlite3
|
||||
import tomli
|
||||
import gptc
|
||||
|
||||
# Compiler settings (max_ngram_length and min_count) come from compiler.toml.
with open("compiler.toml", "rb") as f:
    config = tomli.load(f)

# Read every stored article as a {"text", "category"} record.  The connection
# is closed even if a query fails.
con = sqlite3.connect("articles.db")
try:
    con.execute(
        "CREATE TABLE IF NOT EXISTS articles(source, category, url, text);"
    )
    raw_model = [
        {"text": text, "category": category}
        for text, category in con.execute(
            "SELECT text, category FROM articles;"
        )
    ]
    con.commit()
finally:
    con.close()

# Compile before opening the output file: opening in "w+b" mode truncates
# model.gptc, so a failed compile must not happen after that point.
model = gptc.compile(
    raw_model,
    max_ngram_length=config["max_ngram_length"],
    min_count=config["min_count"],
)

with open("model.gptc", "w+b") as f:
    model.serialize(f)
|
2
compiler.toml
Normal file
2
compiler.toml
Normal file
|
@ -0,0 +1,2 @@
|
|||
max_ngram_length=5
|
||||
min_count=5
|
96
download.py
Normal file → Executable file
96
download.py
Normal file → Executable file
|
@ -1,11 +1,37 @@
|
|||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
# Copyright (c) 2022 Samuel L Sloniker
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation, either version 3 of the License, or (at your option) any later
|
||||
# version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
# details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
import feedparser
|
||||
import hashlib
|
||||
import sqlite3
|
||||
import goose3
|
||||
import tomli
|
||||
import gptc
|
||||
import bs4
|
||||
|
||||
|
||||
def matches(string, checks):
    """Return True if *string* satisfies at least one of *checks*.

    Each check is a mapping with a ``"type"`` key.  Only the ``"startswith"``
    type is recognised: it matches when *string* starts with the check's
    ``"pattern"``.  Checks of any other type never match, and an empty
    *checks* gives False.
    """
    return any(
        check["type"] == "startswith" and string.startswith(check["pattern"])
        for check in checks
    )
|
||||
|
||||
|
||||
# Load the source definitions from sources.toml (binary mode, as tomli.load
# is given the open file directly).
with open("sources.toml", "rb") as sources_file:
    sources = tomli.load(sources_file)
|
||||
|
||||
|
@ -39,49 +65,43 @@ try:
|
|||
entry
|
||||
for entry in feedparser.parse(url)["entries"]
|
||||
if not entry["link"] in known
|
||||
and not matches(entry["link"], config.get("exclude", []))
|
||||
]
|
||||
print(f"Fetched feed. Found {len(entries)} new articles.")
|
||||
|
||||
if contains_articles:
|
||||
for entry in entries:
|
||||
print(f"Saving {entry['link']}")
|
||||
con.execute(
|
||||
"INSERT INTO articles VALUES (?, ?, ?, ?);",
|
||||
(
|
||||
name,
|
||||
category,
|
||||
entry["link"],
|
||||
bs4.BeautifulSoup(
|
||||
entry["content"][0]["value"], features="lxml"
|
||||
).text,
|
||||
),
|
||||
)
|
||||
try:
|
||||
print(f"Saving {entry['link']}")
|
||||
con.execute(
|
||||
"INSERT INTO articles VALUES (?, ?, ?, ?);",
|
||||
(
|
||||
name,
|
||||
category,
|
||||
entry["link"],
|
||||
bs4.BeautifulSoup(
|
||||
entry["content"][0]["value"], features="lxml"
|
||||
).text,
|
||||
),
|
||||
)
|
||||
except KeyError:
|
||||
print("Not enough information. Skipping.")
|
||||
else:
|
||||
for entry in entries:
|
||||
print(f"Downloading {entry['link']}...")
|
||||
con.execute(
|
||||
"INSERT INTO articles VALUES (?, ?, ?, ?);",
|
||||
(
|
||||
name,
|
||||
category,
|
||||
entry["link"],
|
||||
g.extract(entry["link"]).cleaned_text,
|
||||
),
|
||||
)
|
||||
print(f"Done downloading.")
|
||||
try:
|
||||
print(f"Downloading {entry['link']}...")
|
||||
con.execute(
|
||||
"INSERT INTO articles VALUES (?, ?, ?, ?);",
|
||||
(
|
||||
name,
|
||||
category,
|
||||
entry["link"],
|
||||
g.extract(entry["link"]).cleaned_text,
|
||||
),
|
||||
)
|
||||
print(f"Done downloading.")
|
||||
except KeyError:
|
||||
print("Not enough information. Skipping.")
|
||||
finally:
|
||||
con.commit()
|
||||
|
||||
print("Compiling model...")
|
||||
|
||||
raw_model = [
|
||||
{"text": i[0], "category": i[1]}
|
||||
for i in con.execute("SELECT text, category FROM articles;")
|
||||
]
|
||||
|
||||
with open("model.gptc", "w+b") as f:
|
||||
f.write(
|
||||
gptc.compile(raw_model, max_ngram_length=3, min_count=3).serialize()
|
||||
)
|
||||
|
||||
con.close()
|
||||
|
|
39
export.py
Executable file
39
export.py
Executable file
|
@ -0,0 +1,39 @@
|
|||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
# Copyright (c) 2022 Samuel L Sloniker
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation, either version 3 of the License, or (at your option) any later
|
||||
# version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
# details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import sqlite3
|
||||
|
||||
# Start from an empty build/ directory, discarding any previous export.
try:
    shutil.rmtree("build")
except FileNotFoundError:
    pass
os.mkdir("build")

# Copy the release files into build/ under the same names.
for filename in ("articles.db", "sources.toml", "compiler.toml", "model.gptc"):
    shutil.copy(filename, f"build/{filename}")

# In the exported copy only, replace every article's text with '***' and
# then VACUUM the database file.  The working articles.db is not modified.
con = sqlite3.connect("build/articles.db")
try:
    con.execute("UPDATE articles SET text = '***';")
    # The UPDATE must be committed first: VACUUM is run outside a transaction.
    con.commit()
    con.execute("VACUUM;")
    con.commit()
finally:
    con.close()
|
|
@ -1,8 +0,0 @@
|
|||
#!/bin/sh
# Temporarily replace the text of every article in articles.db with '***'
# (and VACUUM the file), wait for the user to press enter, then put the
# original database back.

# Without a backup the original text could not be restored, so stop here if
# the copy fails.
cp articles.db old_articles.db || exit 1

# Restore the original database however the script ends, including when it
# is interrupted while waiting for input.
restore() {
    mv -f old_articles.db articles.db
}
trap restore EXIT
trap 'exit 1' HUP INT TERM

# '***' is a string literal, so it is single-quoted inside the SQL.
sqlite3 articles.db "UPDATE articles SET text = '***';"
sqlite3 articles.db 'VACUUM;'

# printf and "read -r _" are used because "echo -n" and a bare "read" are
# not portable across /bin/sh implementations.
printf '%s' "Press enter when done..."
read -r _
|
140
sources.toml
140
sources.toml
|
@ -5,55 +5,165 @@
|
|||
# ? Newsmax (read timeout errors)
|
||||
# ? Bloomberg (CAPTCHA on RSS feed?)
|
||||
|
||||
[cnn]
|
||||
feed="http://rss.cnn.com/rss/cnn_latest.rss"
|
||||
[abc_news]
|
||||
feed="https://abcnews.go.com/abcnews/usheadlines"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="ABC News"
|
||||
|
||||
[huffpost]
|
||||
feed="https://chaski.huffpost.com/us/auto"
|
||||
[atlantic]
|
||||
feed="https://www.theatlantic.com/feed/all/"
|
||||
category="left"
|
||||
contains_articles=true
|
||||
name="The Atlantic"
|
||||
sort="Atlantic"
|
||||
|
||||
[cbs_news]
|
||||
feed="https://www.cbsnews.com/latest/rss/main"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="CBS News"
|
||||
|
||||
[cnbc]
|
||||
feed="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=100003114"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="CNBC"
|
||||
|
||||
[pbs_newshour]
|
||||
feed="https://www.pbs.org/newshour/feeds/rss/headlines"
|
||||
[cnn]
|
||||
feed="http://rss.cnn.com/rss/cnn_latest.rss"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="CNN"
|
||||
|
||||
[democracy_now]
|
||||
feed="https://www.democracynow.org/democracynow.rss"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="Democracy Now!"
|
||||
|
||||
[huffpost]
|
||||
feed="https://chaski.huffpost.com/us/auto"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="HuffPost"
|
||||
|
||||
[intercept]
|
||||
feed="https://theintercept.com/feed/?lang=en"
|
||||
category="left"
|
||||
contains_articles=true
|
||||
name="The Intercept"
|
||||
sort="Intercept"
|
||||
|
||||
[latimes]
|
||||
feed="https://www.latimes.com/local/rss2.0.xml"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="Los Angeles Times"
|
||||
|
||||
[pbs_newshour]
|
||||
feed="https://www.pbs.org/newshour/feeds/rss/headlines"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="PBS NewsHour"
|
||||
|
||||
[slate]
|
||||
feed="http://www.slate.com/articles/news_and_politics.fulltext.all.10.rss"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="Slate"
|
||||
|
||||
[washington_post]
|
||||
feed="https://feeds.washingtonpost.com/rss/national"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="The Washington Post"
|
||||
sort="Washington Post"
|
||||
|
||||
#[bloomberg]
|
||||
#feed="https://www.bloomberg.com/politics/feeds/site.xml"
|
||||
#category="left"
|
||||
#contains_articles=false
|
||||
|
||||
[fox]
|
||||
feed="https://moxie.foxnews.com/google-publisher/latest.xml"
|
||||
[american_conservative]
|
||||
feed="https://theamericanconservative.com/articles/feed/"
|
||||
category="right"
|
||||
contains_articles=true
|
||||
name="The American Conservative"
|
||||
sort="American Conservative"
|
||||
|
||||
[oann]
|
||||
feed="https://www.oann.com/category/newsroom/feed"
|
||||
category="right"
|
||||
contains_articles=true
|
||||
|
||||
[nypost]
|
||||
feed="https://nypost.com/feed"
|
||||
[american_thinker]
|
||||
feed="https://feeds.feedburner.com/americanthinker_articles"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="American Thinker"
|
||||
|
||||
[breitbart]
|
||||
feed="https://feeds.feedburner.com/breitbart/"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="Breitbart"
|
||||
|
||||
[daily_caller]
|
||||
feed="https://feeds.feedburner.com/dailycaller"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="Daily Caller"
|
||||
|
||||
[epoch_times]
|
||||
feed="https://www.theepochtimes.com/feed/"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="Epoch Times"
|
||||
|
||||
[federalist]
|
||||
feed="https://thefederalist.com/feed"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="The Federalist"
|
||||
sort="Federalist"
|
||||
|
||||
[fox_news]
|
||||
feed="https://moxie.foxnews.com/google-publisher/latest.xml"
|
||||
category="right"
|
||||
contains_articles=true
|
||||
name="Fox News"
|
||||
|
||||
[lifesitenews]
|
||||
feed="https://www.lifesitenews.com/ldn/rss/headlines.xml"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="LifeSiteNews"
|
||||
|
||||
[not_the_bee]
|
||||
feed="https://notthebee.com/feed"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="Not the Bee"
|
||||
|
||||
[nypost]
|
||||
feed="https://nypost.com/news/feed"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="New York Post"
|
||||
|
||||
[oann]
|
||||
feed="https://www.oann.com/category/newsroom/feed"
|
||||
category="right"
|
||||
contains_articles=true
|
||||
name="One America News Network"
|
||||
|
||||
[redstate]
|
||||
feed="https://redstate.com/feed"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="RedState"
|
||||
|
||||
[washington_examiner]
|
||||
feed="https://feeds.feedburner.com/dcexaminer/Politics"
|
||||
category="right"
|
||||
contains_articles=true
|
||||
name="Washington Examiner"
|
||||
|
||||
#[newsmax]
|
||||
#feed="https://www.newsmax.com/rss/Newsfront/16/"
|
||||
|
|
77
stats.py
Executable file
77
stats.py
Executable file
|
@ -0,0 +1,77 @@
|
|||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
# Copyright (c) 2022 Samuel L Sloniker
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation, either version 3 of the License, or (at your option) any later
|
||||
# version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
# details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
import sqlite3
|
||||
import tomli
|
||||
|
||||
REPORT_TEMPLATE = """\
This model contains a total of {article_count} articles from {source_count} sources.

## Left

{left_breakdown}

Left total: {left_article_count} articles from {left_source_count} sources

## Right

{right_breakdown}

Right total: {right_article_count} articles from {right_source_count} sources"""


def count_articles(con, condition="", parameters=()):
    """Return the number of rows in ``articles`` matching *condition*.

    *condition* is an SQL fragment written in this file (for example
    ``"WHERE category = ?"``); values are always passed via *parameters*.
    Counting is done by the database instead of fetching every row.
    """
    query = f"SELECT COUNT(*) FROM articles {condition}"
    return con.execute(query, parameters).fetchone()[0]


def build_report(con, sources):
    """Return the statistics report as a single string.

    ``con`` is an open SQLite connection with an ``articles`` table and
    ``sources`` is the mapping loaded from sources.toml, keyed by source id.
    Each source needs ``"name"`` and ``"category"``; an optional ``"sort"``
    overrides the name used for ordering.
    """
    groups = {"left": [], "right": []}
    for source_id, source_info in sources.items():
        # Any category other than "left" is reported under "right".
        side = "left" if source_info["category"] == "left" else "right"
        groups[side].append(
            {
                "name": source_info["name"],
                "sort": source_info.get("sort", source_info["name"]),
                "count": count_articles(
                    con, "WHERE source = ?", (source_id,)
                ),
            }
        )

    breakdowns = {}
    for side, entries in groups.items():
        entries.sort(key=lambda entry: entry["sort"])
        breakdowns[side] = "\n".join(
            f"* {entry['name']}: {entry['count']}" for entry in entries
        )

    return REPORT_TEMPLATE.format(
        article_count=count_articles(con),
        source_count=len(sources),
        left_breakdown=breakdowns["left"],
        left_article_count=count_articles(
            con, "WHERE category = ?", ("left",)
        ),
        left_source_count=len(groups["left"]),
        right_breakdown=breakdowns["right"],
        right_article_count=count_articles(
            con, "WHERE category = ?", ("right",)
        ),
        right_source_count=len(groups["right"]),
    )


def main():
    """Print article and source statistics for articles.db."""
    with open("sources.toml", "rb") as f:
        sources = tomli.load(f)

    con = sqlite3.connect("articles.db")
    try:
        con.execute(
            "CREATE TABLE IF NOT EXISTS articles(source, category, url, text);"
        )
        report = build_report(con, sources)
        con.commit()
    finally:
        con.close()

    print(report)


if __name__ == "__main__":
    main()
|
22
stats.sh
22
stats.sh
|
@ -1,22 +0,0 @@
|
|||
#!/bin/bash
# Print the total number of articles in articles.db, the totals per category,
# and a per-source breakdown for each category.

db=articles.db

# Print the number of articles matching the SQL condition in $1 (may be
# empty).  Counting in SQL avoids piping every row through "wc -l".
count_articles() {
    sqlite3 "$db" "SELECT COUNT(*) FROM articles $1"
}

# Print one "* source: count" line per source in category $1, ordered by
# source.  A single grouped query replaces the previous one-query-per-source
# loop, which word-split source names and pasted them into SQL.
# NOTE: counts are per source within the category; this matches the old
# output as long as each source belongs to exactly one category.
list_sources() {
    sqlite3 "$db" "SELECT '* ' || source || ': ' || COUNT(*) FROM articles WHERE category = '$1' GROUP BY source ORDER BY source"
}

total=$(count_articles "")
left=$(count_articles "WHERE category = 'left'")
right=$(count_articles "WHERE category = 'right'")

echo "This model contains a total of $total articles ($left left, $right right)."
echo ""
echo "## Left"
echo ""
list_sources left
echo ""
echo "## Right"
echo ""
list_sources right
|
Loading…
Reference in New Issue
Block a user