Compare commits
20 Commits
2022-11-25
...
master
Author | SHA1 | Date | |
---|---|---|---|
9c66b18cfe | |||
fe088822e1 | |||
1daab919ea | |||
4eeb8d2d17 | |||
f1ccaaabab | |||
9d82b07f17 | |||
f25594d771 | |||
af9e5e92a3 | |||
b485780738 | |||
82846f39ba | |||
5f3a2977f1 | |||
314bdef1c5 | |||
28f81c9a63 | |||
06190f5101 | |||
68c8949005 | |||
29d77b5393 | |||
9f3abd8641 | |||
17bb8a4f3f | |||
f0b93cd2f6 | |||
aa8fa31195 |
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -141,3 +141,4 @@ cython_debug/
|
|||
*.db
|
||||
*.db-journal
|
||||
*.gptc
|
||||
build/
|
||||
|
|
20
README.md
20
README.md
|
@ -3,10 +3,19 @@
|
|||
A [GPTC](https://git.kj7rrv.com/kj7rrv/gptc) model to classify American news as
|
||||
right- or left-leaning
|
||||
|
||||
Inclusion of a site in this model is not an endorsement of the site.
|
||||
## Scripts
|
||||
|
||||
No scripts take any arguments.
|
||||
|
||||
* `./download.py`: download new articles and add them to the database
|
||||
* `./compile.py`: compile GPTC model
|
||||
* `./export.py`: create `build/` directory with files for release
|
||||
* `./stats.py`: print statistics on article and source counts
|
||||
|
||||
## Sources
|
||||
|
||||
Inclusion of a site in this model is not an endorsement of the site.
|
||||
|
||||
### Left
|
||||
|
||||
* ABC News
|
||||
|
@ -14,19 +23,26 @@ Inclusion of a site in this model is not an endorsement of the site.
|
|||
* CBS News
|
||||
* CNBC
|
||||
* CNN
|
||||
* Democracy Now!
|
||||
* HuffPost (formerly Huffington Post)
|
||||
* The Intercept
|
||||
* Los Angeles Times
|
||||
* PBS NewsHour
|
||||
* Slate
|
||||
* The Washington Post
|
||||
|
||||
### Right
|
||||
|
||||
* The American Conservative
|
||||
* American Thinker
|
||||
* Breitbart
|
||||
* Daily Caller
|
||||
* Epoch Times
|
||||
* The Federalist
|
||||
* Fox News
|
||||
* Not the Bee
|
||||
* LifeSiteNews
|
||||
* New York Post
|
||||
* Not the Bee
|
||||
* One America News Network
|
||||
* RedState
|
||||
* Washington Examiner
|
||||
|
|
70
analyses/constitutional_amendments.py
Normal file
70
analyses/constitutional_amendments.py
Normal file
|
@ -0,0 +1,70 @@
|
|||
import gptc
|
||||
|
||||
# Report which U.S. constitutional amendments are mentioned more in left- or
# right-leaning sources, according to the compiled model in model.gptc.
#
# Each amendment is looked up under two spellings, "<abbreviation> Amendment"
# and "<spelled-out ordinal> Amendment" (e.g. "1st Amendment" and
# "First Amendment").
amendments = [
    ("1st", "First"),
    ("2nd", "Second"),
    ("3rd", "Third"),
    ("4th", "Fourth"),
    ("5th", "Fifth"),
    ("6th", "Sixth"),
    ("7th", "Seventh"),
    ("8th", "Eighth"),
    ("9th", "Ninth"),
    ("10th", "Tenth"),
    ("11th", "Eleventh"),
    ("12th", "Twelfth"),
    ("13th", "Thirteenth"),
    ("14th", "Fourteenth"),
    ("15th", "Fifteenth"),
    ("16th", "Sixteenth"),
    ("17th", "Seventeenth"),
    ("18th", "Eighteenth"),
    ("19th", "Nineteenth"),
    ("20th", "Twentieth"),
    ("21st", "Twenty-first"),
    ("22nd", "Twenty-second"),
    ("23rd", "Twenty-third"),
    ("24th", "Twenty-fourth"),
    ("25th", "Twenty-fifth"),
    ("26th", "Twenty-sixth"),
    ("27th", "Twenty-seventh"),
]

# Table layout: a 14-character name column (the longest name,
# "Twenty-seventh", is 14 characters) and two 5-character percentage columns.
# The header is padded to the same widths so it lines up with the rule and
# with the data rows.
RULE = "+----------------+-------+-------+"
HEADER = f"| {'Amendment':<14} | {'Left':<5} | {'Right':<5} |"

with open("model.gptc", "rb") as f:
    model = gptc.deserialize(f)

# Maps the spelled-out ordinal to the lookup result for that amendment.
data = {}

for number, name in amendments:
    number_data = model.get(number + " Amendment")
    name_data = model.get(name + " Amendment")

    if number_data and name_data:
        # Both spellings were found: use the mean of the two results.
        # NOTE(review): assumes both results carry the same keys -- confirm
        # against the gptc API.
        data[name] = {
            key: (number_data[key] + name_data[key]) / 2
            for key in number_data
        }
    elif number_data or name_data:
        # Only one spelling was found: use it as-is.
        data[name] = number_data or name_data
    # Neither spelling was found: the amendment is left out of the report.

# Ascending by the "left" value.
classified_amendments = sorted(data.items(), key=lambda x: x[1]["left"])

print("# Constitutional Amendment Analysis")
print()
print("""This is an analysis of which amendments to the U.S. Constitution are mentioned
more in right- or left-leaning American news sources. Data do not necessarily
correlate with support or opposition for the amendment among right- or
left-leaning Americans.""")
print()
print(HEADER)
print(RULE)
# `confidences` rather than `data`, so the loop does not rebind the
# module-level `data` dict built above.
for amendment, confidences in classified_amendments:
    percent_right = f"{confidences['right']*100:>4.1f}%"
    percent_left = f"{confidences['left']*100:>4.1f}%"

    print(f"| {amendment:<14} | {percent_left} | {percent_right} |")
print(RULE)
print(HEADER)
|
85
analyses/states.py
Normal file
85
analyses/states.py
Normal file
|
@ -0,0 +1,85 @@
|
|||
import gptc
|
||||
|
||||
# Report which U.S. states are mentioned more in left- or right-leaning
# sources, according to the compiled model in model.gptc.
states = [
    "Alabama",
    "Alaska",
    "Arizona",
    "Arkansas",
    "California",
    "Colorado",
    "Connecticut",
    "Delaware",
    "Florida",
    "Georgia",
    "Hawaii",
    "Idaho",
    "Illinois",
    "Indiana",
    "Iowa",
    "Kansas",
    "Kentucky",
    "Louisiana",
    "Maine",
    "Maryland",
    "Massachusetts",
    "Michigan",
    "Minnesota",
    "Mississippi",
    "Missouri",
    "Montana",
    "Nebraska",
    "Nevada",
    "New Hampshire",
    "New Jersey",
    "New Mexico",
    "New York",
    "North Carolina",
    "North Dakota",
    "Ohio",
    "Oklahoma",
    "Oregon",
    "Pennsylvania",
    "Rhode Island",
    "South Carolina",
    "South Dakota",
    "Tennessee",
    "Texas",
    "Utah",
    "Vermont",
    "Virginia",
    "Washington",
    "West Virginia",
    "Wisconsin",
    "Wyoming",
]

with open("model.gptc", "rb") as f:
    model = gptc.deserialize(f)

# (state, lookup result) pairs for every state the model knows about.
classified_states = []

for state in states:
    confidences = model.get(state)
    # A falsy result is treated as "not in the model" and skipped, so the
    # sort below never indexes an empty result.
    # NOTE(review): assumes model.get returns something falsy for unknown
    # terms -- confirm against the gptc API.
    if confidences:
        classified_states.append((state, confidences))

# Ascending by the "left" value.
classified_states.sort(key=lambda x: x[1]["left"])

# Width of the name column, taken from the longest state name.
longest = max(len(state) for state in states)

# The rule and the header are derived from the same widths as the data rows
# (name column `longest` wide, percentage columns 5 wide) so they line up.
rule = f"+{'-' * (longest + 2)}+-------+-------+"
header = f"| {'State':<{longest}} | {'Left':<5} | {'Right':<5} |"

print("# State Analysis")
print()
print("""This is an analysis of which states are mentioned more in right- or left-
leaning American news sources. Results do not necessarily correlate with the
political views of residents of the states; for example, the predominantly
liberal state of Oregon is mentioned more in right-leaning sources than in
left-leaning ones.""")
print()
print(header)
print(rule)
for state, confidences in classified_states:
    # Fixed-width, one-decimal percentages keep single-digit values
    # (" 5.3%") aligned with two-digit ones ("50.0%").
    percent_right = f"{confidences['right']*100:>4.1f}%"
    percent_left = f"{confidences['left']*100:>4.1f}%"

    print(f"| {state:<{longest}} | {percent_left} | {percent_right} |")
print(rule)
print(header)
|
30
compile.py
Normal file → Executable file
30
compile.py
Normal file → Executable file
|
@ -1,6 +1,28 @@
|
|||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
# Copyright (c) 2022 Samuel L Sloniker
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation, either version 3 of the License, or (at your option) any later
|
||||
# version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
# details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
import sqlite3
|
||||
import tomli
|
||||
import gptc
|
||||
|
||||
with open("compiler.toml", "rb") as f:
|
||||
config = tomli.load(f)
|
||||
|
||||
con = sqlite3.connect("articles.db")
|
||||
con.execute("CREATE TABLE IF NOT EXISTS articles(source, category, url, text);")
|
||||
|
||||
|
@ -10,9 +32,11 @@ raw_model = [
|
|||
]
|
||||
|
||||
with open("model.gptc", "w+b") as f:
|
||||
f.write(
|
||||
gptc.compile(raw_model, max_ngram_length=3, min_count=3).serialize()
|
||||
)
|
||||
gptc.compile(
|
||||
raw_model,
|
||||
max_ngram_length=config["max_ngram_length"],
|
||||
min_count=config["min_count"],
|
||||
).serialize(f)
|
||||
|
||||
con.commit()
|
||||
con.close()
|
||||
|
|
2
compiler.toml
Normal file
2
compiler.toml
Normal file
|
@ -0,0 +1,2 @@
|
|||
max_ngram_length=5
|
||||
min_count=5
|
18
download.py
Normal file → Executable file
18
download.py
Normal file → Executable file
|
@ -1,3 +1,21 @@
|
|||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
# Copyright (c) 2022 Samuel L Sloniker
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation, either version 3 of the License, or (at your option) any later
|
||||
# version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
# details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
import feedparser
|
||||
import sqlite3
|
||||
import goose3
|
||||
|
|
39
export.py
Executable file
39
export.py
Executable file
|
@ -0,0 +1,39 @@
|
|||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
# Copyright (c) 2022 Samuel L Sloniker
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation, either version 3 of the License, or (at your option) any later
|
||||
# version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
# details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import sqlite3
|
||||
|
||||
# Build a fresh build/ directory containing the files for a release.

# Remove any previous build; a missing directory is not an error.
try:
    shutil.rmtree("build")
except FileNotFoundError:
    pass
os.mkdir("build")

for filename in ("articles.db", "sources.toml", "compiler.toml", "model.gptc"):
    shutil.copy(filename, f"build/{filename}")

# Only the copy under build/ is modified; the working articles.db keeps its
# text column intact.
con = sqlite3.connect("build/articles.db")
try:
    # Replace every article's text with a placeholder in the exported copy.
    con.execute("UPDATE articles SET text = '***';")
    # Commit before VACUUM: SQLite cannot vacuum inside an open transaction.
    con.commit()
    con.execute("VACUUM;")
    con.commit()
finally:
    # Close the connection even if one of the statements above fails.
    con.close()
|
|
@ -1,8 +0,0 @@
|
|||
#!/bin/sh
|
||||
cp articles.db old_articles.db
|
||||
sqlite3 articles.db 'UPDATE articles SET text = "***";'
|
||||
sqlite3 articles.db 'VACUUM;'
|
||||
echo -n "Press enter when done..."
|
||||
read
|
||||
rm articles.db
|
||||
mv old_articles.db articles.db
|
177
sources.toml
177
sources.toml
|
@ -5,41 +5,11 @@
|
|||
# ? Newsmax (read timeout errors)
|
||||
# ? Bloomberg (CAPTCHA on RSS feed?)
|
||||
|
||||
[cnn]
|
||||
feed="http://rss.cnn.com/rss/cnn_latest.rss"
|
||||
[abc_news]
|
||||
feed="https://abcnews.go.com/abcnews/usheadlines"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="CNN"
|
||||
|
||||
[huffpost]
|
||||
feed="https://chaski.huffpost.com/us/auto"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="HuffPost"
|
||||
|
||||
[cnbc]
|
||||
feed="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=100003114"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="CNBC"
|
||||
|
||||
[pbs_newshour]
|
||||
feed="https://www.pbs.org/newshour/feeds/rss/headlines"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="PBS NewsHour"
|
||||
|
||||
[latimes]
|
||||
feed="https://www.latimes.com/local/rss2.0.xml"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="Los Angeles Times"
|
||||
|
||||
[cbs_news]
|
||||
feed="https://www.cbsnews.com/latest/rss/main"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="CBS News"
|
||||
name="ABC News"
|
||||
|
||||
[atlantic]
|
||||
feed="https://www.theatlantic.com/feed/all/"
|
||||
|
@ -48,11 +18,35 @@ contains_articles=true
|
|||
name="The Atlantic"
|
||||
sort="Atlantic"
|
||||
|
||||
[abc_news]
|
||||
feed="https://abcnews.go.com/abcnews/usheadlines"
|
||||
[cbs_news]
|
||||
feed="https://www.cbsnews.com/latest/rss/main"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="ABC News"
|
||||
name="CBS News"
|
||||
|
||||
[cnbc]
|
||||
feed="https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=100003114"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="CNBC"
|
||||
|
||||
[cnn]
|
||||
feed="http://rss.cnn.com/rss/cnn_latest.rss"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="CNN"
|
||||
|
||||
[democracy_now]
|
||||
feed="https://www.democracynow.org/democracynow.rss"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="Democracy Now!"
|
||||
|
||||
[huffpost]
|
||||
feed="https://chaski.huffpost.com/us/auto"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="HuffPost"
|
||||
|
||||
[intercept]
|
||||
feed="https://theintercept.com/feed/?lang=en"
|
||||
|
@ -61,45 +55,42 @@ contains_articles=true
|
|||
name="The Intercept"
|
||||
sort="Intercept"
|
||||
|
||||
[latimes]
|
||||
feed="https://www.latimes.com/local/rss2.0.xml"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="Los Angeles Times"
|
||||
|
||||
[pbs_newshour]
|
||||
feed="https://www.pbs.org/newshour/feeds/rss/headlines"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="PBS NewsHour"
|
||||
|
||||
[slate]
|
||||
feed="http://www.slate.com/articles/news_and_politics.fulltext.all.10.rss"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="Slate"
|
||||
|
||||
[washington_post]
|
||||
feed="https://feeds.washingtonpost.com/rss/national"
|
||||
category="left"
|
||||
contains_articles=false
|
||||
name="The Washington Post"
|
||||
sort="Washington Post"
|
||||
|
||||
#[bloomberg]
|
||||
#feed="https://www.bloomberg.com/politics/feeds/site.xml"
|
||||
#category="left"
|
||||
#contains_articles=false
|
||||
|
||||
[fox_news]
|
||||
feed="https://moxie.foxnews.com/google-publisher/latest.xml"
|
||||
[american_conservative]
|
||||
feed="https://theamericanconservative.com/articles/feed/"
|
||||
category="right"
|
||||
contains_articles=true
|
||||
name="Fox News"
|
||||
|
||||
[oann]
|
||||
feed="https://www.oann.com/category/newsroom/feed"
|
||||
category="right"
|
||||
contains_articles=true
|
||||
name="One America News Network"
|
||||
|
||||
[nypost]
|
||||
feed="https://nypost.com/feed"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
exclude=[
|
||||
{ type="startswith", pattern="https://pagesix.com" },
|
||||
{ type="startswith", pattern="https://decider.com" },
|
||||
]
|
||||
name="New York Post"
|
||||
|
||||
[federalist]
|
||||
feed="https://thefederalist.com/feed"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="The Federalist"
|
||||
sort="Federalist"
|
||||
|
||||
[washington_examiner]
|
||||
feed="https://feeds.feedburner.com/dcexaminer/Politics"
|
||||
category="right"
|
||||
contains_articles=true
|
||||
name="Washington Examiner"
|
||||
name="The American Conservative"
|
||||
sort="American Conservative"
|
||||
|
||||
[american_thinker]
|
||||
feed="https://feeds.feedburner.com/americanthinker_articles"
|
||||
|
@ -113,30 +104,66 @@ category="right"
|
|||
contains_articles=false
|
||||
name="Breitbart"
|
||||
|
||||
[daily_caller]
|
||||
feed="https://feeds.feedburner.com/dailycaller"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="Daily Caller"
|
||||
|
||||
[epoch_times]
|
||||
feed="https://www.theepochtimes.com/feed/"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="Epoch Times"
|
||||
|
||||
[federalist]
|
||||
feed="https://thefederalist.com/feed"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="The Federalist"
|
||||
sort="Federalist"
|
||||
|
||||
[fox_news]
|
||||
feed="https://moxie.foxnews.com/google-publisher/latest.xml"
|
||||
category="right"
|
||||
contains_articles=true
|
||||
name="Fox News"
|
||||
|
||||
[lifesitenews]
|
||||
feed="https://www.lifesitenews.com/ldn/rss/headlines.xml"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="LifeSiteNews"
|
||||
|
||||
[not_the_bee]
|
||||
feed="https://notthebee.com/feed"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="Not the Bee"
|
||||
|
||||
[daily_caller]
|
||||
feed="https://feeds.feedburner.com/dailycaller"
|
||||
[nypost]
|
||||
feed="https://nypost.com/news/feed"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="Daily Caller"
|
||||
name="New York Post"
|
||||
|
||||
[american_conservative]
|
||||
feed="https://theamericanconservative.com/articles/feed/"
|
||||
[oann]
|
||||
feed="https://www.oann.com/category/newsroom/feed"
|
||||
category="right"
|
||||
contains_articles=true
|
||||
name="The American Conservative"
|
||||
sort="American Conservative"
|
||||
name="One America News Network"
|
||||
|
||||
[redstate]
|
||||
feed="https://redstate.com/feed"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
name="RedState"
|
||||
|
||||
[washington_examiner]
|
||||
feed="https://feeds.feedburner.com/dcexaminer/Politics"
|
||||
category="right"
|
||||
contains_articles=true
|
||||
name="Washington Examiner"
|
||||
|
||||
#[newsmax]
|
||||
#feed="https://www.newsmax.com/rss/Newsfront/16/"
|
||||
|
|
18
stats.py
Normal file → Executable file
18
stats.py
Normal file → Executable file
|
@ -1,3 +1,21 @@
|
|||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
# Copyright (c) 2022 Samuel L Sloniker
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation, either version 3 of the License, or (at your option) any later
|
||||
# version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
# details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
import sqlite3
|
||||
import tomli
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user