Compare commits

...

8 Commits

Author SHA1 Message Date
9c66b18cfe
Export compiler.toml 2023-01-08 08:48:20 -08:00
fe088822e1
Decrease ngram length to 5 2023-01-08 08:47:59 -08:00
1daab919ea
Add analyses 2023-01-08 08:45:22 -08:00
4eeb8d2d17
Use GPTC v4.0.0 2022-12-24 12:29:00 -08:00
f1ccaaabab
Add LifeSiteNews 2022-12-20 17:50:21 -08:00
9d82b07f17
Add RedState 2022-12-20 17:31:12 -08:00
f25594d771
Reduce model size 2022-11-28 18:00:31 -08:00
af9e5e92a3
Use 10-grams 2022-11-28 17:40:10 -08:00
7 changed files with 176 additions and 8 deletions

View File

@ -40,7 +40,9 @@ Inclusion of a site in this model is not an endorsement of the site.
* Epoch Times
* The Federalist
* Fox News
* LifeSiteNews
* New York Post
* Not the Bee
* One America News Network
* RedState
* Washington Examiner

View File

@ -0,0 +1,70 @@
import gptc
# Constitutional amendment analysis.
#
# Looks up every amendment in the compiled model under both of its common
# spellings ("1st Amendment" / "First Amendment"), merges the per-category
# scores, and prints a text table ordered by ascending "left" score.

# (ordinal abbreviation, spelled-out ordinal) for each of the 27 amendments.
amendments = [
    ("1st", "First"),
    ("2nd", "Second"),
    ("3rd", "Third"),
    ("4th", "Fourth"),
    ("5th", "Fifth"),
    ("6th", "Sixth"),
    ("7th", "Seventh"),
    ("8th", "Eighth"),
    ("9th", "Ninth"),
    ("10th", "Tenth"),
    ("11th", "Eleventh"),
    ("12th", "Twelfth"),
    ("13th", "Thirteenth"),
    ("14th", "Fourteenth"),
    ("15th", "Fifteenth"),
    ("16th", "Sixteenth"),
    ("17th", "Seventeenth"),
    ("18th", "Eighteenth"),
    ("19th", "Nineteenth"),
    ("20th", "Twentieth"),
    ("21st", "Twenty-first"),
    ("22nd", "Twenty-second"),
    ("23rd", "Twenty-third"),
    ("24th", "Twenty-fourth"),
    ("25th", "Twenty-fifth"),
    ("26th", "Twenty-sixth"),
    ("27th", "Twenty-seventh"),
]

with open("model.gptc", "rb") as f:
    model = gptc.deserialize(f)

# Spelled-out ordinal -> per-category scores.  An amendment for which neither
# spelling yields any data is left out of the table entirely.
data = {}
for number, name in amendments:
    number_data = model.get(number + " Amendment")
    name_data = model.get(name + " Amendment")
    if number_data and name_data:
        # Both spellings are known to the model: take the unweighted mean.
        # NOTE(review): assumes both lookups report the same category keys.
        data[name] = {
            key: (number_data[key] + name_data[key]) / 2
            for key in number_data
        }
    elif number_data or name_data:
        # Only one spelling is known: use whichever one has data.
        data[name] = number_data or name_data

classified_amendments = sorted(data.items(), key=lambda x: x[1]["left"])

# Size the first column from the longest amendment name so that the header,
# the separators and the data rows always share the same column widths.
name_width = max(len(name) for _, name in amendments)
header = f"| {'Amendment':<{name_width}} | Left  | Right |"
separator = "+" + "-" * (name_width + 2) + "+-------+-------+"

print("# Constitutional Amendment Analysis")
print()
print("""This is an analysis of which amendments to the U.S. Constitution are mentioned
more in right- or left-leaning American news sources. Data do not necessarily
correlate with support or opposition for the amendment among right- or
left-leaning Americans.""")
print()
print(header)
print(separator)
# A distinct loop variable is used so the ``data`` mapping is not overwritten.
for amendment, scores in classified_amendments:
    percent_right = f"{scores['right'] * 100:>4.1f}%"
    percent_left = f"{scores['left'] * 100:>4.1f}%"
    print(f"| {amendment:<{name_width}} | {percent_left} | {percent_right} |")
print(separator)
# The header is repeated below the table as a footer.
print(header)

85
analyses/states.py Normal file
View File

@ -0,0 +1,85 @@
import gptc
# State analysis.
#
# Looks up every U.S. state name in the compiled model and prints a text
# table of the per-category scores, ordered by ascending "left" score.

states = [
    "Alabama",
    "Alaska",
    "Arizona",
    "Arkansas",
    "California",
    "Colorado",
    "Connecticut",
    "Delaware",
    "Florida",
    "Georgia",
    "Hawaii",
    "Idaho",
    "Illinois",
    "Indiana",
    "Iowa",
    "Kansas",
    "Kentucky",
    "Louisiana",
    "Maine",
    "Maryland",
    "Massachusetts",
    "Michigan",
    "Minnesota",
    "Mississippi",
    "Missouri",
    "Montana",
    "Nebraska",
    "Nevada",
    "New Hampshire",
    "New Jersey",
    "New Mexico",
    "New York",
    "North Carolina",
    "North Dakota",
    "Ohio",
    "Oklahoma",
    "Oregon",
    "Pennsylvania",
    "Rhode Island",
    "South Carolina",
    "South Dakota",
    "Tennessee",
    "Texas",
    "Utah",
    "Vermont",
    "Virginia",
    "Washington",
    "West Virginia",
    "Wisconsin",
    "Wyoming",
]

with open("model.gptc", "rb") as f:
    model = gptc.deserialize(f)

# (state, per-category scores) pairs.  A state with no data is skipped so the
# sort key below cannot fail on a missing "left" entry.
# NOTE(review): presumably model.get returns an empty mapping for a term the
# model has never seen -- confirm against the gptc documentation.
classified_states = []
for state in states:
    scores = model.get(state)
    if scores:
        classified_states.append((state, scores))
classified_states.sort(key=lambda x: x[1]["left"])

# Size the first column from the longest state name so that the header, the
# separators and the data rows always share the same column widths.
longest = max(len(state) for state in states)
header = f"| {'State':<{longest}} | Left  | Right |"
separator = "+" + "-" * (longest + 2) + "+-------+-------+"

print("# State Analysis")
print()
print("""This is an analysis of which states are mentioned more in right- or left-
leaning American news sources. Results do not necessarily correlate with the
political views of residents of the states; for example, the predominantly
liberal state of Oregon is mentioned more in right-leaning sources than in
left-leaning ones.""")
print()
print(header)
print(separator)
for state, data in classified_states:
    # Fixed-width formatting keeps one- and two-digit percentages aligned.
    percent_right = f"{data['right'] * 100:>4.1f}%"
    percent_left = f"{data['left'] * 100:>4.1f}%"
    print(f"| {state:<{longest}} | {percent_left} | {percent_right} |")
print(separator)
# The header is repeated below the table as a footer.
print(header)

View File

@ -32,13 +32,11 @@ raw_model = [
]
with open("model.gptc", "w+b") as f:
f.write(
gptc.compile(
raw_model,
max_ngram_length=config["max_ngram_length"],
min_count=config["min_count"],
).serialize()
)
gptc.compile(
raw_model,
max_ngram_length=config["max_ngram_length"],
min_count=config["min_count"],
).serialize(f)
con.commit()
con.close()

View File

@ -1,2 +1,2 @@
max_ngram_length=3
max_ngram_length=5
min_count=5

View File

@ -28,6 +28,7 @@ os.mkdir("build")
shutil.copy("articles.db", "build/articles.db")
shutil.copy("sources.toml", "build/sources.toml")
shutil.copy("compiler.toml", "build/compiler.toml")
shutil.copy("model.gptc", "build/model.gptc")
con = sqlite3.Connection("build/articles.db")

View File

@ -129,6 +129,12 @@ category="right"
contains_articles=true
name="Fox News"
[lifesitenews]
feed="https://www.lifesitenews.com/ldn/rss/headlines.xml"
category="right"
contains_articles=false
name="LifeSiteNews"
[not_the_bee]
feed="https://notthebee.com/feed"
category="right"
@ -147,6 +153,12 @@ category="right"
contains_articles=true
name="One America News Network"
[redstate]
feed="https://redstate.com/feed"
category="right"
contains_articles=false
name="RedState"
[washington_examiner]
feed="https://feeds.feedburner.com/dcexaminer/Politics"
category="right"