Compare commits
32 Commits
2022-11-24
...
master
12 changed files with 526 additions and 82 deletions
@ -0,0 +1,70 @@
|
||||
import gptc |
||||
|
||||
# (ordinal abbreviation, spelled-out ordinal) for each of the 27 amendments.
# Both spellings are looked up in the model because a source may write either
# "1st Amendment" or "First Amendment".
amendments = [
    ("1st", "First"),
    ("2nd", "Second"),
    ("3rd", "Third"),
    ("4th", "Fourth"),
    ("5th", "Fifth"),
    ("6th", "Sixth"),
    ("7th", "Seventh"),
    ("8th", "Eighth"),
    ("9th", "Ninth"),
    ("10th", "Tenth"),
    ("11th", "Eleventh"),
    ("12th", "Twelfth"),
    ("13th", "Thirteenth"),
    ("14th", "Fourteenth"),
    ("15th", "Fifteenth"),
    ("16th", "Sixteenth"),
    ("17th", "Seventeenth"),
    ("18th", "Eighteenth"),
    ("19th", "Nineteenth"),
    ("20th", "Twentieth"),
    ("21st", "Twenty-first"),
    ("22nd", "Twenty-second"),
    ("23rd", "Twenty-third"),
    ("24th", "Twenty-fourth"),
    ("25th", "Twenty-fifth"),
    ("26th", "Twenty-sixth"),
    ("27th", "Twenty-seventh"),
]

# Table geometry. The separator is 16/7/7 dashes wide, i.e. a 14-character
# name column and two 5-character percentage columns, each padded by one
# space on either side.
NAME_WIDTH = 14
PERCENT_WIDTH = 5
SEPARATOR = "+----------------+-------+-------+"
# Built from the same widths as the rows so the header always lines up.
HEADER = (
    f"| {'Amendment':<{NAME_WIDTH}} "
    f"| {'Left':<{PERCENT_WIDTH}} "
    f"| {'Right':<{PERCENT_WIDTH}} |"
)

with open("model.gptc", "rb") as f:
    model = gptc.deserialize(f)

# Spelled-out amendment name -> per-category scores. The code below reads the
# "left" and "right" keys of each entry.
data = {}

for number, name in amendments:
    number_data = model.get(number + " Amendment")
    name_data = model.get(name + " Amendment")

    # A falsy lookup result is treated as "no data for this spelling".
    if not number_data and not name_data:
        continue  # neither spelling is known to the model; leave it out
    if not name_data:
        data[name] = number_data
    elif not number_data:
        data[name] = name_data
    else:
        # Both spellings are known: take the unweighted mean per category.
        # NOTE(review): assumes both lookups return the same set of keys.
        data[name] = {
            key: (number_data[key] + name_data[key]) / 2
            for key in number_data
        }

# Ascending by "left" score.
classified_amendments = sorted(data.items(), key=lambda x: x[1]["left"])

print("# Constitutional Amendment Analysis")
print()
print("""This is an analysis of which amendments to the U.S. Constitution are mentioned
more in right- or left-leaning American news sources. Data do not necessarily
correlate with support or opposition for the amendment among right- or
left-leaning Americans.""")
print()
print(HEADER)
print(SEPARATOR)
# `scores` rather than `data`, so the dict built above is not rebound.
for amendment, scores in classified_amendments:
    percent_right = f"{scores['right']*100:>4.1f}%"
    percent_left = f"{scores['left']*100:>4.1f}%"

    print(f"| {amendment:<{NAME_WIDTH}} | {percent_left} | {percent_right} |")
print(SEPARATOR)
print(HEADER)
@ -0,0 +1,85 @@
|
||||
import gptc |
||||
|
||||
# The 50 U.S. states, each looked up in the model by its full name.
states = [
    "Alabama",
    "Alaska",
    "Arizona",
    "Arkansas",
    "California",
    "Colorado",
    "Connecticut",
    "Delaware",
    "Florida",
    "Georgia",
    "Hawaii",
    "Idaho",
    "Illinois",
    "Indiana",
    "Iowa",
    "Kansas",
    "Kentucky",
    "Louisiana",
    "Maine",
    "Maryland",
    "Massachusetts",
    "Michigan",
    "Minnesota",
    "Mississippi",
    "Missouri",
    "Montana",
    "Nebraska",
    "Nevada",
    "New Hampshire",
    "New Jersey",
    "New Mexico",
    "New York",
    "North Carolina",
    "North Dakota",
    "Ohio",
    "Oklahoma",
    "Oregon",
    "Pennsylvania",
    "Rhode Island",
    "South Carolina",
    "South Dakota",
    "Tennessee",
    "Texas",
    "Utah",
    "Vermont",
    "Virginia",
    "Washington",
    "West Virginia",
    "Wisconsin",
    "Wyoming",
]

with open("model.gptc", "rb") as f:
    model = gptc.deserialize(f)

# (state, scores) pairs, ascending by "left" score.
# NOTE(review): assumes the model has an entry with a "left" key for every
# state; a missing one would raise in the sort key below.
classified_states = [(state, model.get(state)) for state in states]
classified_states.sort(key=lambda x: x[1]["left"])

# Width of the name column; 14 for the list above ("North Carolina").
longest = max(len(state) for state in states)

# Width of a rendered percentage such as " 5.0%" or "50.0%".
PERCENT_WIDTH = 5
# Derived from `longest` so the frame follows the name column's real width.
SEPARATOR = "+" + "-" * (longest + 2) + "+-------+-------+"
HEADER = (
    f"| {'State':<{longest}} "
    f"| {'Left':<{PERCENT_WIDTH}} "
    f"| {'Right':<{PERCENT_WIDTH}} |"
)

print("# State Analysis")
print()
print("""This is an analysis of which states are mentioned more in right- or left-
leaning American news sources. Results do not necessarily correlate with the
political views of residents of the states; for example, the predominantly
liberal state of Oregon is mentioned more in right-leaning sources than in
left-leaning ones.""")
print()
print(HEADER)
print(SEPARATOR)
for state, scores in classified_states:
    # Fixed-width, one-decimal percentages keep the columns aligned whether
    # the value has one or two integer digits.
    percent_right = f"{scores['right']*100:>4.1f}%"
    percent_left = f"{scores['left']*100:>4.1f}%"

    print(f"| {state:<{longest}} | {percent_left} | {percent_right} |")
print(SEPARATOR)
print(HEADER)
@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env python3 |
||||
# SPDX-License-Identifier: GPL-3.0-or-later |
||||
|
||||
# Copyright (c) 2022 Samuel L Sloniker |
||||
# |
||||
# This program is free software: you can redistribute it and/or modify it under |
||||
# the terms of the GNU General Public License as published by the Free Software |
||||
# Foundation, either version 3 of the License, or (at your option) any later |
||||
# version. |
||||
# |
||||
# This program is distributed in the hope that it will be useful, but WITHOUT |
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
||||
# details. |
||||
# |
||||
# You should have received a copy of the GNU General Public License along with |
||||
# this program. If not, see <https://www.gnu.org/licenses/>. |
||||
|
||||
import sqlite3 |
||||
import tomli |
||||
import gptc |
||||
|
||||
# Compile every stored article into a GPTC model and write it to model.gptc.

with open("compiler.toml", "rb") as f:
    config = tomli.load(f)

con = sqlite3.connect("articles.db")
try:
    con.execute(
        "CREATE TABLE IF NOT EXISTS articles(source, category, url, text);"
    )

    # One training record per article row.
    raw_model = [
        {"text": text, "category": category}
        for text, category in con.execute(
            "SELECT text, category FROM articles;"
        )
    ]

    con.commit()
finally:
    # Release the database even if the query above fails.
    con.close()

# Compile before opening the output file: opening it truncates any existing
# model.gptc, so a failed compile must not be able to destroy the old model.
model = gptc.compile(
    raw_model,
    max_ngram_length=config["max_ngram_length"],
    min_count=config["min_count"],
)

with open("model.gptc", "w+b") as f:
    model.serialize(f)
@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env python3 |
||||
# SPDX-License-Identifier: GPL-3.0-or-later |
||||
|
||||
# Copyright (c) 2022 Samuel L Sloniker |
||||
# |
||||
# This program is free software: you can redistribute it and/or modify it under |
||||
# the terms of the GNU General Public License as published by the Free Software |
||||
# Foundation, either version 3 of the License, or (at your option) any later |
||||
# version. |
||||
# |
||||
# This program is distributed in the hope that it will be useful, but WITHOUT |
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
||||
# details. |
||||
# |
||||
# You should have received a copy of the GNU General Public License along with |
||||
# this program. If not, see <https://www.gnu.org/licenses/>. |
||||
|
||||
import os |
||||
import shutil |
||||
import sqlite3 |
||||
|
||||
# Create a fresh build/ directory containing copies of the data files, with
# the article text in the copied database replaced by a placeholder.

# Start from an empty build directory; a missing one is not an error.
try:
    shutil.rmtree("build")
except FileNotFoundError:
    pass
os.mkdir("build")

for filename in ("articles.db", "sources.toml", "compiler.toml", "model.gptc"):
    shutil.copy(filename, f"build/{filename}")

# Only the copy under build/ is modified; the original database is untouched.
con = sqlite3.connect("build/articles.db")
try:
    con.execute("UPDATE articles SET text = '***';")
    # VACUUM cannot run inside an open transaction, so commit the UPDATE first.
    con.commit()
    con.execute("VACUUM;")
finally:
    # Close the connection even if one of the statements fails.
    con.close()
@ -1,8 +0,0 @@
|
||||
#!/bin/sh
# Temporarily replace the text of every article in articles.db with "***",
# wait for the user to press enter, then put the original database back.

# Stop at the first failing command. Without this, a failed backup would be
# followed by the UPDATE below, wiping the only copy of the article text.
set -e

cp articles.db old_articles.db

# From here on a backup exists: restore it however the script ends.
restore() {
    mv -f old_articles.db articles.db
}
trap restore EXIT
trap 'exit 1' HUP INT TERM

# SQL string literals take single quotes; double quotes denote identifiers.
sqlite3 articles.db "UPDATE articles SET text = '***';"
sqlite3 articles.db 'VACUUM;'

# printf and `read` with a variable name are portable across /bin/sh
# implementations; `echo -n` and a bare `read` are not.
printf 'Press enter when done...'
read -r _reply || true
@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env python3 |
||||
# SPDX-License-Identifier: GPL-3.0-or-later |
||||
|
||||
# Copyright (c) 2022 Samuel L Sloniker |
||||
# |
||||
# This program is free software: you can redistribute it and/or modify it under |
||||
# the terms of the GNU General Public License as published by the Free Software |
||||
# Foundation, either version 3 of the License, or (at your option) any later |
||||
# version. |
||||
# |
||||
# This program is distributed in the hope that it will be useful, but WITHOUT |
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
||||
# details. |
||||
# |
||||
# You should have received a copy of the GNU General Public License along with |
||||
# this program. If not, see <https://www.gnu.org/licenses/>. |
||||
|
||||
import sqlite3 |
||||
import tomli |
||||
|
||||
# Print a Markdown summary of how many articles and sources the database
# holds, overall and for each of the "left" and "right" categories.

with open("sources.toml", "rb") as f:
    sources = tomli.load(f)


def count_rows(con, sql, parameters=()):
    """Return the integer produced by a single-value ``SELECT COUNT(*)`` query.

    Counting inside SQLite avoids fetching every matching row into Python
    just to measure the length of the result.
    """
    return con.execute(sql, parameters).fetchone()[0]


def format_breakdown(source_list):
    """Return one Markdown bullet (``* name: count``) per source, one per line."""
    return "\n".join(
        f"* {source['name']}: {source['count']}" for source in source_list
    )


left_sources = []
right_sources = []

con = sqlite3.connect("articles.db")
try:
    con.execute(
        "CREATE TABLE IF NOT EXISTS articles(source, category, url, text);"
    )

    article_count = count_rows(con, "SELECT COUNT(*) FROM articles")
    left_article_count = count_rows(
        con, "SELECT COUNT(*) FROM articles WHERE category = 'left'"
    )
    right_article_count = count_rows(
        con, "SELECT COUNT(*) FROM articles WHERE category = 'right'"
    )

    for source_id, source_info in sources.items():
        # Any category other than "left" is listed under "right".
        if source_info["category"] == "left":
            source_list = left_sources
        else:
            source_list = right_sources

        source_list.append({
            "name": source_info["name"],
            # An optional "sort" key overrides the name for ordering.
            "sort": source_info.get("sort", source_info["name"]),
            "count": count_rows(
                con,
                "SELECT COUNT(*) FROM articles WHERE source = ?",
                (source_id,),
            ),
        })

    con.commit()
finally:
    # Release the database even if a query or a config lookup fails.
    con.close()

left_sources.sort(key=lambda x: x["sort"])
right_sources.sort(key=lambda x: x["sort"])

# Every configured source lands in exactly one of the two lists.
source_count = len(sources)
left_source_count = len(left_sources)
right_source_count = len(right_sources)

left_breakdown = format_breakdown(left_sources)
right_breakdown = format_breakdown(right_sources)


print(f"""\
This model contains a total of {article_count} articles from {source_count} sources.

## Left

{left_breakdown}

Left total: {left_article_count} articles from {left_source_count} sources

## Right

{right_breakdown}

Right total: {right_article_count} articles from {right_source_count} sources""")
@ -1,22 +0,0 @@
|
||||
#!/bin/bash
# Print a Markdown summary of the articles in articles.db: the overall and
# per-category totals, followed by a per-source article count for the
# sources that appear in each category.

db=articles.db

# Run one SQL statement against the database and print its result.
# stdin is detached so a call inside a `while read` loop cannot consume the
# loop's input.
query() {
    sqlite3 "$db" "$1" < /dev/null
}

# Print a "* source: count" bullet for every distinct source in category $1.
print_sources() {
    local quote="'" source escaped
    query "SELECT DISTINCT source FROM articles WHERE category = '$1'" | sort |
    while IFS= read -r source; do
        # Skip blank lines (e.g. a NULL source), as the original word-splitting
        # loop did.
        [ -n "$source" ] || continue
        # Double any single quote so the name is a valid SQL string literal.
        escaped=${source//$quote/$quote$quote}
        echo "* $source: $(query "SELECT COUNT(*) FROM articles WHERE source = '$escaped'")"
    done
}

# COUNT(*) counts rows directly instead of counting output lines with wc.
total=$(query "SELECT COUNT(*) FROM articles")
left=$(query "SELECT COUNT(*) FROM articles WHERE category = 'left'")
right=$(query "SELECT COUNT(*) FROM articles WHERE category = 'right'")

echo "This model contains a total of $total articles ($left left, $right right)."
echo ""
echo "## Left"
echo ""
print_sources left
echo ""
echo "## Right"
echo ""
print_sources right
Loading…
Reference in new issue