Compare commits
32 Commits
2022-11-24
...
master
12 changed files with 526 additions and 82 deletions
@ -0,0 +1,70 @@ |
|||||||
|
import gptc |
||||||
|
|
||||||
|
amendments = [
    ("1st", "First"),
    ("2nd", "Second"),
    ("3rd", "Third"),
    ("4th", "Fourth"),
    ("5th", "Fifth"),
    ("6th", "Sixth"),
    ("7th", "Seventh"),
    ("8th", "Eighth"),
    ("9th", "Ninth"),
    ("10th", "Tenth"),
    ("11th", "Eleventh"),
    ("12th", "Twelfth"),
    ("13th", "Thirteenth"),
    ("14th", "Fourteenth"),
    ("15th", "Fifteenth"),
    ("16th", "Sixteenth"),
    ("17th", "Seventeenth"),
    ("18th", "Eighteenth"),
    ("19th", "Nineteenth"),
    ("20th", "Twentieth"),
    ("21st", "Twenty-first"),
    ("22nd", "Twenty-second"),
    ("23rd", "Twenty-third"),
    ("24th", "Twenty-fourth"),
    ("25th", "Twenty-fifth"),
    ("26th", "Twenty-sixth"),
    ("27th", "Twenty-seventh"),
]

# Width of the name column, derived from the longest spelled-out name
# ("Twenty-seventh", 14 characters) instead of a hard-coded 14.
NAME_WIDTH = max(len(name) for _, name in amendments)
# Percentages are rendered as e.g. "50.0%" (5 characters).
VALUE_WIDTH = 5

TABLE_HEADER = (
    f"| {'Amendment':<{NAME_WIDTH}} "
    f"| {'Left':<{VALUE_WIDTH}} "
    f"| {'Right':<{VALUE_WIDTH}} |"
)
TABLE_RULE = (
    "+" + "-" * (NAME_WIDTH + 2)
    + "+" + "-" * (VALUE_WIDTH + 2)
    + "+" + "-" * (VALUE_WIDTH + 2) + "+"
)

INTRO = """This is an analysis of which amendments to the U.S. Constitution are mentioned
more in right- or left-leaning American news sources. Data do not necessarily
correlate with support or opposition for the amendment among right- or
left-leaning Americans."""


def merge_scores(number_data, name_data):
    """Combine the scores found for the two spellings of one amendment.

    Each argument is whatever ``model.get`` returned for the ordinal
    spelling ("1st Amendment") and the word spelling ("First Amendment").
    If only one is truthy it is returned unchanged; if both are truthy the
    result maps every key of ``number_data`` to the mean of the two values;
    if neither is truthy, ``None`` is returned.
    """
    if number_data and name_data:
        return {
            key: (number_data[key] + name_data[key]) / 2
            for key in number_data
        }
    return number_data or name_data or None


def classify_amendments(model):
    """Return ``(name, scores)`` pairs sorted by ascending ``"left"`` score.

    ``model`` only needs a ``get(token)`` method. Amendments for which
    neither spelling yields data are left out.
    """
    scores_by_name = {}
    for number, name in amendments:
        merged = merge_scores(
            model.get(number + " Amendment"),
            model.get(name + " Amendment"),
        )
        if merged:
            scores_by_name[name] = merged
    return sorted(scores_by_name.items(), key=lambda item: item[1]["left"])


def format_row(name, scores):
    """Render one table row from a name and a dict with "left"/"right"."""
    percent_left = f"{scores['left'] * 100:>4.1f}%"
    percent_right = f"{scores['right'] * 100:>4.1f}%"
    return f"| {name:<{NAME_WIDTH}} | {percent_left} | {percent_right} |"


def render_report(classified):
    """Build the full Markdown report text for the classified amendments."""
    lines = [
        "# Constitutional Amendment Analysis",
        "",
        INTRO,
        "",
        TABLE_HEADER,
        TABLE_RULE,
    ]
    lines.extend(format_row(name, scores) for name, scores in classified)
    # The header is repeated below the table, as in the original output.
    lines.extend([TABLE_RULE, TABLE_HEADER])
    return "\n".join(lines)


def main():
    """Load ``model.gptc`` and print the amendment report to stdout."""
    with open("model.gptc", "rb") as f:
        model = gptc.deserialize(f)
    print(render_report(classify_amendments(model)))


if __name__ == "__main__":
    main()
@ -0,0 +1,85 @@ |
|||||||
|
import gptc |
||||||
|
|
||||||
|
states = [
    "Alabama",
    "Alaska",
    "Arizona",
    "Arkansas",
    "California",
    "Colorado",
    "Connecticut",
    "Delaware",
    "Florida",
    "Georgia",
    "Hawaii",
    "Idaho",
    "Illinois",
    "Indiana",
    "Iowa",
    "Kansas",
    "Kentucky",
    "Louisiana",
    "Maine",
    "Maryland",
    "Massachusetts",
    "Michigan",
    "Minnesota",
    "Mississippi",
    "Missouri",
    "Montana",
    "Nebraska",
    "Nevada",
    "New Hampshire",
    "New Jersey",
    "New Mexico",
    "New York",
    "North Carolina",
    "North Dakota",
    "Ohio",
    "Oklahoma",
    "Oregon",
    "Pennsylvania",
    "Rhode Island",
    "South Carolina",
    "South Dakota",
    "Tennessee",
    "Texas",
    "Utah",
    "Vermont",
    "Virginia",
    "Washington",
    "West Virginia",
    "Wisconsin",
    "Wyoming",
]

# Width of the state column: the longest state name (14 characters).
STATE_WIDTH = max(len(state) for state in states)
# Percentages are rendered as e.g. "50.0%" (5 characters).
VALUE_WIDTH = 5

# Header and rule are derived from the same widths as the rows so the
# three can never drift apart.
TABLE_HEADER = (
    f"| {'State':<{STATE_WIDTH}} "
    f"| {'Left':<{VALUE_WIDTH}} "
    f"| {'Right':<{VALUE_WIDTH}} |"
)
TABLE_RULE = (
    "+" + "-" * (STATE_WIDTH + 2)
    + "+" + "-" * (VALUE_WIDTH + 2)
    + "+" + "-" * (VALUE_WIDTH + 2) + "+"
)

INTRO = """This is an analysis of which states are mentioned more in right- or left-
leaning American news sources. Results do not necessarily correlate with the
political views of residents of the states; for example, the predominantly
liberal state of Oregon is mentioned more in right-leaning sources than in
left-leaning ones."""


def format_percent(fraction):
    """Render a 0..1 fraction as a right-aligned percentage such as " 5.0%".

    The fixed width keeps single-digit and double-digit percentages in the
    same column.
    """
    return f"{fraction * 100:>4.1f}%"


def classify_states(model):
    """Return ``(state, scores)`` pairs sorted by ascending ``"left"`` score.

    ``model`` only needs a ``get(token)`` method. States for which it
    returns nothing truthy are skipped instead of failing the sort with a
    ``KeyError``.
    """
    scored = [(state, model.get(state)) for state in states]
    classified = [(state, scores) for state, scores in scored if scores]
    classified.sort(key=lambda item: item[1]["left"])
    return classified


def format_row(state, scores):
    """Render one table row from a state name and its "left"/"right" scores."""
    percent_left = format_percent(scores["left"])
    percent_right = format_percent(scores["right"])
    return f"| {state:<{STATE_WIDTH}} | {percent_left} | {percent_right} |"


def render_report(classified):
    """Build the full Markdown report text for the classified states."""
    lines = ["# State Analysis", "", INTRO, "", TABLE_HEADER, TABLE_RULE]
    lines.extend(format_row(state, scores) for state, scores in classified)
    # The header is repeated below the table, as in the original output.
    lines.extend([TABLE_RULE, TABLE_HEADER])
    return "\n".join(lines)


def main():
    """Load ``model.gptc`` and print the state report to stdout."""
    with open("model.gptc", "rb") as f:
        model = gptc.deserialize(f)
    print(render_report(classify_states(model)))


if __name__ == "__main__":
    main()
@ -0,0 +1,42 @@ |
|||||||
|
#!/usr/bin/env python3 |
||||||
|
# SPDX-License-Identifier: GPL-3.0-or-later |
||||||
|
|
||||||
|
# Copyright (c) 2022 Samuel L Sloniker |
||||||
|
# |
||||||
|
# This program is free software: you can redistribute it and/or modify it under |
||||||
|
# the terms of the GNU General Public License as published by the Free Software |
||||||
|
# Foundation, either version 3 of the License, or (at your option) any later |
||||||
|
# version. |
||||||
|
# |
||||||
|
# This program is distributed in the hope that it will be useful, but WITHOUT |
||||||
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
||||||
|
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
||||||
|
# details. |
||||||
|
# |
||||||
|
# You should have received a copy of the GNU General Public License along with |
||||||
|
# this program. If not, see <https://www.gnu.org/licenses/>. |
||||||
|
|
||||||
|
import sqlite3 |
||||||
|
import tomli |
||||||
|
import gptc |
||||||
|
|
||||||
|
def rows_to_raw_model(rows):
    """Turn ``(text, category)`` rows into the list of dicts passed to gptc."""
    return [{"text": text, "category": category} for text, category in rows]


def load_articles(database="articles.db"):
    """Read every article's text and category from the SQLite database.

    The ``articles`` table is created first if it does not exist, so an
    empty or missing database yields an empty list. The connection is
    closed even if a query fails.
    """
    con = sqlite3.connect(database)
    try:
        con.execute(
            "CREATE TABLE IF NOT EXISTS articles(source, category, url, text);"
        )
        raw_model = rows_to_raw_model(
            con.execute("SELECT text, category FROM articles;")
        )
        con.commit()
        return raw_model
    finally:
        con.close()


def main():
    """Compile ``articles.db`` into ``model.gptc`` using ``compiler.toml``."""
    with open("compiler.toml", "rb") as f:
        config = tomli.load(f)

    raw_model = load_articles()

    # Compile before opening the output file: opening with "w+b" truncates
    # it, so a failed compile would otherwise destroy the existing model.
    model = gptc.compile(
        raw_model,
        max_ngram_length=config["max_ngram_length"],
        min_count=config["min_count"],
    )

    with open("model.gptc", "w+b") as f:
        model.serialize(f)


if __name__ == "__main__":
    main()
@ -0,0 +1,39 @@ |
|||||||
|
#!/usr/bin/env python3 |
||||||
|
# SPDX-License-Identifier: GPL-3.0-or-later |
||||||
|
|
||||||
|
# Copyright (c) 2022 Samuel L Sloniker |
||||||
|
# |
||||||
|
# This program is free software: you can redistribute it and/or modify it under |
||||||
|
# the terms of the GNU General Public License as published by the Free Software |
||||||
|
# Foundation, either version 3 of the License, or (at your option) any later |
||||||
|
# version. |
||||||
|
# |
||||||
|
# This program is distributed in the hope that it will be useful, but WITHOUT |
||||||
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
||||||
|
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
||||||
|
# details. |
||||||
|
# |
||||||
|
# You should have received a copy of the GNU General Public License along with |
||||||
|
# this program. If not, see <https://www.gnu.org/licenses/>. |
||||||
|
|
||||||
|
import os |
||||||
|
import shutil |
||||||
|
import sqlite3 |
||||||
|
|
||||||
|
# Files copied verbatim into the build directory.
BUILD_FILES = ("articles.db", "sources.toml", "compiler.toml", "model.gptc")


def build(build_dir="build"):
    """Create a fresh build directory with a text-stripped article database.

    Any existing ``build_dir`` is removed and recreated, the files in
    ``BUILD_FILES`` are copied into it from the current directory, and in
    the copied ``articles.db`` every row's ``text`` is overwritten with
    ``'***'`` before the database is vacuumed. The source ``articles.db``
    is not modified.
    """
    try:
        shutil.rmtree(build_dir)
    except FileNotFoundError:
        pass
    os.mkdir(build_dir)

    for filename in BUILD_FILES:
        shutil.copy(filename, os.path.join(build_dir, filename))

    con = sqlite3.connect(os.path.join(build_dir, "articles.db"))
    try:
        con.execute("UPDATE articles SET text = '***';")
        # Commit first: VACUUM cannot run inside an open transaction.
        con.commit()
        con.execute("VACUUM;")
        con.commit()
    finally:
        con.close()


if __name__ == "__main__":
    build()
@ -1,8 +0,0 @@ |
|||||||
#!/bin/sh
# Temporarily replace every article's text in articles.db with '***',
# wait for the user to press enter, then restore the original database.

# Abort on the first failing command: without this, a failed backup copy
# would be followed by the destructive UPDATE with nothing to restore from.
set -eu

cp articles.db old_articles.db

# Put the original database back however the script ends.
restore() {
    mv -f old_articles.db articles.db
}
trap restore EXIT
# Turn signals into a normal exit so the EXIT trap above still runs.
trap 'exit 1' INT TERM HUP

# Single quotes make '***' a SQL string literal; double quotes denote an
# identifier in standard SQL.
sqlite3 articles.db "UPDATE articles SET text = '***';"
sqlite3 articles.db 'VACUUM;'

# printf and "read -r _" are portable; "echo -n" and a bare "read" are not
# guaranteed to work under /bin/sh.
printf 'Press enter when done...'
read -r _
|
@ -0,0 +1,77 @@ |
|||||||
|
#!/usr/bin/env python3 |
||||||
|
# SPDX-License-Identifier: GPL-3.0-or-later |
||||||
|
|
||||||
|
# Copyright (c) 2022 Samuel L Sloniker |
||||||
|
# |
||||||
|
# This program is free software: you can redistribute it and/or modify it under |
||||||
|
# the terms of the GNU General Public License as published by the Free Software |
||||||
|
# Foundation, either version 3 of the License, or (at your option) any later |
||||||
|
# version. |
||||||
|
# |
||||||
|
# This program is distributed in the hope that it will be useful, but WITHOUT |
||||||
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
||||||
|
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
||||||
|
# details. |
||||||
|
# |
||||||
|
# You should have received a copy of the GNU General Public License along with |
||||||
|
# this program. If not, see <https://www.gnu.org/licenses/>. |
||||||
|
|
||||||
|
import sqlite3 |
||||||
|
import tomli |
||||||
|
|
||||||
|
def build_report(sources, con):
    """Return the Markdown statistics report for the article database.

    ``sources`` maps a source id to a dict with "name", "category" and an
    optional "sort" key (defaulting to the name). ``con`` is an open SQLite
    connection with an ``articles`` table. Sources whose category is
    "left" are listed under "Left"; every other source is listed under
    "Right". Each group is ordered by its sort key.
    """
    # Let SQLite do the counting instead of materialising every row, and
    # fetch all per-source counts in one query rather than one per source.
    article_count = con.execute("SELECT COUNT(*) FROM articles").fetchone()[0]
    by_category = dict(
        con.execute("SELECT category, COUNT(*) FROM articles GROUP BY category")
    )
    by_source = dict(
        con.execute("SELECT source, COUNT(*) FROM articles GROUP BY source")
    )

    left_sources = []
    right_sources = []
    for source_id, source_info in sources.items():
        if source_info["category"] == "left":
            source_list = left_sources
        else:
            source_list = right_sources
        source_list.append({
            "name": source_info["name"],
            "sort": source_info.get("sort", source_info["name"]),
            "count": by_source.get(source_id, 0),
        })

    left_sources.sort(key=lambda source: source["sort"])
    right_sources.sort(key=lambda source: source["sort"])

    left_breakdown = "\n".join(
        f"* {source['name']}: {source['count']}" for source in left_sources
    )
    right_breakdown = "\n".join(
        f"* {source['name']}: {source['count']}" for source in right_sources
    )

    return f"""\
This model contains a total of {article_count} articles from {len(sources)} sources.

## Left

{left_breakdown}

Left total: {by_category.get("left", 0)} articles from {len(left_sources)} sources

## Right

{right_breakdown}

Right total: {by_category.get("right", 0)} articles from {len(right_sources)} sources"""


def main():
    """Print the statistics report for ``articles.db`` and ``sources.toml``."""
    with open("sources.toml", "rb") as f:
        sources = tomli.load(f)

    con = sqlite3.connect("articles.db")
    try:
        con.execute(
            "CREATE TABLE IF NOT EXISTS articles(source, category, url, text);"
        )
        report = build_report(sources, con)
        con.commit()
    finally:
        # Close the connection even if a query or a malformed source entry
        # raises.
        con.close()

    print(report)


if __name__ == "__main__":
    main()
@ -1,22 +0,0 @@ |
|||||||
#!/bin/bash
# Print a Markdown summary of articles.db: total article counts and a
# per-source breakdown for the 'left' and 'right' categories.

set -euo pipefail

db=articles.db

# Print the number of rows in `articles`, optionally restricted by the
# WHERE clause given as $1. COUNT(*) avoids piping every URL through wc,
# whose output may carry leading padding on some platforms.
count_articles() {
    sqlite3 "$db" "SELECT COUNT(*) FROM articles ${1:-};"
}

# Print one "* source: count" line for every distinct source that has an
# article in category $1, ordered by source. The count covers all of that
# source's articles. $1 is only ever one of the fixed literals below.
# Doing this in SQL means source names are never word-split by the shell
# or spliced into a query string.
breakdown() {
    sqlite3 "$db" "
        SELECT '* ' || a.source || ': ' ||
               (SELECT COUNT(*) FROM articles AS b WHERE b.source = a.source)
        FROM articles AS a
        WHERE a.category = '$1' AND a.source IS NOT NULL
        GROUP BY a.source
        ORDER BY a.source;"
}

total=$(count_articles)
left=$(count_articles "WHERE category = 'left'")
right=$(count_articles "WHERE category = 'right'")

echo "This model contains a total of $total articles ($left left, $right right)."
echo ""
echo "## Left"
echo ""
breakdown left
echo ""
echo "## Right"
echo ""
breakdown right
|
Loading…
Reference in new issue