Export compiler.toml

Decrease ngram length to 5
Add analyses
2023-01-08 08:48:20 -08:00 · 2023-01-08 08:47:59 -08:00 · 2023-01-08 08:45:22 -08:00 · 2022-12-24 12:29:00 -08:00 · 2022-12-20 17:50:21 -08:00 · 2022-12-20 17:31:12 -08:00
7 changed files with 176 additions and 8 deletions
--- a/README.md
+++ b/README.md
@@ -40,7 +40,9 @@ Inclusion of a site in this model is not an endorsement of the site.
 * Epoch Times
 * The Federalist
 * Fox News
+* LifeSiteNews
 * New York Post
 * Not the Bee
 * One America News Network
+* RedState
 * Washington Examiner
--- a/analyses/constitutional_amendments.py
+++ b/analyses/constitutional_amendments.py
@@ -0,0 +1,70 @@
+import gptc
+
+amendments = [
+    ("1st", "First"),
+    ("2nd", "Second"),
+    ("3rd", "Third"),
+    ("4th", "Fourth"),
+    ("5th", "Fifth"),
+    ("6th", "Sixth"),
+    ("7th", "Seventh"),
+    ("8th", "Eighth"),
+    ("9th", "Ninth"),
+    ("10th", "Tenth"),
+    ("11th", "Eleventh"),
+    ("12th", "Twelfth"),
+    ("13th", "Thirteenth"),
+    ("14th", "Fourteenth"),
+    ("15th", "Fifteenth"),
+    ("16th", "Sixteenth"),
+    ("17th", "Seventeenth"),
+    ("18th", "Eighteenth"),
+    ("19th", "Nineteenth"),
+    ("20th", "Twentieth"),
+    ("21st", "Twenty-first"),
+    ("22nd", "Twenty-second"),
+    ("23rd", "Twenty-third"),
+    ("24th", "Twenty-fourth"),
+    ("25th", "Twenty-fifth"),
+    ("26th", "Twenty-sixth"),
+    ("27th", "Twenty-seventh"),
+]
+
+with open("model.gptc", "rb") as f:
+    model = gptc.deserialize(f)
+
+data = {}
+
+for number, name in amendments:
+    number_data = model.get(number + " Amendment")
+    name_data = model.get(name + " Amendment")
+
+    if number_data and not name_data:
+        data[name] = number_data
+    elif name_data and not number_data:
+        data[name] = name_data
+    elif number_data and name_data:
+        data[name] = {
+            key: (number_data[key] + name_data[key]) / 2
+            for key in number_data.keys()
+        }
+
+classified_amendments = sorted(data.items(), key=lambda x: x[1]["left"])
+
+print("# Constitutional Amendment Analysis")
+print()
+print("""This is an analysis of which amendments to the U.S. Constitution are mentioned
+more in right- or left-leaning American news sources. Data do not necessarily
+correlate with support or opposition for the amendment among right- or
+left-leaning Americans.""")
+print()
+print("| Amendment      | Left  | Right |")
+print("+----------------+-------+-------+")
+for amendment, data in classified_amendments:
+    percent_right = f"{data['right']*100:>4.1f}%"
+    percent_left = f"{data['left']*100:>4.1f}%"
+
+    amendment_padding = " "*(14 - len(amendment))
+    print(f"| {amendment}{amendment_padding} | {percent_left} | {percent_right} |")
+print("+----------------+-------+-------+")
+print("| Amendment      | Left  | Right |")
--- a/analyses/states.py
+++ b/analyses/states.py
@@ -0,0 +1,85 @@
+import gptc
+
+states = [
+    "Alabama",
+    "Alaska",
+    "Arizona",
+    "Arkansas",
+    "California",
+    "Colorado",
+    "Connecticut",
+    "Delaware",
+    "Florida",
+    "Georgia",
+    "Hawaii",
+    "Idaho",
+    "Illinois",
+    "Indiana",
+    "Iowa",
+    "Kansas",
+    "Kentucky",
+    "Louisiana",
+    "Maine",
+    "Maryland",
+    "Massachusetts",
+    "Michigan",
+    "Minnesota",
+    "Mississippi",
+    "Missouri",
+    "Montana",
+    "Nebraska",
+    "Nevada",
+    "New Hampshire",
+    "New Jersey",
+    "New Mexico",
+    "New York",
+    "North Carolina",
+    "North Dakota",
+    "Ohio",
+    "Oklahoma",
+    "Oregon",
+    "Pennsylvania",
+    "Rhode Island",
+    "South Carolina",
+    "South Dakota",
+    "Tennessee",
+    "Texas",
+    "Utah",
+    "Vermont",
+    "Virginia",
+    "Washington",
+    "West Virginia",
+    "Wisconsin",
+    "Wyoming",
+]
+
+with open("model.gptc", "rb") as f:
+    model = gptc.deserialize(f)
+
+classified_states = []
+
+for state in states:
+    classified_states.append((state, model.get(state),))
+
+classified_states.sort(key=lambda x: x[1]["left"])
+
+longest = max([len(state) for state in states])
+
+print("# State Analysis")
+print()
+print("""This is an analysis of which states are mentioned more in right- or left-
+leaning American news sources. Results do not necessarily correlate with the
+political views of residents of the states; for example, the predominantly
+liberal state of Oregon is mentioned more in right-leaning sources than in
+left-leaning ones.""")
+print()
+print("| State          | Left  | Right |")
+print("+----------------+-------+-------+")
+for state, data in classified_states:
+    percent_right = f"{round(data['right']*1000)/10}%"
+    percent_left = f"{round(data['left']*1000)/10}%"
+
+    state_padding = " "*(longest - len(state))
+    print(f"| {state}{state_padding} | {percent_left} | {percent_right} |")
+print("+----------------+-------+-------+")
+print("| State          | Left  | Right |")
--- a/compile.py
+++ b/compile.py
@@ -32,13 +32,11 @@ raw_model = [
 ]

 with open("model.gptc", "w+b") as f:
-    f.write(
-        gptc.compile(
-            raw_model,
-            max_ngram_length=config["max_ngram_length"],
-            min_count=config["min_count"],
-        ).serialize()
-    )
+    gptc.compile(
+        raw_model,
+        max_ngram_length=config["max_ngram_length"],
+        min_count=config["min_count"],
+    ).serialize(f)

 con.commit()
 con.close()
--- a/compiler.toml
+++ b/compiler.toml
@@ -1,2 +1,2 @@
-max_ngram_length=8
+max_ngram_length=5
 min_count=5
--- a/export.py
+++ b/export.py
@@ -28,6 +28,7 @@ os.mkdir("build")

 shutil.copy("articles.db", "build/articles.db")
 shutil.copy("sources.toml", "build/sources.toml")
+shutil.copy("compiler.toml", "build/compiler.toml")
 shutil.copy("model.gptc", "build/model.gptc")

 con = sqlite3.Connection("build/articles.db")
--- a/sources.toml
+++ b/sources.toml
@@ -129,6 +129,12 @@ category="right"
 contains_articles=true
 name="Fox News"

+[lifesitenews]
+feed="https://www.lifesitenews.com/ldn/rss/headlines.xml"
+category="right"
+contains_articles=false
+name="LifeSiteNews"
+
 [not_the_bee]
 feed="https://notthebee.com/feed"
 category="right"
@@ -147,6 +153,12 @@ category="right"
 contains_articles=true
 name="One America News Network"

+[redstate]
+feed="https://redstate.com/feed"
+category="right"
+contains_articles=false
+name="RedState"
+
 [washington_examiner]
 feed="https://feeds.feedburner.com/dcexaminer/Politics"
 category="right"
Author	SHA1	Message	Date
Samuel Sloniker	9c66b18cfe	Export compiler.toml	2023-01-08 08:48:20 -08:00
Samuel Sloniker	fe088822e1	Decrease ngram length to 5	2023-01-08 08:47:59 -08:00
Samuel Sloniker	1daab919ea	Add analyses	2023-01-08 08:45:22 -08:00
Samuel Sloniker	4eeb8d2d17	Use GPTC v4.0.0	2022-12-24 12:29:00 -08:00
Samuel Sloniker	f1ccaaabab	Add LifeSiteNews	2022-12-20 17:50:21 -08:00
Samuel Sloniker	9d82b07f17	Add RedState	2022-12-20 17:31:12 -08:00