Export compiler.toml

Decrease ngram length to 5
Add analyses
2023-01-08 08:48:20 -08:00 · 2023-01-08 08:47:59 -08:00 · 2023-01-08 08:45:22 -08:00 · 2022-12-24 12:29:00 -08:00 · 2022-12-20 17:50:21 -08:00 · 2022-12-20 17:31:12 -08:00
7 changed files with 176 additions and 8 deletions
--- a/README.md
+++ b/README.md
@@ -40,7 +40,9 @@ Inclusion of a site in this model is not an endorsement of the site.
 * Epoch Times
 * The Federalist
 * Fox News
+* LifeSiteNews
 * New York Post
 * Not the Bee
 * One America News Network
+* RedState
 * Washington Examiner
--- a/analyses/constitutional_amendments.py
+++ b/analyses/constitutional_amendments.py
@ -0,0 +1,70 @@
 import gptc
 # Constitutional amendment analysis: for every amendment, look up the phrases
 # "<ordinal> Amendment" and "<Name> Amendment" in the compiled model and print
 # the resulting "left" / "right" values as a text table on stdout.
 #
 # Each entry is (ordinal abbreviation, spelled-out name).
 amendments = [
    ("1st", "First"),
    ("2nd", "Second"),
    ("3rd", "Third"),
    ("4th", "Fourth"),
    ("5th", "Fifth"),
    ("6th", "Sixth"),
    ("7th", "Seventh"),
    ("8th", "Eighth"),
    ("9th", "Ninth"),
    ("10th", "Tenth"),
    ("11th", "Eleventh"),
    ("12th", "Twelfth"),
    ("13th", "Thirteenth"),
    ("14th", "Fourteenth"),
    ("15th", "Fifteenth"),
    ("16th", "Sixteenth"),
    ("17th", "Seventeenth"),
    ("18th", "Eighteenth"),
    ("19th", "Nineteenth"),
    ("20th", "Twentieth"),
    ("21st", "Twenty-first"),
    ("22nd", "Twenty-second"),
    ("23rd", "Twenty-third"),
    ("24th", "Twenty-fourth"),
    ("25th", "Twenty-fifth"),
    ("26th", "Twenty-sixth"),
    ("27th", "Twenty-seventh"),
 ]
 with open("model.gptc", "rb") as model_file:
    model = gptc.deserialize(model_file)
 # Maps the spelled-out amendment name to its per-category values.
 data = {}
 for ordinal, word in amendments:
    by_ordinal = model.get(ordinal + " Amendment")
    by_word = model.get(word + " Amendment")
    if by_ordinal and by_word:
        # Both spellings are known: use the mean of the two, per category.
        data[word] = {
            category: (by_ordinal[category] + by_word[category]) / 2
            for category in by_ordinal
        }
    elif by_ordinal or by_word:
        # Exactly one spelling is known: take it unchanged.
        data[word] = by_ordinal or by_word
    # Amendments with no data under either spelling are left out.
 # Ascending by the "left" value.
 classified_amendments = list(data.items())
 classified_amendments.sort(key=lambda entry: entry[1]["left"])
 table_header = "| Amendment      | Left  | Right |"
 table_rule = "+----------------+-------+-------+"
 print("# Constitutional Amendment Analysis")
 print()
 print("""This is an analysis of which amendments to the U.S. Constitution are mentioned
 more in right- or left-leaning American news sources. Data do not necessarily
 correlate with support or opposition for the amendment among right- or
 left-leaning Americans.""")
 print()
 print(table_header)
 print(table_rule)
 for title, scores in classified_amendments:
    right_pct = scores["right"] * 100
    left_pct = scores["left"] * 100
    # The name column is 14 characters wide, the length of "Twenty-seventh".
    print(f"| {title:<14} | {left_pct:>4.1f}% | {right_pct:>4.1f}% |")
 # The header is repeated below the table as a footer.
 print(table_rule)
 print(table_header)
--- a/analyses/states.py
+++ b/analyses/states.py
@ -0,0 +1,85 @@
 import gptc
 # State analysis: look up every U.S. state name in the compiled model and
 # print its "left" / "right" values as a text table on stdout, ordered by the
 # "left" value.
 states = [
    "Alabama",
    "Alaska",
    "Arizona",
    "Arkansas",
    "California",
    "Colorado",
    "Connecticut",
    "Delaware",
    "Florida",
    "Georgia",
    "Hawaii",
    "Idaho",
    "Illinois",
    "Indiana",
    "Iowa",
    "Kansas",
    "Kentucky",
    "Louisiana",
    "Maine",
    "Maryland",
    "Massachusetts",
    "Michigan",
    "Minnesota",
    "Mississippi",
    "Missouri",
    "Montana",
    "Nebraska",
    "Nevada",
    "New Hampshire",
    "New Jersey",
    "New Mexico",
    "New York",
    "North Carolina",
    "North Dakota",
    "Ohio",
    "Oklahoma",
    "Oregon",
    "Pennsylvania",
    "Rhode Island",
    "South Carolina",
    "South Dakota",
    "Tennessee",
    "Texas",
    "Utah",
    "Vermont",
    "Virginia",
    "Washington",
    "West Virginia",
    "Wisconsin",
    "Wyoming",
 ]
 with open("model.gptc", "rb") as f:
    model = gptc.deserialize(f)
 # Pair each state with the model's data for its name. A state for which the
 # lookup comes back empty is left out, because the sort below indexes the
 # "left" key and would otherwise raise.
 # NOTE(review): assumes model.get() returns a falsy value for unknown
 # tokens -- confirm against the GPTC version in use.
 classified_states = []
 for state in states:
    scores = model.get(state)
    if scores:
        classified_states.append((state, scores))
 # Ascending by the "left" value.
 classified_states.sort(key=lambda entry: entry[1]["left"])
 # Width of the name column. The header, the rules and every row derive from
 # this single value so they cannot drift apart.
 longest = max(len(state) for state in states)
 table_header = f"| {'State':<{longest}} | Left  | Right |"
 table_rule = "+" + "-" * (longest + 2) + "+-------+-------+"
 print("# State Analysis")
 print()
 print("""This is an analysis of which states are mentioned more in right- or left-
 leaning American news sources. Results do not necessarily correlate with the
 political views of residents of the states; for example, the predominantly
 liberal state of Oregon is mentioned more in right-leaning sources than in
 left-leaning ones.""")
 print()
 print(table_header)
 print(table_rule)
 for state, data in classified_states:
    # ".1%" scales the fraction by 100 and appends "%"; the fixed width of 5
    # right-aligns short values such as "5.0%" under longer ones like "50.0%".
    percent_right = f"{data['right']:>5.1%}"
    percent_left = f"{data['left']:>5.1%}"
    print(f"| {state:<{longest}} | {percent_left} | {percent_right} |")
 # The header is repeated below the table as a footer.
 print(table_rule)
 print(table_header)
--- a/compile.py
+++ b/compile.py
@ -32,13 +32,11 @@ raw_model = [
 ]
 with open("model.gptc", "w+b") as f:
    f.write(
    gptc.compile(
        raw_model,
        max_ngram_length=config["max_ngram_length"],
        min_count=config["min_count"],
-        ).serialize()
+    ).serialize(f)
    )
 con.commit()
 con.close()
--- a/compiler.toml
+++ b/compiler.toml
@@ -1,2 +1,2 @@
-max_ngram_length=3
+max_ngram_length=5
 min_count=5
--- a/export.py
+++ b/export.py
@ -28,6 +28,7 @@ os.mkdir("build")
 shutil.copy("articles.db", "build/articles.db")
 shutil.copy("sources.toml", "build/sources.toml")
 shutil.copy("compiler.toml", "build/compiler.toml")
 shutil.copy("model.gptc", "build/model.gptc")
 con = sqlite3.Connection("build/articles.db")
--- a/sources.toml
+++ b/sources.toml
@ -129,6 +129,12 @@ category="right"
 contains_articles=true
 name="Fox News"
 [lifesitenews]
 feed="https://www.lifesitenews.com/ldn/rss/headlines.xml"
 category="right"
 contains_articles=false
 name="LifeSiteNews"
 [not_the_bee]
 feed="https://notthebee.com/feed"
 category="right"
@ -147,6 +153,12 @@ category="right"
 contains_articles=true
 name="One America News Network"
 [redstate]
 feed="https://redstate.com/feed"
 category="right"
 contains_articles=false
 name="RedState"
 [washington_examiner]
 feed="https://feeds.feedburner.com/dcexaminer/Politics"
 category="right"
Author	SHA1	Message	Date
Samuel Sloniker	9c66b18cfe	Export compiler.toml	2023-01-08 08:48:20 -08:00
Samuel Sloniker	fe088822e1	Decrease ngram length to 5	2023-01-08 08:47:59 -08:00
Samuel Sloniker	1daab919ea	Add analyses	2023-01-08 08:45:22 -08:00
Samuel Sloniker	4eeb8d2d17	Use GPTC v4.0.0	2022-12-24 12:29:00 -08:00
Samuel Sloniker	f1ccaaabab	Add LifeSiteNews	2022-12-20 17:50:21 -08:00
Samuel Sloniker	9d82b07f17	Add RedState	2022-12-20 17:31:12 -08:00
Samuel Sloniker	f25594d771	Reduce model size	2022-11-28 18:00:31 -08:00
Samuel Sloniker	af9e5e92a3	Use 10-grams	2022-11-28 17:40:10 -08:00
`@ -1,2 +1,2 @@`
	`max_ngram_length=3`	`max_ngram_length=5`
	`min_count=5`	`min_count=5`