gptc-news-model/stats.py

#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-3.0-or-later

# Copyright (c) 2022 Samuel L Sloniker
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.

import sqlite3
import tomli

# Source definitions are read from sources.toml; the loaded mapping is keyed
# by source ID.
with open("sources.toml", "rb") as sources_file:
    sources = tomli.load(sources_file)

# Open the article database. The table is created when absent so the queries
# below also work against a fresh database file.
con = sqlite3.connect("articles.db")
con.execute(
    "CREATE TABLE IF NOT EXISTS articles(source, category, url, text);"
)

# Overall article totals. Let SQLite do the counting instead of fetching every
# URL into a Python list only to take its length.
article_count = con.execute("SELECT COUNT(*) FROM articles").fetchone()[0]
left_article_count = con.execute(
    "SELECT COUNT(*) FROM articles WHERE category = 'left'"
).fetchone()[0]
right_article_count = con.execute(
    "SELECT COUNT(*) FROM articles WHERE category = 'right'"
).fetchone()[0]

# A single grouped query yields the article count of every source at once,
# rather than fetching all matching URLs separately for each source.
articles_per_source = dict(
    con.execute("SELECT source, COUNT(*) FROM articles GROUP BY source")
)

# Number of configured sources, overall and per category.
source_count = 0
left_source_count = 0
right_source_count = 0

# One record per source (display name, sort key, article count), by category.
left_sources = []
right_sources = []

for source_id, source_info in sources.items():
    source_count += 1
    # NOTE(review): every category other than "left" is tallied as right
    # here, while the article totals only count rows whose category is exactly
    # 'right' -- confirm sources.toml never uses any other category value.
    if source_info["category"] == "left":
        left_source_count += 1
        source_list = left_sources
    else:
        right_source_count += 1
        source_list = right_sources

    source_list.append(
        {
            "name": source_info["name"],
            # An explicit "sort" key overrides the display name for ordering.
            "sort": source_info.get("sort", source_info["name"]),
            # Sources without stored articles are absent from the grouped
            # result, so they default to zero.
            "count": articles_per_source.get(source_id, 0),
        }
    )

left_sources.sort(key=lambda x: x["sort"])
right_sources.sort(key=lambda x: x["sort"])

# Bulleted "name: count" lines, one per source, in sorted order.
left_breakdown = "\n".join(
    f"* {source['name']}: {source['count']}" for source in left_sources
)
right_breakdown = "\n".join(
    f"* {source['name']}: {source['count']}" for source in right_sources
)

# The database is not used past this point. Commit first so the connection is
# not closed with a transaction still pending, then release the handle.
con.commit()
con.close()


# Render the Markdown-style summary (overall totals, then one section per
# category with its per-source breakdown) and write it to standard output.
summary = f"""\
This model contains a total of {article_count} articles from {source_count} sources.

## Left

{left_breakdown}

Left total: {left_article_count} articles from {left_source_count} sources

## Right

{right_breakdown}

Right total: {right_article_count} articles from {right_source_count} sources"""
print(summary)