Add ability to exclude articles by URL
This commit is contained in:
parent
9562634a03
commit
7bf17b150e
16
download.py
16
download.py
|
@@ -6,6 +6,14 @@ import tomli
|
|||
import gptc
|
||||
import bs4
|
||||
|
||||
|
||||
def matches(string, checks):
    """Report whether ``string`` satisfies at least one entry of ``checks``.

    Each check is a mapping read through its ``"type"`` and ``"pattern"``
    keys. Only checks whose type is ``"startswith"`` are acted on: such a
    check matches when ``string`` begins with its pattern. A check of any
    other type never matches, and its ``"pattern"`` key is not consulted.

    Returns ``True`` on the first matching check, ``False`` when none match
    (including when ``checks`` is empty).
    """
    return any(
        rule["type"] == "startswith" and string.startswith(rule["pattern"])
        for rule in checks
    )
|
||||
|
||||
|
||||
with open("sources.toml", "rb") as f:
|
||||
sources = tomli.load(f)
|
||||
|
||||
|
@@ -38,12 +46,13 @@ try:
|
|||
entries = [
|
||||
entry
|
||||
for entry in feedparser.parse(url)["entries"]
|
||||
if not entry["link"] in known
|
||||
if not entry["link"] in known and not matches(entry["link"], config.get("exclude", []))
|
||||
]
|
||||
print(f"Fetched feed. Found {len(entries)} new articles.")
|
||||
|
||||
if contains_articles:
|
||||
for entry in entries:
|
||||
try:
|
||||
print(f"Saving {entry['link']}")
|
||||
con.execute(
|
||||
"INSERT INTO articles VALUES (?, ?, ?, ?);",
|
||||
|
@@ -56,8 +65,11 @@ try:
|
|||
).text,
|
||||
),
|
||||
)
|
||||
except KeyError:
|
||||
print("Not enough information. Skipping.")
|
||||
else:
|
||||
for entry in entries:
|
||||
try:
|
||||
print(f"Downloading {entry['link']}...")
|
||||
con.execute(
|
||||
"INSERT INTO articles VALUES (?, ?, ?, ?);",
|
||||
|
@@ -69,6 +81,8 @@ try:
|
|||
),
|
||||
)
|
||||
print(f"Done downloading.")
|
||||
except KeyError:
|
||||
print("Not enough information. Skipping.")
|
||||
finally:
|
||||
con.commit()
|
||||
|
||||
|
|
|
@@ -49,6 +49,10 @@ contains_articles=true
|
|||
feed="https://nypost.com/feed"
|
||||
category="right"
|
||||
contains_articles=false
|
||||
exclude=[
|
||||
{ type="startswith", pattern="https://pagesix.com" },
|
||||
{ type="startswith", pattern="https://decider.com" },
|
||||
]
|
||||
|
||||
[federalist]
|
||||
feed="https://thefederalist.com/feed"
|
||||
|
|
Loading…
Reference in New Issue
Block a user