Add ability to exclude articles by URL
This commit is contained in:
parent
9562634a03
commit
7bf17b150e
16
download.py
16
download.py
|
@ -6,6 +6,14 @@ import tomli
|
||||||
import gptc
|
import gptc
|
||||||
import bs4
|
import bs4
|
||||||
|
|
||||||
|
|
||||||
|
def matches(string, checks):
    """Return True if *string* satisfies any check in *checks*.

    Each check is a mapping with a "type" and a "pattern" key. The only
    supported type is "startswith": the check passes when *string* begins
    with the check's "pattern". An empty check list never matches.
    """
    return any(
        check["type"] == "startswith" and string.startswith(check["pattern"])
        for check in checks
    )
|
||||||
|
|
||||||
|
|
||||||
with open("sources.toml", "rb") as f:
|
with open("sources.toml", "rb") as f:
|
||||||
sources = tomli.load(f)
|
sources = tomli.load(f)
|
||||||
|
|
||||||
|
@ -38,12 +46,13 @@ try:
|
||||||
entries = [
|
entries = [
|
||||||
entry
|
entry
|
||||||
for entry in feedparser.parse(url)["entries"]
|
for entry in feedparser.parse(url)["entries"]
|
||||||
if not entry["link"] in known
|
if not entry["link"] in known and not matches(entry["link"], config.get("exclude", []))
|
||||||
]
|
]
|
||||||
print(f"Fetched feed. Found {len(entries)} new articles.")
|
print(f"Fetched feed. Found {len(entries)} new articles.")
|
||||||
|
|
||||||
if contains_articles:
|
if contains_articles:
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
|
try:
|
||||||
print(f"Saving {entry['link']}")
|
print(f"Saving {entry['link']}")
|
||||||
con.execute(
|
con.execute(
|
||||||
"INSERT INTO articles VALUES (?, ?, ?, ?);",
|
"INSERT INTO articles VALUES (?, ?, ?, ?);",
|
||||||
|
@ -56,8 +65,11 @@ try:
|
||||||
).text,
|
).text,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
except KeyError:
|
||||||
|
print("Not enough information. Skipping.")
|
||||||
else:
|
else:
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
|
try:
|
||||||
print(f"Downloading {entry['link']}...")
|
print(f"Downloading {entry['link']}...")
|
||||||
con.execute(
|
con.execute(
|
||||||
"INSERT INTO articles VALUES (?, ?, ?, ?);",
|
"INSERT INTO articles VALUES (?, ?, ?, ?);",
|
||||||
|
@ -69,6 +81,8 @@ try:
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
print(f"Done downloading.")
|
print(f"Done downloading.")
|
||||||
|
except KeyError:
|
||||||
|
print("Not enough information. Skipping.")
|
||||||
finally:
|
finally:
|
||||||
con.commit()
|
con.commit()
|
||||||
|
|
||||||
|
|
|
@ -49,6 +49,10 @@ contains_articles=true
|
||||||
feed="https://nypost.com/feed"
|
feed="https://nypost.com/feed"
|
||||||
category="right"
|
category="right"
|
||||||
contains_articles=false
|
contains_articles=false
|
||||||
|
exclude=[
|
||||||
|
{ type="startswith", pattern="https://pagesix.com" },
|
||||||
|
{ type="startswith", pattern="https://decider.com" },
|
||||||
|
]
|
||||||
|
|
||||||
[federalist]
|
[federalist]
|
||||||
feed="https://thefederalist.com/feed"
|
feed="https://thefederalist.com/feed"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user