parsing improvements for terminus.i2p and gitea rss feeds, separate clean channel for clearnet bridging

main
simp 2025-09-19 20:02:43 +00:00
parent 8937de048b
commit 4d4dee54e5
4 changed files with 1516 additions and 1394 deletions

121
app.py
View File

@ -133,7 +133,7 @@ generate_rss_feed()
rss_icon = get_svg(script_directory, 'rss')
up_icon = get_svg(script_directory, 'up')
go_up = f'''<a href="#top" id="btn">{up_icon}</button>'''
footer = f'''|<a href="http://skank.i2p" target="_blank"> Running on I2P+</a> | <a href="/tos">TOS</a> | <a href="/changelog">Changelog</a> | <a href="{ah}">AH</a> | <a href="http://status.simp.i2p" target="_blank">Status</a> | <a href="http://teq64ym42ixllnnu555jbshqmknhrar2l77gugarud7oogyskfla.b32.i2p/nyKN0De0vnXLPwHIPBB23w" rel="nofollow"><img src="/static/imgs/cum.png" alt="a snack for later"></a><a href="/links" rel="nofollow"><img src="/static/imgs/cum.png" alt="you're still hungry?"></a>'''
footer = f'''<a href="http://skank.i2p" target="_blank"> Running on I2P+</a> | <a href="/tos">TOS</a> | <a href="/changelog">Changelog</a> | <a href="{ah}">AH</a> | <a href="http://simp.i2p/donate" target="_blank">Donate</a> | <a href="http://status.simp.i2p" target="_blank">Status</a> <a href="http://teq64ym42ixllnnu555jbshqmknhrar2l77gugarud7oogyskfla.b32.i2p/nyKN0De0vnXLPwHIPBB23w" rel="nofollow"><img src="/static/imgs/cum.png" alt="a snack for later"></a><a href="/links" rel="nofollow"><img src="/static/imgs/cum.png" alt="you're still hungry?"></a>'''
########
#dbs
########
@ -599,6 +599,53 @@ class Irc_bot:
y += split_i
else:
self.msg_l.append([msg, channel])
def get_title(shorten, latest_article):
    """Build the display title for one feed entry.

    Three cases are handled, in this order:

    1. Gitea hosts (``git.simp.i2p``, ``git.community.i2p``, ``git.idk.i2p``):
       the second line of the entry's first ``content`` value is used;
       if it cannot be read, the entry's ``title`` is used instead.
    2. Any entry that carries a ``title``: that title is returned unchanged.
    3. Title-less entries (the terminus.i2p forum feed): a title is
       synthesised from the ``terminus_*`` fields and a cleaned, truncated
       copy of the description, e.g.
       ``/s tech at 12:00 reply to #5 by bob some text``.

    Args:
        shorten: Short host name of the feed (compared against the Gitea
            host list).
        latest_article: A feed entry supporting both ``'key' in entry`` and
            attribute access (as feedparser entries do).

    Returns:
        The title string to display and to use for de-duplication.

    Raises:
        AttributeError: if a title-less entry lacks ``terminus_sub`` or
            ``description`` (unchanged from the previous behaviour).
    """
    # Local import: the file's import block is not visible from this block.
    import re

    gitea_hosts = ('git.simp.i2p', 'git.community.i2p', 'git.idk.i2p')
    max_chars = 200
    # Tag-name prefixes to strip; "[s" also covers "[sp...", "[u" covers "[url...".
    tag_prefixes = ('b', 'i', 'li', 's', 'sp', 'h', 'u', 'url', 'wiki', 't', 'find')
    tag_pattern = r'\[/?(?:' + '|'.join(tag_prefixes) + r')[^\[\]]*\]'

    def strip_bbcode(msg, post_number):
        """Remove BBCode tags, quote markers and the quoted post number."""
        # Strip whole tags (opening, closing, with attributes) in one pass so
        # that short prefixes can no longer eat the start of longer tag names
        # and closing tags such as "[/b]" are removed too.
        msg = re.sub(tag_pattern, '', msg, flags=re.IGNORECASE)
        if post_number:
            # Drop the quoted post number only when it stands alone, so that
            # replying to post 5 does not turn "15" into "1".
            msg = re.sub(rf'(?<!\d){re.escape(post_number)}(?!\d)', '', msg)
        # Leftover bracket/quote noise; '>' also covers the '>>' quote marker.
        for junk in (']', '"', '\n', '>'):
            msg = msg.replace(junk, '')
        msg = msg.strip()
        if len(msg) > max_chars:
            msg = f'{msg[:max_chars]}...'
        return msg

    if shorten in gitea_hosts:
        try:
            # Second line of the first content value is used as the title.
            return latest_article.content[0]['value'].split('\n')[1]
        except (AttributeError, IndexError, KeyError, TypeError):
            return latest_article.title

    if 'title' in latest_article:
        return latest_article.title

    # Title-less entry: assemble a title from the terminus_* fields.
    post_number = ''
    reply_part = 'new post'
    if 'terminus_answer' in latest_article:
        # The last space-separated token of terminus_answer is the post number.
        post_number = latest_article.terminus_answer.split(' ')[-1]
        reply_part = f'reply to #{post_number}'

    when_part = ''
    if 'terminus_timestamp' in latest_article:
        when_part = f'at {latest_article.terminus_timestamp}'

    author_part = 'anon'
    if 'terminus_author' in latest_article:
        author_part = f'by {latest_article.terminus_author}'

    description = strip_bbcode(latest_article.description, post_number)
    # Join with single spaces and skip empty parts; the previous f-string
    # glued "new post" directly onto the sub name when there was no timestamp.
    parts = ['/s', latest_article.terminus_sub, when_part, reply_part, author_part, description]
    return ' '.join(part for part in parts if part)
async def get_feed(
session: aiohttp.ClientSession,
color: str,
@ -648,19 +695,26 @@ class Irc_bot:
feed = feedparser.parse(html)
if feed.entries:
self.feed_count += 1
shorten = get_short_url(url)
latest_article = feed.entries[0]
title = latest_article.title
link = latest_article.link
# title = latest_article.title
title = get_title(shorten, latest_article)
# link = latest_article.link
# print(f"[Processing] TITLE {title} | feed_dict[latest] {(self.feed_dict[url])['latest']} | {(self.feed_dict[url])['first_run']}")
if title != (self.feed_dict[url])['latest'] and (self.feed_dict[url])['first_run'] == False:
for i in range(0, len(feed.entries)):
a = feed.entries[i]
if a.title not in (self.feed_dict[url])['title']:
(self.feed_dict[url])['title'].append(a.title)
a_title = get_title(shorten, a)
# if a.title not in (self.feed_dict[url])['title']:
if a_title not in (self.feed_dict[url])['title']:
(self.feed_dict[url])['title'].append(a_title)
if shorten != 'hackernews.i2p':
if a.link[:4].lower() != 'http':
post_url = f'{url}{a.link}'
else:
post_url = a.link
else:
post_url = a.comments.replace('https://news.ycombinator.com', 'http://hackernews.i2p')
(self.feed_dict[url])['link'].append(post_url)
try:
(self.feed_dict[url])['description'].append(a.description)
@ -674,7 +728,17 @@ class Irc_bot:
msg = []
entries = len((self.feed_dict[url])['title'])
irc_pulls = entries - self.max_posts_per_pull
switch_urls = [
'iranfreedom.org',
'paltepuk.neocities.org',
'127.0.0.1',
'127.0.0.1:7672',
'localhost',
'localhost:7672',
]
forbidden = ['#i2p-news-clean']
for i in range(0, entries):
# for i in range(entries, 0, -1):
if (self.feed_dict[url])['posted'][i] == False:
msg_title = (self.feed_dict[url])['title'][i]
msg_description = (self.feed_dict[url])['description'][i]
@ -682,10 +746,19 @@ class Irc_bot:
msg_description = f'{msg_description[:50]}...'
msg_url = (self.feed_dict[url])['link'][i]
col = self.color_dict[feed_url[url]['category']]
shorten = get_short_url(url)
if get_host(msg_url).casefold() == '127.0.0.1':
url_q = msg_url.split(shorten)[1]
msg_url = f'http://{shorten}{url_q}'
the_host = get_host(msg_url)
# if the_host in switch_urls:
# # url_q = msg_url.split(shorten)[1]
# url_q = msg_url.replace(the_host, shorten)
# msg_url = f'http://{url_q}'
if the_host in switch_urls:
url_q = msg_url.replace(the_host, shorten)
if len(url_q) > 7:
url_q = url_q.replace('https://', 'http://')
if url_q[:7] != 'http://':
url_q = f'http://{url_q}'
msg_url = url_q
msg_s = (f"{col}[{shorten}]{Color.normal} {msg_title} - {msg_url}")
# msg_f = f'''{msg_title} - {msg_description} ::{msg_url}'''
msg_f = f'''::{msg_url} @@@{msg_title}'''
@ -693,11 +766,11 @@ class Irc_bot:
self.new_posts += 1
if i >= irc_pulls:
for item in self.channel_join:
if item not in forbidden:
send_irc_msg(self, msg_s, self.irc, item)
save_feed_file(self, f'''{shorten}:{feed_url[url]['category']}''', msg_f, script_directory, feed_url[url]['category'])
save_feed_file(self, f'''{shorten}:{feed_url[url]['category']}''', msg_f, script_directory, 'all')
# if feed_url[url]['category'] == 'torrents':
if shorten.lower() == 'tracker2.postman.i2p' or is_url_ours(url) == True or feed_url[url]['category'].lower() == 'torrents':
for c in self.channel_send:
payload = {
@ -707,6 +780,17 @@ class Irc_bot:
'url': msg_url,
}
send_message(self.bot_send, payload)
else:
for item in forbidden:
send_irc_msg(self, msg_s, self.irc, item)
if shorten.lower() == 'gatheryourparty.i2p':
payload = {
'sender': get_short_url(url),
'send_to': '#gatheryourparty',
'title': msg_title,
'url': msg_url,
}
send_message(self.bot_send, payload)
generate_rss_feed()
@ -720,8 +804,9 @@ class Irc_bot:
(self.feed_dict[url])['first_run'] = False
for i in range(0, len(feed.entries)):
a = feed.entries[i]
if a.title not in (self.feed_dict[url])['title']:
(self.feed_dict[url])['title'].append(a.title)
a_title = get_title(shorten, a)
if a_title not in (self.feed_dict[url])['title']:
(self.feed_dict[url])['title'].append(a_title)
(self.feed_dict[url])['link'].append(a.link)
try:
(self.feed_dict[url])['description'].append(a.description)
@ -734,6 +819,7 @@ class Irc_bot:
except Exception as e:
print(f'[{str(self.feed_count)}/{str(len(self.feed_dict))}] RSS feed error {e} on {get_short_url(url)}')
async def gather_feeds(self, feeds, proxy_url, **kwargs):
async with aiohttp.ClientSession() as session:
tasks = []
@ -962,7 +1048,7 @@ class Irc_bot:
self.new_users.pop(joined_named)
# msg.append(f'no longer new user: {joined_named}')
else:
msg.append(f'new user')
# msg.append(f'new user')
if joined_named not in self.new_users:
self.new_users[joined_named] = 0
time_now_obj = datetime.now()
@ -1046,6 +1132,8 @@ class Irc_bot:
new_nick = True
elif line[3].lower() == ":if" and line[4].lower() == 'you':
identify_bot(self)
elif line[2] == 375:
identify_bot(self)
elif line[3].lower() == ":this" and line[6].lower() == 'registered.':
identify_bot(self)
elif user.lower() == 'nickserv':
@ -1369,7 +1457,7 @@ class Irc_bot:
send_to = where_to_send(line)
msg = [
f'i follow {str(len(feed_url))} RSS feeds accross i2p for updates.',
f'message {primary} if you want your feed added here. It must be accessible within i2p',
f'message an OP if you want your feed added here. It must be accessible within i2p',
]
for item in msg:
send_irc_msg(self, item, self.irc, send_to)
@ -1448,6 +1536,9 @@ class Irc_bot:
register_bot(self, line)
elif line[1] == '451' and line[2] == username:
register_bot(self, line)
elif line[1] == '376' or line[1] == '266':
print('IDENTIFY BOT')
identify_bot(self)
elif len(line) > 3:
if len(line[3]) > 2:
if (line[3])[1:] == f'{tr}admin':
@ -1466,7 +1557,7 @@ class Irc_bot:
return(state)
self.irc = irc_connect(self.username, self.password, self.hostname, self.port)
time.sleep(7)
time.sleep(5)
JOI = f'JOIN {self.channel_join}\r\n'
JOIN = JOI.encode(encoding='UTF-8',errors='strict')
PASS = f'PRIVMSG NickServ IDENTIFY {self.password}\r\n'

View File

@ -1,3 +1,6 @@
19/09/2025: added better parsing for terminus.i2p and rss feeds from gitea, separate "clean" channel (no torrents)
18/09/2025: added 25 new rss feeds, removed some dead ones
28/03/2025: added rss feeds from git.simp.i2p along with other new feeds, fixed another bug with html code showing up correctly, updated footer
21/01/2025: Improvements in parsing urls and html tags
08/12/2024: CSS improvements, thank you to dr|z3d, bug fixes
30/11/2024: Live testing in IRC, added /feedlinks so alive feeds can be copied as a list

View File

@ -295,7 +295,7 @@ def format_msg(single_msg_raw, topic):
post_url = f'http://{nick_raw}{l[i][2:]}'
else:
post_url = (single_msg.split('::')[1]).split(' @@@')[0]
if get_host(post_url).casefold() == '127.0.0.1':
if get_host(post_url) == '127.0.0.1':
url_q = post_url.split(get_short_url(post_url))[1]
post_url = f'http://{nick_raw}{url_q}'
l[i] = ''

View File

@ -1,12 +1,10 @@
http://simp.i2p/feed torrents
http://righttoprivacy.i2p/rss/ blog
http://angelwood.i2p/feed.rss blog
http://gl6vzyjnv62kp3vjmmouwlvsrbp2bujk2cev6frgnln5vfepy2xq.b32.i2p/index.xml blog
http://dujemihanovic.i2p/index.xml blog
http://torrentfreak.i2p/feed/ torrents
http://arstechnica.i2p/feed news
http://simp.i2p/blog/feed blog
http://ghativega.i2p/atom.xml blog
http://cool-website.i2p/rss.xml blog
http://jacksonchen666.i2p/feeds/ blog
http://mdleom.i2p/atom.xml blog
@ -19,9 +17,7 @@ http://ellipticnews.i2p/feeds/posts/default blog
http://kuukkanen.i2p/feed.xml blog
http://hackaday.i2p/feed news
http://hongkongfreepress.i2p/ news
http://git.idk.i2p/i2p-hackers/i2p.i2p/-/merge_requests.atom?state=opened developer
http://zzz.i2p/topics.rss forum
http://git.idk.i2p/i2p-hackers/i2p.i2p/-/issues.atom?state=opened developer
http://stats.i2p/cgi-bin/newhosts.xml blog
http://tracker2.postman.i2p/?view=RSS&mapset=-1 torrents
http://hq.postman.i2p/?feed=atom blog
@ -36,7 +32,6 @@ http://tails.i2p/news/index.en.atom blog
http://discuss.i2p/app.php/feed/forums?sid=69da73f2a323f778252a08a4c93a887d forum
http://sciencedaily.i2p/rss news
http://schneieronsecurity.i2p/rss news
http://orizuru.i2p/atom.xml news
http://s-config.i2p/rss blog
http://shreddit.i2p/r/i2p/.rss forum
http://natter.i2p/StormyCloudInc/rss forum
@ -51,27 +46,60 @@ http://techxplore.i2p/rss-feed/ news
http://notbob.i2p/blog.xml blog
http://theatlantic.i2p/feed/all/ news
http://i2p-projekt.i2p/en/feed/blog/atom developer
http://amnesie.i2p/rss.xml blog
http://1337z.i2p/rss.xml blog
http://git.skank.i2p/rez/plus.atom developer
http://hackernews.i2p/rss news
http://paltepuk.i2p/blog/index.i2p.xml blog
http://deurachavich.i2p/rss.xml blog
http://fury.i2p/feed.xml blog
https://blog.everypizza.im/feed/feed.xml blog
http://mdleom.i2p/atom.xml blog
http://masflam.i2p/blog/feed.rss blog
http://libresolutionsnetwork.i2p/rss blog
http://jacksonchen666.i2p/posts/index.xml blog
http://dankaminsky.i2p/feed blog
http://darkrealm.i2p/index.php?act=rss blog
http://franciscogg.i2p/rss.xml blog
http://shadowforums.i2p/!feed forum
http://darkrealm.i2p/index.php?act=rss blog
http://kulervod.i2p/feeds/local.xml?sort=Active forum
http://git.simp.i2p/simp/rayhunter.rss developer
http://git.simp.i2p/simp/guessthesong.rss developer
http://git.simp.i2p/fuzzykitten/dev_endboard.rss developer
http://git.simp.i2p/simp/i2music.rss developer
http://git.simp.i2p/simp/i2pnews.rss developer
http://git.simp.i2p/simp/shorturl.rss developer
http://taz.i2p/rss.xml blog
http://forum.midgard.i2p/syndication.php forum
http://git.simp.i2p/simp/TuckIt.rss developer
http://terminus.i2p/rss forum
http://git.idk.i2p/I2P_Developers/i2p.i2p.rss developer
http://git.idk.i2p/idk/Go_I2p.rss developer
http://git.idk.i2p/I2P_Developers/i2p.plugins.zzzot.rss developer
http://git.idk.i2p/idk/I2PSnark-RPC.rss developer
http://git.community.i2p/PurpleI2P/pyseeder.rss developer
http://git.community.i2p/PurpleI2P/i2pd-tools.rss developer
http://git.community.i2p/PurpleI2P/i2pd.rss developer
http://gatheryourparty.i2p/rss.xml blog
http://monkemanx.i2p/index.xml blog
http://masflam.i2p/feed/ blog
http://thricegreat.i2p/rss.xml blog
http://taiwan.i2p/rss2.xml blog
http://shittyweb.i2p/blog/feed.xml blog
http://silosneeded.i2p/feed.xml blog
http://pabloshell.i2p/rss.xml blog
http://masflam.i2p/feed/?type=rss blog
http://maidzone.i2p/blog.rss blog
http://med0ed.i2p/rss.xml blog
http://jerryhome.i2p/atom.xml blog
http://lulu-cats.i2p/index.xml blog
http://joshuatshaffer.i2p/index.xml blog
http://jakob.i2p/feed.xml blog
http://itphx.i2p/feed blog
http://gettie.i2p/rss.xml blog
http://darksavantcrusaders.i2p/feed.xml blog
http://cxj.i2p/index.xml blog
http://cosmicflow.i2p/rss/news.xml blog
http://gedanken.i2p/feed.rss blog
http://iranfreedom.i2p/en/feed/ news
http://opendemocracy.i2p/feed news
http://sur.i2p/feed news
http://upstreamjournal.i2p/feed news
http://clap.i2p/feed news
http://git.simp.i2p/simp/Eepstore.rss developer
http://git.simp.i2p/simp/emissary.rss developer
http://git.simp.i2p/simp/mantaray.rss developer
http://git.simp.i2p/simp/yosemite.rss developer