parsing improvements for terminus.i2p and gitea rss feeds, separate clean channel for clearnet bridging

2025-09-19 20:02:43 +00:00 · 2025-09-19 20:02:43 +00:00 · 4d4dee54e5
parent 8937de048b
commit 4d4dee54e5
4 changed files with 1516 additions and 1394 deletions
--- a/app.py
+++ b/app.py
@ -133,7 +133,7 @@ generate_rss_feed()
 rss_icon = get_svg(script_directory, 'rss')
 up_icon = get_svg(script_directory, 'up')
 go_up = f'''<a href="#top" id="btn">{up_icon}</button>'''
-footer = f'''|<a href="http://skank.i2p" target="_blank"> Running on I2P+</a> | <a href="/tos">TOS</a> | <a href="/changelog">Changelog</a> | <a href="{ah}">AH</a> | <a href="http://status.simp.i2p" target="_blank">Status</a> | <a href="http://teq64ym42ixllnnu555jbshqmknhrar2l77gugarud7oogyskfla.b32.i2p/nyKN0De0vnXLPwHIPBB23w" rel="nofollow"><img src="/static/imgs/cum.png" alt="a snack for later"></a><a href="/links" rel="nofollow"><img src="/static/imgs/cum.png" alt="you're still hungry?"></a>'''
+footer = f'''<a href="http://skank.i2p" target="_blank"> Running on I2P+</a> | <a href="/tos">TOS</a> | <a href="/changelog">Changelog</a> | <a href="{ah}">AH</a> | <a href="http://simp.i2p/donate" target="_blank">Donate</a> | <a href="http://status.simp.i2p" target="_blank">Status</a> <a href="http://teq64ym42ixllnnu555jbshqmknhrar2l77gugarud7oogyskfla.b32.i2p/nyKN0De0vnXLPwHIPBB23w" rel="nofollow"><img src="/static/imgs/cum.png" alt="a snack for later"></a><a href="/links" rel="nofollow"><img src="/static/imgs/cum.png" alt="you're still hungry?"></a>'''
 ########
 #dbs
 ########
@ -599,6 +599,53 @@ class Irc_bot:
 					y += split_i
 			else:
 				self.msg_l.append([msg, channel])
+				
+		def get_title(shorten, latest_article):  # Build the title string for one parsed feed entry; shorten is the feed's short host name (callers pass get_short_url(url))
+			gits = [  # hosts whose titles are taken from the entry content rather than its title field (gitea feeds, per the commit message)
+				'git.simp.i2p',
+				'git.community.i2p',
+				'git.idk.i2p',
+			]
+			if shorten in gits:
+				try:
+					title = (latest_article.content[0]['value'].split('\n')[1])  # second line of the first content item's value
+				except Exception as e:  # any failure (no content, only one line, ...) falls through to the plain title; e is unused
+					title = latest_article.title
+			elif 'title' in latest_article:  # ordinary feeds: use the title field as-is
+				title = latest_article.title
+			else:  # entry has no title: assemble one from its terminus_* fields (terminus.i2p, per the commit message)
+				if 'terminus_answer' in latest_article:
+					post_number_list = latest_article.terminus_answer.split(' ')
+					post_number = post_number_list[len(post_number_list)-1]  # last space-separated token is used as the replied-to post number
+					terminus_answer = f' reply to #{post_number} '
+				else:
+					post_number = ''  # empty string tells strip_bbcode there is no number to strip
+					terminus_answer = 'new post'  # NOTE(review): no surrounding spaces, unlike the reply branch; with no timestamp this abuts terminus_sub in the final string
+				if 'terminus_timestamp' in latest_article:
+					terminus_timestamp = f' at {latest_article.terminus_timestamp} '
+				else:
+					terminus_timestamp = ''
+				if 'terminus_author' in latest_article:
+					terminus_author = f' by {latest_article.terminus_author} '
+				else:
+					terminus_author = ' anon '  # label used when the entry carries no terminus_author
+				def strip_bbcode(msg, post_number):  # remove the listed markup fragments from msg, then cap its length
+					max_chars = 200  # longest description kept before '...' is appended
+					s = [  # substrings deleted from msg, applied in list order
+						']', '[b', '[i', '[li', '[s', '[sp', '[h', '[u', '[url', '[wiki', '[t', '[find', '"', '\n', '>', '>>'  # NOTE(review): '>' is removed before '>>' is tried, so the '>>' entry can never match
+					]
+					if len(post_number) != 0:
+						s.append(post_number)  # also deletes every occurrence of the post number text from msg
+					for i in s:
+						msg = msg.replace(i, '')
+					msg_len = len(msg)
+					if msg_len > max_chars:
+						msg = f'{msg[:max_chars]}...'  # keep the first max_chars characters and mark the cut
+					return msg
+				description = strip_bbcode(latest_article.description, post_number)  # NOTE(review): description is read without a presence check - confirm title-less entries always have it
+				title = f'/s {latest_article.terminus_sub}{terminus_timestamp}{terminus_answer}{terminus_author} {description}'  # NOTE(review): terminus_sub is read without a presence check, unlike the other terminus_* fields - confirm
+			return title
+	
 		async def get_feed(
 			session: aiohttp.ClientSession,
 			color: str,
@ -648,19 +695,26 @@ class Irc_bot:
 				feed = feedparser.parse(html)
 				if feed.entries:
 					self.feed_count += 1
+					shorten = get_short_url(url)
 					latest_article = feed.entries[0]
-                    title = latest_article.title
-                    link = latest_article.link
+					# title = latest_article.title
+					title = get_title(shorten, latest_article)
+					# link = latest_article.link
 					# print(f"[Processing] TITLE {title} | feed_dict[latest] {(self.feed_dict[url])['latest']} | {(self.feed_dict[url])['first_run']}")
 					if title != (self.feed_dict[url])['latest'] and (self.feed_dict[url])['first_run'] == False:
 						for i in range(0, len(feed.entries)):
 							a = feed.entries[i]
-                            if a.title not in (self.feed_dict[url])['title']:
-                                (self.feed_dict[url])['title'].append(a.title)
+							a_title = get_title(shorten, a)
+							# if a.title not in (self.feed_dict[url])['title']:
+							if a_title not in (self.feed_dict[url])['title']:
+								(self.feed_dict[url])['title'].append(a_title)
+								if shorten != 'hackernews.i2p':
 									if a.link[:4].lower() != 'http':
 										post_url = f'{url}{a.link}'
 									else:
 										post_url = a.link
+								else:
+									post_url = a.comments.replace('https://news.ycombinator.com', 'http://hackernews.i2p')
 								(self.feed_dict[url])['link'].append(post_url)
 								try:
 									(self.feed_dict[url])['description'].append(a.description)
@ -674,7 +728,17 @@ class Irc_bot:
 						msg = []
 						entries = len((self.feed_dict[url])['title'])
 						irc_pulls = entries - self.max_posts_per_pull 
+						switch_urls = [
+							'iranfreedom.org',
+							'paltepuk.neocities.org',
+							'127.0.0.1',
+							'127.0.0.1:7672',
+							'localhost',
+							'localhost:7672',
+						]
+						forbidden = ['#i2p-news-clean']
 						for i in range(0, entries):
+						# for i in range(entries, 0, -1):
 							if (self.feed_dict[url])['posted'][i] == False:
 								msg_title = (self.feed_dict[url])['title'][i]
 								msg_description = (self.feed_dict[url])['description'][i]
@ -682,10 +746,19 @@ class Irc_bot:
 									msg_description = f'{msg_description[:50]}...'
 								msg_url = (self.feed_dict[url])['link'][i]
 								col = self.color_dict[feed_url[url]['category']]
-                                shorten = get_short_url(url)
-                                if get_host(msg_url).casefold() == '127.0.0.1':
-                                    url_q = msg_url.split(shorten)[1]
-                                    msg_url = f'http://{shorten}{url_q}'
+								the_host = get_host(msg_url)
+								# if the_host in switch_urls:
+								#     # url_q = msg_url.split(shorten)[1]
+								#     url_q = msg_url.replace(the_host, shorten)
+								#     msg_url = f'http://{url_q}'
+								if the_host in switch_urls:
+									url_q = msg_url.replace(the_host, shorten)
+									if len(url_q) > 7:
+										url_q = url_q.replace('https://', 'http://')
+										if url_q[:7] != 'http://':
+											url_q = f'http://{url_q}'
+									msg_url = url_q
+								
 								msg_s = (f"{col}[{shorten}]{Color.normal} {msg_title} - {msg_url}")
 								# msg_f = f'''{msg_title} - {msg_description} ::{msg_url}'''
 								msg_f = f'''::{msg_url} @@@{msg_title}'''
@ -693,11 +766,11 @@ class Irc_bot:
 								self.new_posts += 1
 								if i >= irc_pulls:
 									for item in self.channel_join:
+										if item not in forbidden:
 											send_irc_msg(self, msg_s, self.irc, item)
 								save_feed_file(self, f'''{shorten}:{feed_url[url]['category']}''', msg_f, script_directory, feed_url[url]['category'])
 								save_feed_file(self, f'''{shorten}:{feed_url[url]['category']}''', msg_f, script_directory, 'all')
 								# if feed_url[url]['category'] == 'torrents':
-
 								if shorten.lower() == 'tracker2.postman.i2p' or is_url_ours(url) == True or feed_url[url]['category'].lower() == 'torrents':
 									for c in self.channel_send:
 										payload = {
@ -707,6 +780,17 @@ class Irc_bot:
 										'url': msg_url,
 										}
 										send_message(self.bot_send, payload)    
+								else:
+									for item in forbidden:
+										send_irc_msg(self, msg_s, self.irc, item)
+								if shorten.lower() == 'gatheryourparty.i2p':
+									payload = {
+									'sender': get_short_url(url),
+									'send_to': '#gatheryourparty',
+									'title': msg_title,
+									'url': msg_url,
+									}
+									send_message(self.bot_send, payload) 
 						generate_rss_feed()

 						
@ -720,8 +804,9 @@ class Irc_bot:
 						(self.feed_dict[url])['first_run'] = False
 						for i in range(0, len(feed.entries)):
 							a = feed.entries[i]
-                            if a.title not in (self.feed_dict[url])['title']:
-                                (self.feed_dict[url])['title'].append(a.title)
+							a_title = get_title(shorten, a)
+							if a_title not in (self.feed_dict[url])['title']:
+								(self.feed_dict[url])['title'].append(a_title)
 								(self.feed_dict[url])['link'].append(a.link)
 								try:
 									(self.feed_dict[url])['description'].append(a.description)
@ -734,6 +819,7 @@ class Irc_bot:
 			except Exception as e:
 				print(f'[{str(self.feed_count)}/{str(len(self.feed_dict))}] RSS feed error {e} on {get_short_url(url)}')

+
 		async def gather_feeds(self, feeds, proxy_url, **kwargs):
 			async with aiohttp.ClientSession() as session:
 				tasks = []
@ -962,7 +1048,7 @@ class Irc_bot:
 									self.new_users.pop(joined_named)
 									# msg.append(f'no longer new user: {joined_named}') 
 						else:
-                            msg.append(f'new user')
+							# msg.append(f'new user')
 							if joined_named not in self.new_users:
 								self.new_users[joined_named] = 0
 							time_now_obj = datetime.now()
@ -1046,6 +1132,8 @@ class Irc_bot:
 						new_nick = True
 					elif line[3].lower() == ":if" and line[4].lower() == 'you':
 						identify_bot(self)
+					elif line[2] == 375:
+						identify_bot(self)
 					elif line[3].lower() == ":this" and line[6].lower() == 'registered.':
 						identify_bot(self) 
 					elif user.lower() == 'nickserv':
@ -1369,7 +1457,7 @@ class Irc_bot:
 					send_to = where_to_send(line)
 					msg = [
 					f'i follow {str(len(feed_url))} RSS feeds accross i2p for updates.',
-                    f'message {primary} if you want your feed added here. It must be accessible within i2p',
+					f'message an OP if you want your feed added here. It must be accessible within i2p',
 					]
 					for item in msg:
 						send_irc_msg(self, item, self.irc, send_to)
@ -1448,6 +1536,9 @@ class Irc_bot:
 							register_bot(self, line)
 						elif line[1] == '451' and line[2] == username:
 							register_bot(self, line)
+						elif line[1] == '376' or line[1] == '266':
+							print('IDENTIFY BOT')
+							identify_bot(self)            
 						elif len(line) > 3:
 							if len(line[3]) > 2:
 								if (line[3])[1:] == f'{tr}admin':  
@ -1466,7 +1557,7 @@ class Irc_bot:
 			return(state)  
 		self.irc = irc_connect(self.username, self.password, self.hostname, self.port)
 		
-        time.sleep(7)
+		time.sleep(5)
 		JOI = f'JOIN {self.channel_join}\r\n'
 		JOIN = JOI.encode(encoding='UTF-8',errors='strict')
 		PASS = f'PRIVMSG NickServ IDENTIFY {self.password}\r\n'
--- a/changelognewsbot.txt
+++ b/changelognewsbot.txt
@ -1,3 +1,6 @@
+19/09/2025: added better parsing for terminus.i2p and rss feeds from gitea, separate "clean" channel (no torrents)
+18/09/2025: added 25 new rss feeds, removed some dead ones
+28/03/2025: added rss feeds from git.simp.i2p along with other new feeds, fixed another bug with html code showing up correctly, updated footer
 21/01/2025: Improvements in parsing urls and html tags
 08/12/2024: CSS improvements, thank you to dr|z3d, bug fixes
 30/11/2024: Live testing in IRC, added /feedlinks so alive feeds can be copied as a list
--- a/functions.py
+++ b/functions.py
@ -295,7 +295,7 @@ def format_msg(single_msg_raw, topic):
                post_url = f'http://{nick_raw}{l[i][2:]}'
            else:
                post_url = (single_msg.split('::')[1]).split(' @@@')[0]
-            if get_host(post_url).casefold() == '127.0.0.1':
+            if get_host(post_url) == '127.0.0.1':
                url_q = post_url.split(get_short_url(post_url))[1]
                post_url = f'http://{nick_raw}{url_q}'
            l[i] = ''
--- a/rsslist.txt
+++ b/rsslist.txt
@ -1,12 +1,10 @@
 http://simp.i2p/feed torrents
 http://righttoprivacy.i2p/rss/ blog
 http://angelwood.i2p/feed.rss blog
-http://gl6vzyjnv62kp3vjmmouwlvsrbp2bujk2cev6frgnln5vfepy2xq.b32.i2p/index.xml blog
 http://dujemihanovic.i2p/index.xml blog
 http://torrentfreak.i2p/feed/ torrents
 http://arstechnica.i2p/feed news
 http://simp.i2p/blog/feed blog
-http://ghativega.i2p/atom.xml blog
 http://cool-website.i2p/rss.xml blog
 http://jacksonchen666.i2p/feeds/ blog
 http://mdleom.i2p/atom.xml blog
@ -19,9 +17,7 @@ http://ellipticnews.i2p/feeds/posts/default blog
 http://kuukkanen.i2p/feed.xml blog
 http://hackaday.i2p/feed news
 http://hongkongfreepress.i2p/ news
-http://git.idk.i2p/i2p-hackers/i2p.i2p/-/merge_requests.atom?state=opened developer
 http://zzz.i2p/topics.rss forum
-http://git.idk.i2p/i2p-hackers/i2p.i2p/-/issues.atom?state=opened developer
 http://stats.i2p/cgi-bin/newhosts.xml blog
 http://tracker2.postman.i2p/?view=RSS&mapset=-1 torrents
 http://hq.postman.i2p/?feed=atom blog
@ -36,7 +32,6 @@ http://tails.i2p/news/index.en.atom blog
 http://discuss.i2p/app.php/feed/forums?sid=69da73f2a323f778252a08a4c93a887d forum
 http://sciencedaily.i2p/rss news
 http://schneieronsecurity.i2p/rss news
-http://orizuru.i2p/atom.xml news
 http://s-config.i2p/rss blog
 http://shreddit.i2p/r/i2p/.rss forum
 http://natter.i2p/StormyCloudInc/rss forum
@ -51,27 +46,60 @@ http://techxplore.i2p/rss-feed/ news
 http://notbob.i2p/blog.xml blog
 http://theatlantic.i2p/feed/all/ news
 http://i2p-projekt.i2p/en/feed/blog/atom developer
-http://amnesie.i2p/rss.xml blog
 http://1337z.i2p/rss.xml blog
 http://git.skank.i2p/rez/plus.atom developer
 http://hackernews.i2p/rss news
 http://paltepuk.i2p/blog/index.i2p.xml blog
 http://deurachavich.i2p/rss.xml blog
-http://fury.i2p/feed.xml blog
-https://blog.everypizza.im/feed/feed.xml blog
-http://mdleom.i2p/atom.xml blog
-http://masflam.i2p/blog/feed.rss blog
 http://libresolutionsnetwork.i2p/rss blog
 http://jacksonchen666.i2p/posts/index.xml blog
 http://dankaminsky.i2p/feed blog
-http://darkrealm.i2p/index.php?act=rss blog 
 http://franciscogg.i2p/rss.xml blog
 http://shadowforums.i2p/!feed forum
+http://darkrealm.i2p/index.php?act=rss blog
 http://kulervod.i2p/feeds/local.xml?sort=Active forum
 http://git.simp.i2p/simp/rayhunter.rss developer
+http://git.simp.i2p/simp/guessthesong.rss developer
 http://git.simp.i2p/fuzzykitten/dev_endboard.rss developer
 http://git.simp.i2p/simp/i2music.rss developer
 http://git.simp.i2p/simp/i2pnews.rss developer
 http://git.simp.i2p/simp/shorturl.rss developer
-http://taz.i2p/rss.xml blog
-http://forum.midgard.i2p/syndication.php forum
+http://git.simp.i2p/simp/TuckIt.rss developer
+http://terminus.i2p/rss forum
+http://git.idk.i2p/I2P_Developers/i2p.i2p.rss developer
+http://git.idk.i2p/idk/Go_I2p.rss developer
+http://git.idk.i2p/I2P_Developers/i2p.plugins.zzzot.rss developer
+http://git.idk.i2p/idk/I2PSnark-RPC.rss developer
+http://git.community.i2p/PurpleI2P/pyseeder.rss developer
+http://git.community.i2p/PurpleI2P/i2pd-tools.rss developer
+http://git.community.i2p/PurpleI2P/i2pd.rss developer
+http://gatheryourparty.i2p/rss.xml blog
+http://monkemanx.i2p/index.xml blog
+http://masflam.i2p/feed/ blog
+http://thricegreat.i2p/rss.xml blog
+http://taiwan.i2p/rss2.xml blog
+http://shittyweb.i2p/blog/feed.xml blog
+http://silosneeded.i2p/feed.xml blog
+http://pabloshell.i2p/rss.xml blog
+http://masflam.i2p/feed/?type=rss blog
+http://maidzone.i2p/blog.rss blog
+http://med0ed.i2p/rss.xml blog
+http://jerryhome.i2p/atom.xml blog
+http://lulu-cats.i2p/index.xml blog
+http://joshuatshaffer.i2p/index.xml blog
+http://jakob.i2p/feed.xml blog
+http://itphx.i2p/feed blog
+http://gettie.i2p/rss.xml blog
+http://darksavantcrusaders.i2p/feed.xml blog
+http://cxj.i2p/index.xml blog
+http://cosmicflow.i2p/rss/news.xml blog
+http://gedanken.i2p/feed.rss blog
+http://iranfreedom.i2p/en/feed/ news
+http://opendemocracy.i2p/feed news
+http://sur.i2p/feed news
+http://upstreamjournal.i2p/feed news
+http://clap.i2p/feed news
+http://git.simp.i2p/simp/Eepstore.rss developer
+http://git.simp.i2p/simp/emissary.rss developer
+http://git.simp.i2p/simp/mantaray.rss developer
+http://git.simp.i2p/simp/yosemite.rss developer