mirror of http://git.simp.i2p/simp/i2pnews.git
parsing improvements for terminus.i2p and gitea rss feeds, separate clean channel for clearnet bridging
parent
8937de048b
commit
4d4dee54e5
123
app.py
123
app.py
|
@ -133,7 +133,7 @@ generate_rss_feed()
|
|||
rss_icon = get_svg(script_directory, 'rss')
|
||||
up_icon = get_svg(script_directory, 'up')
|
||||
go_up = f'''<a href="#top" id="btn">{up_icon}</button>'''
|
||||
footer = f'''|<a href="http://skank.i2p" target="_blank"> Running on I2P+</a> | <a href="/tos">TOS</a> | <a href="/changelog">Changelog</a> | <a href="{ah}">AH</a> | <a href="http://status.simp.i2p" target="_blank">Status</a> | <a href="http://teq64ym42ixllnnu555jbshqmknhrar2l77gugarud7oogyskfla.b32.i2p/nyKN0De0vnXLPwHIPBB23w" rel="nofollow"><img src="/static/imgs/cum.png" alt="a snack for later"></a><a href="/links" rel="nofollow"><img src="/static/imgs/cum.png" alt="you're still hungry?"></a>'''
|
||||
footer = f'''<a href="http://skank.i2p" target="_blank"> Running on I2P+</a> | <a href="/tos">TOS</a> | <a href="/changelog">Changelog</a> | <a href="{ah}">AH</a> | <a href="http://simp.i2p/donate" target="_blank">Donate</a> | <a href="http://status.simp.i2p" target="_blank">Status</a> <a href="http://teq64ym42ixllnnu555jbshqmknhrar2l77gugarud7oogyskfla.b32.i2p/nyKN0De0vnXLPwHIPBB23w" rel="nofollow"><img src="/static/imgs/cum.png" alt="a snack for later"></a><a href="/links" rel="nofollow"><img src="/static/imgs/cum.png" alt="you're still hungry?"></a>'''
|
||||
########
|
||||
#dbs
|
||||
########
|
||||
|
@ -488,7 +488,7 @@ class Irc_bot:
|
|||
self.feed_dict = {}
|
||||
self.new_posts = 0
|
||||
self.feed_count = 0
|
||||
self.channel_send =['#torrents', '#torrent']
|
||||
self.channel_send = ['#torrents', '#torrent']
|
||||
self.bot_send = [bot_send]
|
||||
self.color_dict = {
|
||||
'blog': Color.pink,
|
||||
|
@ -599,6 +599,53 @@ class Irc_bot:
|
|||
y += split_i
|
||||
else:
|
||||
self.msg_l.append([msg, channel])
|
||||
|
||||
def get_title(shorten, latest_article):
    """Build the title string announced for a single feed entry.

    Three cases are handled, in this order:

    1. ``shorten`` is one of the known git hosts: the second line of the
       entry's first content value is used, falling back to the entry's
       own title when that line cannot be read.
    2. The entry has a ``title`` key: that title is returned unchanged.
    3. Otherwise the entry is treated as a terminus-style post and a title
       is assembled from its ``terminus_*`` fields and a cleaned, truncated
       description.

    Args:
        shorten: Short host name of the feed (callers pass the result of
            ``get_short_url(url)``).
        latest_article: Feed entry supporting both ``key in entry`` tests
            and attribute access (callers pass items of ``feed.entries``).

    Returns:
        The title text for the entry.

    Raises:
        AttributeError: If a field that is read unconditionally
            (``title`` in the git fallback, ``terminus_sub`` or
            ``description`` in the terminus case) is missing from the entry.
    """
    git_hosts = (
        'git.simp.i2p',
        'git.community.i2p',
        'git.idk.i2p',
    )

    if shorten in git_hosts:
        try:
            return latest_article.content[0]['value'].split('\n')[1]
        except (AttributeError, IndexError, KeyError, TypeError):
            # No content, empty content list, missing 'value', a non-string
            # value, or a value with only one line: use the plain title.
            return latest_article.title

    if 'title' in latest_article:
        return latest_article.title

    # Title-less entry: assemble a title from the terminus_* fields.
    if 'terminus_answer' in latest_article:
        # The post number is the last space-separated word of the field.
        post_number = latest_article.terminus_answer.split(' ')[-1]
        answer = f'reply to #{post_number}'
    else:
        post_number = ''
        answer = 'new post'

    if 'terminus_timestamp' in latest_article:
        timestamp = f'at {latest_article.terminus_timestamp}'
    else:
        timestamp = ''

    if 'terminus_author' in latest_article:
        author = f'by {latest_article.terminus_author}'
    else:
        author = 'anon'

    def strip_bbcode(msg, post_number):
        """Remove markup fragments from ``msg`` and cap it at 200 characters."""
        max_chars = 200
        tokens = [
            ']', '[b', '[i', '[li', '[s', '[sp', '[h', '[u', '[url', '[wiki',
            '[t', '[find', '"', '\n', '>', '>>',
        ]
        # Longest tokens first: otherwise '[s' eats the start of '[sp' and
        # '[u' the start of '[url', leaving stray 'p' / 'rl' in the text.
        for token in sorted(tokens, key=len, reverse=True):
            msg = msg.replace(token, '')
        if post_number:
            # NOTE(review): this drops every occurrence of the post number,
            # not only a leading quote reference - kept as in the original.
            msg = msg.replace(post_number, '')
        msg = msg.strip()
        if len(msg) > max_chars:
            msg = f'{msg[:max_chars]}...'
        return msg

    description = strip_bbcode(latest_article.description, post_number)

    # Join the non-empty pieces with single spaces so an absent timestamp no
    # longer glues 'new post' onto the sub name or doubles up the padding.
    pieces = (
        '/s',
        f'{latest_article.terminus_sub}',
        timestamp,
        answer,
        author,
        description,
    )
    return ' '.join(piece for piece in pieces if piece)
|
||||
|
||||
async def get_feed(
|
||||
session: aiohttp.ClientSession,
|
||||
color: str,
|
||||
|
@ -648,19 +695,26 @@ class Irc_bot:
|
|||
feed = feedparser.parse(html)
|
||||
if feed.entries:
|
||||
self.feed_count += 1
|
||||
shorten = get_short_url(url)
|
||||
latest_article = feed.entries[0]
|
||||
title = latest_article.title
|
||||
link = latest_article.link
|
||||
# title = latest_article.title
|
||||
title = get_title(shorten, latest_article)
|
||||
# link = latest_article.link
|
||||
# print(f"[Processing] TITLE {title} | feed_dict[latest] {(self.feed_dict[url])['latest']} | {(self.feed_dict[url])['first_run']}")
|
||||
if title != (self.feed_dict[url])['latest'] and (self.feed_dict[url])['first_run'] == False:
|
||||
for i in range(0, len(feed.entries)):
|
||||
a = feed.entries[i]
|
||||
if a.title not in (self.feed_dict[url])['title']:
|
||||
(self.feed_dict[url])['title'].append(a.title)
|
||||
a_title = get_title(shorten, a)
|
||||
# if a.title not in (self.feed_dict[url])['title']:
|
||||
if a_title not in (self.feed_dict[url])['title']:
|
||||
(self.feed_dict[url])['title'].append(a_title)
|
||||
if shorten != 'hackernews.i2p':
|
||||
if a.link[:4].lower() != 'http':
|
||||
post_url = f'{url}{a.link}'
|
||||
else:
|
||||
post_url = a.link
|
||||
else:
|
||||
post_url = a.comments.replace('https://news.ycombinator.com', 'http://hackernews.i2p')
|
||||
(self.feed_dict[url])['link'].append(post_url)
|
||||
try:
|
||||
(self.feed_dict[url])['description'].append(a.description)
|
||||
|
@ -674,7 +728,17 @@ class Irc_bot:
|
|||
msg = []
|
||||
entries = len((self.feed_dict[url])['title'])
|
||||
irc_pulls = entries - self.max_posts_per_pull
|
||||
switch_urls = [
|
||||
'iranfreedom.org',
|
||||
'paltepuk.neocities.org',
|
||||
'127.0.0.1',
|
||||
'127.0.0.1:7672',
|
||||
'localhost',
|
||||
'localhost:7672',
|
||||
]
|
||||
forbidden = ['#i2p-news-clean']
|
||||
for i in range(0, entries):
|
||||
# for i in range(entries, 0, -1):
|
||||
if (self.feed_dict[url])['posted'][i] == False:
|
||||
msg_title = (self.feed_dict[url])['title'][i]
|
||||
msg_description = (self.feed_dict[url])['description'][i]
|
||||
|
@ -682,10 +746,19 @@ class Irc_bot:
|
|||
msg_description = f'{msg_description[:50]}...'
|
||||
msg_url = (self.feed_dict[url])['link'][i]
|
||||
col = self.color_dict[feed_url[url]['category']]
|
||||
shorten = get_short_url(url)
|
||||
if get_host(msg_url).casefold() == '127.0.0.1':
|
||||
url_q = msg_url.split(shorten)[1]
|
||||
msg_url = f'http://{shorten}{url_q}'
|
||||
the_host = get_host(msg_url)
|
||||
# if the_host in switch_urls:
|
||||
# # url_q = msg_url.split(shorten)[1]
|
||||
# url_q = msg_url.replace(the_host, shorten)
|
||||
# msg_url = f'http://{url_q}'
|
||||
if the_host in switch_urls:
|
||||
url_q = msg_url.replace(the_host, shorten)
|
||||
if len(url_q) > 7:
|
||||
url_q = url_q.replace('https://', 'http://')
|
||||
if url_q[:7] != 'http://':
|
||||
url_q = f'http://{url_q}'
|
||||
msg_url = url_q
|
||||
|
||||
msg_s = (f"{col}[{shorten}]{Color.normal} {msg_title} - {msg_url}")
|
||||
# msg_f = f'''{msg_title} - {msg_description} ::{msg_url}'''
|
||||
msg_f = f'''::{msg_url} @@@{msg_title}'''
|
||||
|
@ -693,11 +766,11 @@ class Irc_bot:
|
|||
self.new_posts += 1
|
||||
if i >= irc_pulls:
|
||||
for item in self.channel_join:
|
||||
if item not in forbidden:
|
||||
send_irc_msg(self, msg_s, self.irc, item)
|
||||
save_feed_file(self, f'''{shorten}:{feed_url[url]['category']}''', msg_f, script_directory, feed_url[url]['category'])
|
||||
save_feed_file(self, f'''{shorten}:{feed_url[url]['category']}''', msg_f, script_directory, 'all')
|
||||
# if feed_url[url]['category'] == 'torrents':
|
||||
|
||||
if shorten.lower() == 'tracker2.postman.i2p' or is_url_ours(url) == True or feed_url[url]['category'].lower() == 'torrents':
|
||||
for c in self.channel_send:
|
||||
payload = {
|
||||
|
@ -707,6 +780,17 @@ class Irc_bot:
|
|||
'url': msg_url,
|
||||
}
|
||||
send_message(self.bot_send, payload)
|
||||
else:
|
||||
for item in forbidden:
|
||||
send_irc_msg(self, msg_s, self.irc, item)
|
||||
if shorten.lower() == 'gatheryourparty.i2p':
|
||||
payload = {
|
||||
'sender': get_short_url(url),
|
||||
'send_to': '#gatheryourparty',
|
||||
'title': msg_title,
|
||||
'url': msg_url,
|
||||
}
|
||||
send_message(self.bot_send, payload)
|
||||
generate_rss_feed()
|
||||
|
||||
|
||||
|
@ -720,8 +804,9 @@ class Irc_bot:
|
|||
(self.feed_dict[url])['first_run'] = False
|
||||
for i in range(0, len(feed.entries)):
|
||||
a = feed.entries[i]
|
||||
if a.title not in (self.feed_dict[url])['title']:
|
||||
(self.feed_dict[url])['title'].append(a.title)
|
||||
a_title = get_title(shorten, a)
|
||||
if a_title not in (self.feed_dict[url])['title']:
|
||||
(self.feed_dict[url])['title'].append(a_title)
|
||||
(self.feed_dict[url])['link'].append(a.link)
|
||||
try:
|
||||
(self.feed_dict[url])['description'].append(a.description)
|
||||
|
@ -734,6 +819,7 @@ class Irc_bot:
|
|||
except Exception as e:
|
||||
print(f'[{str(self.feed_count)}/{str(len(self.feed_dict))}] RSS feed error {e} on {get_short_url(url)}')
|
||||
|
||||
|
||||
async def gather_feeds(self, feeds, proxy_url, **kwargs):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
tasks = []
|
||||
|
@ -962,7 +1048,7 @@ class Irc_bot:
|
|||
self.new_users.pop(joined_named)
|
||||
# msg.append(f'no longer new user: {joined_named}')
|
||||
else:
|
||||
msg.append(f'new user')
|
||||
# msg.append(f'new user')
|
||||
if joined_named not in self.new_users:
|
||||
self.new_users[joined_named] = 0
|
||||
time_now_obj = datetime.now()
|
||||
|
@ -1046,6 +1132,8 @@ class Irc_bot:
|
|||
new_nick = True
|
||||
elif line[3].lower() == ":if" and line[4].lower() == 'you':
|
||||
identify_bot(self)
|
||||
elif line[2] == 375:
|
||||
identify_bot(self)
|
||||
elif line[3].lower() == ":this" and line[6].lower() == 'registered.':
|
||||
identify_bot(self)
|
||||
elif user.lower() == 'nickserv':
|
||||
|
@ -1369,7 +1457,7 @@ class Irc_bot:
|
|||
send_to = where_to_send(line)
|
||||
msg = [
|
||||
f'i follow {str(len(feed_url))} RSS feeds accross i2p for updates.',
|
||||
f'message {primary} if you want your feed added here. It must be accessible within i2p',
|
||||
f'message an OP if you want your feed added here. It must be accessible within i2p',
|
||||
]
|
||||
for item in msg:
|
||||
send_irc_msg(self, item, self.irc, send_to)
|
||||
|
@ -1448,6 +1536,9 @@ class Irc_bot:
|
|||
register_bot(self, line)
|
||||
elif line[1] == '451' and line[2] == username:
|
||||
register_bot(self, line)
|
||||
elif line[1] == '376' or line[1] == '266':
|
||||
print('IDENTIFY BOT')
|
||||
identify_bot(self)
|
||||
elif len(line) > 3:
|
||||
if len(line[3]) > 2:
|
||||
if (line[3])[1:] == f'{tr}admin':
|
||||
|
@ -1466,7 +1557,7 @@ class Irc_bot:
|
|||
return(state)
|
||||
self.irc = irc_connect(self.username, self.password, self.hostname, self.port)
|
||||
|
||||
time.sleep(7)
|
||||
time.sleep(5)
|
||||
JOI = f'JOIN {self.channel_join}\r\n'
|
||||
JOIN = JOI.encode(encoding='UTF-8',errors='strict')
|
||||
PASS = f'PRIVMSG NickServ IDENTIFY {self.password}\r\n'
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
19/09/2025: added better parsing for terminus.i2p and rss feeds from gitea, separate "clean" channel (no torrents)
|
||||
18/09/2025: added 25 new rss feeds, removed some dead ones
|
||||
28/03/2025: added rss feeds from git.simp.i2p along with other new feeds, fixed another bug with html code showing up correctly, updated footer
|
||||
21/01/2025: Improvements in parsing urls and html tags
|
||||
08/12/2024: CSS improvements, thank you to dr|z3d, bug fixes
|
||||
30/11/2024: Live testing in IRC, added /feedlinks so alive feeds can be copied as a list
|
||||
|
|
|
@ -295,7 +295,7 @@ def format_msg(single_msg_raw, topic):
|
|||
post_url = f'http://{nick_raw}{l[i][2:]}'
|
||||
else:
|
||||
post_url = (single_msg.split('::')[1]).split(' @@@')[0]
|
||||
if get_host(post_url).casefold() == '127.0.0.1':
|
||||
if get_host(post_url) == '127.0.0.1':
|
||||
url_q = post_url.split(get_short_url(post_url))[1]
|
||||
post_url = f'http://{nick_raw}{url_q}'
|
||||
l[i] = ''
|
||||
|
|
54
rsslist.txt
54
rsslist.txt
|
@ -1,12 +1,10 @@
|
|||
http://simp.i2p/feed torrents
|
||||
http://righttoprivacy.i2p/rss/ blog
|
||||
http://angelwood.i2p/feed.rss blog
|
||||
http://gl6vzyjnv62kp3vjmmouwlvsrbp2bujk2cev6frgnln5vfepy2xq.b32.i2p/index.xml blog
|
||||
http://dujemihanovic.i2p/index.xml blog
|
||||
http://torrentfreak.i2p/feed/ torrents
|
||||
http://arstechnica.i2p/feed news
|
||||
http://simp.i2p/blog/feed blog
|
||||
http://ghativega.i2p/atom.xml blog
|
||||
http://cool-website.i2p/rss.xml blog
|
||||
http://jacksonchen666.i2p/feeds/ blog
|
||||
http://mdleom.i2p/atom.xml blog
|
||||
|
@ -19,9 +17,7 @@ http://ellipticnews.i2p/feeds/posts/default blog
|
|||
http://kuukkanen.i2p/feed.xml blog
|
||||
http://hackaday.i2p/feed news
|
||||
http://hongkongfreepress.i2p/ news
|
||||
http://git.idk.i2p/i2p-hackers/i2p.i2p/-/merge_requests.atom?state=opened developer
|
||||
http://zzz.i2p/topics.rss forum
|
||||
http://git.idk.i2p/i2p-hackers/i2p.i2p/-/issues.atom?state=opened developer
|
||||
http://stats.i2p/cgi-bin/newhosts.xml blog
|
||||
http://tracker2.postman.i2p/?view=RSS&mapset=-1 torrents
|
||||
http://hq.postman.i2p/?feed=atom blog
|
||||
|
@ -36,7 +32,6 @@ http://tails.i2p/news/index.en.atom blog
|
|||
http://discuss.i2p/app.php/feed/forums?sid=69da73f2a323f778252a08a4c93a887d forum
|
||||
http://sciencedaily.i2p/rss news
|
||||
http://schneieronsecurity.i2p/rss news
|
||||
http://orizuru.i2p/atom.xml news
|
||||
http://s-config.i2p/rss blog
|
||||
http://shreddit.i2p/r/i2p/.rss forum
|
||||
http://natter.i2p/StormyCloudInc/rss forum
|
||||
|
@ -51,27 +46,60 @@ http://techxplore.i2p/rss-feed/ news
|
|||
http://notbob.i2p/blog.xml blog
|
||||
http://theatlantic.i2p/feed/all/ news
|
||||
http://i2p-projekt.i2p/en/feed/blog/atom developer
|
||||
http://amnesie.i2p/rss.xml blog
|
||||
http://1337z.i2p/rss.xml blog
|
||||
http://git.skank.i2p/rez/plus.atom developer
|
||||
http://hackernews.i2p/rss news
|
||||
http://paltepuk.i2p/blog/index.i2p.xml blog
|
||||
http://deurachavich.i2p/rss.xml blog
|
||||
http://fury.i2p/feed.xml blog
|
||||
https://blog.everypizza.im/feed/feed.xml blog
|
||||
http://mdleom.i2p/atom.xml blog
|
||||
http://masflam.i2p/blog/feed.rss blog
|
||||
http://libresolutionsnetwork.i2p/rss blog
|
||||
http://jacksonchen666.i2p/posts/index.xml blog
|
||||
http://dankaminsky.i2p/feed blog
|
||||
http://darkrealm.i2p/index.php?act=rss blog
|
||||
http://franciscogg.i2p/rss.xml blog
|
||||
http://shadowforums.i2p/!feed forum
|
||||
http://darkrealm.i2p/index.php?act=rss blog
|
||||
http://kulervod.i2p/feeds/local.xml?sort=Active forum
|
||||
http://git.simp.i2p/simp/rayhunter.rss developer
|
||||
http://git.simp.i2p/simp/guessthesong.rss developer
|
||||
http://git.simp.i2p/fuzzykitten/dev_endboard.rss developer
|
||||
http://git.simp.i2p/simp/i2music.rss developer
|
||||
http://git.simp.i2p/simp/i2pnews.rss developer
|
||||
http://git.simp.i2p/simp/shorturl.rss developer
|
||||
http://taz.i2p/rss.xml blog
|
||||
http://forum.midgard.i2p/syndication.php forum
|
||||
http://git.simp.i2p/simp/TuckIt.rss developer
|
||||
http://terminus.i2p/rss forum
|
||||
http://git.idk.i2p/I2P_Developers/i2p.i2p.rss developer
|
||||
http://git.idk.i2p/idk/Go_I2p.rss developer
|
||||
http://git.idk.i2p/I2P_Developers/i2p.plugins.zzzot.rss developer
|
||||
http://git.idk.i2p/idk/I2PSnark-RPC.rss developer
|
||||
http://git.community.i2p/PurpleI2P/pyseeder.rss developer
|
||||
http://git.community.i2p/PurpleI2P/i2pd-tools.rss developer
|
||||
http://git.community.i2p/PurpleI2P/i2pd.rss developer
|
||||
http://gatheryourparty.i2p/rss.xml blog
|
||||
http://monkemanx.i2p/index.xml blog
|
||||
http://masflam.i2p/feed/ blog
|
||||
http://thricegreat.i2p/rss.xml blog
|
||||
http://taiwan.i2p/rss2.xml blog
|
||||
http://shittyweb.i2p/blog/feed.xml blog
|
||||
http://silosneeded.i2p/feed.xml blog
|
||||
http://pabloshell.i2p/rss.xml blog
|
||||
http://masflam.i2p/feed/?type=rss blog
|
||||
http://maidzone.i2p/blog.rss blog
|
||||
http://med0ed.i2p/rss.xml blog
|
||||
http://jerryhome.i2p/atom.xml blog
|
||||
http://lulu-cats.i2p/index.xml blog
|
||||
http://joshuatshaffer.i2p/index.xml blog
|
||||
http://jakob.i2p/feed.xml blog
|
||||
http://itphx.i2p/feed blog
|
||||
http://gettie.i2p/rss.xml blog
|
||||
http://darksavantcrusaders.i2p/feed.xml blog
|
||||
http://cxj.i2p/index.xml blog
|
||||
http://cosmicflow.i2p/rss/news.xml blog
|
||||
http://gedanken.i2p/feed.rss blog
|
||||
http://iranfreedom.i2p/en/feed/ news
|
||||
http://opendemocracy.i2p/feed news
|
||||
http://sur.i2p/feed news
|
||||
http://upstreamjournal.i2p/feed news
|
||||
http://clap.i2p/feed news
|
||||
http://git.simp.i2p/simp/Eepstore.rss developer
|
||||
http://git.simp.i2p/simp/emissary.rss developer
|
||||
http://git.simp.i2p/simp/mantaray.rss developer
|
||||
http://git.simp.i2p/simp/yosemite.rss developer
|
Loading…
Reference in New Issue