#!/usr/bin/env python3
# mirror of http://git.simp.i2p/simp/i2music.git

import binascii, urllib.parse, bencoding, asyncio

from datetime import datetime
from urllib import request
from configload import list_ini, settings_path, scraper_hostname, scraper_http_proxy, base_url


class Scrape():

    def __init__(self, info_hash: str, tracker_url: str, hostname: str, port: str) -> None:
        # Results keyed by info hash; self.error stays True unless a scrape succeeds.
        self.peer_dict = {}
        self.error = True

        # Build the scrape URL: the tracker host comes from configload.base_url(),
        # and the raw (binary) info hash is percent-encoded into the query string.
        base_tracker_url = base_url(tracker_url)
        url_base = f'http://{base_tracker_url}/scrape.php?info_hash'
        encoded_info_hash = binascii.a2b_hex(info_hash)
        params = {'': encoded_info_hash}  # urlencode({'': ...}) yields '=%XX%XX...', completing '?info_hash='
        safe_url = url_base + urllib.parse.urlencode(params)
        def get_decoded_dict(d):
            # Recursively decode the byte-string keys of a bdecoded dict to str,
            # leaving values untouched except for nested dicts.
            generated_dict = {}
            for k, v in d.items():
                if isinstance(k, bytes):
                    try:
                        k = k.decode('utf8')
                    except UnicodeDecodeError:
                        pass
                if isinstance(v, dict):
                    generated_dict[k] = get_decoded_dict(v)
                else:
                    generated_dict[k] = v
            return generated_dict

        # Route the request through the configured HTTP proxy (the I2P HTTP proxy)
        # unless the tracker is local.
        proxies = {
            'http': f'http://{hostname}:{port}',
        }
        if '127.0.0.1' not in safe_url:
            proxy_support = request.ProxyHandler(proxies)
            opener = request.build_opener(proxy_support)
            request.install_opener(opener)
            req = request.Request(safe_url)
            try:
                handle = urllib.request.urlopen(req, timeout=6)
            except Exception as e:
                # On timeout/error, 'handle' stays undefined; the read below then
                # fails and the error branch records a TIMEOUT entry for this tracker.
                base_tracker_url = f'TIMEOUT {base_tracker_url}'
                # print(e)
        else:
            handle = urllib.request.urlopen(safe_url)
        timestamp_x = "{:%Y-%m-%d %H:%M:%S.%f}".format(datetime.now())
        try:
            decoded = bencoding.bdecode(handle.read())
            decoded = get_decoded_dict(decoded)
            for raw_hash, stats in decoded['files'].items():
                # The scrape response keys its 'files' dict by the raw 20-byte info hash;
                # bencoded dict keys are sorted, so the per-torrent stats iterate in the
                # order complete, downloaded, incomplete.
                nice_hash = binascii.b2a_hex(raw_hash)  # hex form of the response key (currently unused)
                vals_list = []
                for value in stats.values():
                    vals_list.append(int(value))
                vals_dict = {
                    'tracker': base_tracker_url,
                    'seeds': vals_list[0],
                    'leeches': vals_list[2],
                    'completed': vals_list[1],
                    'timestamp': timestamp_x,
                }
                self.error = False
                self.peer_dict[info_hash] = vals_dict
        except Exception as e:
            # Missing or invalid response: record only the tracker (possibly marked
            # TIMEOUT above) and the timestamp, and leave self.error set.
            self.error = True
            vals_dict = {
                'tracker': base_tracker_url,
                'timestamp': timestamp_x,
            }
            self.peer_dict[info_hash] = vals_dict
            # print(e)
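
# The Scrape() constructor does blocking urllib I/O, so gathering many
# run_scraper() coroutines below still issues the HTTP requests one after
# another. A minimal wrapper sketch for true concurrency (assumes Python 3.9+
# for asyncio.to_thread); it is not wired into the code below:
async def scrape_in_thread(info_hash, tracker_url, hostname, port):
    # Run the blocking constructor in a worker thread so the event loop stays free.
    return await asyncio.to_thread(Scrape, info_hash, tracker_url, hostname, port)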


async def scrape_trackers(t_dict: dict) -> dict:
    print("scraping trackers...")

    async def run_scraper(infohash_to_scrape, t, hostname, port):
        # Scrape one tracker; fall back to zeros when the tracker is unreachable
        # or the stats keys are missing.
        try:
            s = Scrape(infohash_to_scrape, t, hostname, port)
            # _tracker = s.peer_dict[infohash_to_scrape]['tracker']
            seeds = s.peer_dict[infohash_to_scrape]['seeds']
            leeches = s.peer_dict[infohash_to_scrape]['leeches']
            completed = s.peer_dict[infohash_to_scrape]['completed']
        except Exception as e:
            # _tracker = t
            seeds, leeches, completed = 0, 0, 0
        # print(f"{tracker_url} S/L (completed) {seeds} / {leeches} ({completed})")
        return seeds, leeches, completed

    tracker_list = list_ini(settings_path, 'trackers', 'tracker_list')
    for pl in t_dict:
        infohash_to_scrape = t_dict[pl]['infohash']
        # hostname = '127.0.0.1'  # unused local fallbacks; the proxy settings
        # port = '4104'           # actually come from configload below
        tasks = []
        for t in tracker_list:
            tasks.append(run_scraper(infohash_to_scrape=infohash_to_scrape, t=t, hostname=scraper_hostname, port=scraper_http_proxy))
        tup_ = await asyncio.gather(*tasks, return_exceptions=True)

        # Keep the best (highest) counts reported by any tracker.
        s_most, l_most, c_most = 0, 0, 0
        for u in tup_:
            if u[0] > s_most:
                s_most = u[0]
            if u[1] > l_most:
                l_most = u[1]
            if u[2] > c_most:
                c_most = u[2]
        t_dict[pl]['seeds'] = s_most
        t_dict[pl]['leeches'] = l_most
        t_dict[pl]['completed'] = c_most
    return t_dict
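

# A minimal usage sketch, assuming t_dict maps item keys to dicts that already
# carry an 'infohash' entry (hex string), which is what scrape_trackers() reads.
# The hash below is a placeholder, not a real torrent.
if __name__ == '__main__':
    example_dict = {
        'example-item': {'infohash': '0123456789abcdef0123456789abcdef01234567'},
    }
    print(asyncio.run(scrape_trackers(example_dict)))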