i2music/scrape.py

#!/usr/bin/env python3
import binascii, urllib.parse, bencoding, asyncio  # bencoding: third-party bencode decoder
from datetime import datetime
from urllib import request
from configload import list_ini, settings_path, scraper_hostname, scraper_http_proxy, base_url
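
# configload is assumed to be the project's local settings module: list_ini() reads a
# list from the INI file at settings_path, base_url() reduces a tracker URL to its
# host, and scraper_hostname / scraper_http_proxy are the proxy host and port.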


class Scrape():
    def __init__(self, info_hash: str, tracker_url: str, hostname: str, port: str) -> None:
        self.peer_dict = {}
        self.error = True
        base_tracker_url = base_url(tracker_url)
        url_base = f'http://{base_tracker_url}/scrape.php?info_hash'
        encoded_info_hash = binascii.a2b_hex(info_hash)
        params = {'': encoded_info_hash}
        safe_url = url_base + urllib.parse.urlencode(params)
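        # safe_url ends up as http://<tracker>/scrape.php?info_hash=<percent-encoded 20 raw hash bytes>:
        # urlencode() escapes the bytes, and the empty dict key contributes only the '='.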

        def get_decoded_dict(d):
            # bencoding yields bytes keys; recursively turn them into str where possible
            generated_dict = {}
            for k, v in d.items():
                if isinstance(k, bytes):
                    try:
                        k = k.decode('utf8')
                    except UnicodeDecodeError:
                        pass
                if isinstance(v, dict):
                    generated_dict[k] = get_decoded_dict(v)
                else:
                    generated_dict[k] = v
            return generated_dict

        proxies = {
            'http': f'http://{hostname}:{port}',
        }
        handle = None
        if '127.0.0.1' not in safe_url:
            # non-local trackers are fetched through the configured HTTP proxy
            proxy_support = request.ProxyHandler(proxies)
            opener = request.build_opener(proxy_support)
            request.install_opener(opener)
            req = request.Request(safe_url)
            try:
                handle = urllib.request.urlopen(req, timeout=6)
            except Exception as e:
                # handle stays None; the decode below then falls into its except branch
                base_tracker_url = f'TIMEOUT {base_tracker_url}'
                # print(e)
        else:
            handle = urllib.request.urlopen(safe_url)
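        # A conventional scrape response decodes to
        #   {'files': {<20 raw hash bytes>: {'complete': N, 'downloaded': N, 'incomplete': N}}}
        # Bencoded dict keys arrive sorted, so iterating a stats dict yields
        # complete, downloaded, incomplete in that order -- hence the index mapping
        # seeds=vals_list[0], completed=vals_list[1], leeches=vals_list[2] below.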
        timestamp_x = "{:%Y-%m-%d %H:%M:%S.%f}".format(datetime.now())
        try:
            decoded = bencoding.bdecode(handle.read())
            ret = {}
            decoded = get_decoded_dict(decoded)
            for hash, stats in decoded['files'].items():
                nice_hash = binascii.b2a_hex(hash)
                vals_list = []
                for key, value in stats.items():
                    vals_list.append(int(value))
                vals_dict = {
                    'tracker': base_tracker_url,
                    'seeds': vals_list[0],
                    'leeches': vals_list[2],
                    'completed': vals_list[1],
                    'timestamp': timestamp_x,
                }
                self.error = False
                self.peer_dict[info_hash] = vals_dict
        except Exception as e:
            # on timeout or a malformed/failed response, record only the tracker
            # and timestamp and leave self.error set
            self.error = True
            vals_dict = {
                'tracker': base_tracker_url,
                'timestamp': timestamp_x,
            }
            self.peer_dict[info_hash] = vals_dict
            # print(e)
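
# Standalone usage sketch (hypothetical infohash and tracker; '127.0.0.1'/'4104'
# mirror the defaults used further down and are assumptions, not requirements):
#   s = Scrape('0123456789abcdef0123456789abcdef01234567',
#              'http://tracker.example.i2p/announce.php', '127.0.0.1', '4104')
#   if not s.error:
#       print(s.peer_dict)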


async def scrape_trackers(t_dict: dict) -> dict:
    print("scraping trackers...")

    async def run_scraper(infohash_to_scrape, t, hostname, port):
        # NOTE: Scrape() does blocking I/O (urlopen), so these coroutines do not
        # actually overlap; asyncio.gather() simply collects their results in order.
        try:
            s = Scrape(infohash_to_scrape, t, hostname, port)
            # _tracker = s.peer_dict[infohash_to_scrape]['tracker']
            seeds = s.peer_dict[infohash_to_scrape]['seeds']
            leeches = s.peer_dict[infohash_to_scrape]['leeches']
            completed = s.peer_dict[infohash_to_scrape]['completed']
        except Exception as e:
            # _tracker = t
            # a failed or timed-out scrape counts as zero peers
            seeds, leeches, completed = 0, 0, 0
        # print(f"{tracker_url} S/L (completed) {seeds} / {leeches} ({completed})")
        return seeds, leeches, completed
    tracker_list = list_ini(settings_path, 'trackers', 'tracker_list')
    for pl in t_dict:
        infohash_to_scrape = t_dict[pl]['infohash']
        hostname = '127.0.0.1'  # local fallbacks; the tasks below use the
        port = '4104'           # scraper_hostname / scraper_http_proxy settings instead
        tasks = []
        for t in tracker_list:
            tasks.append(run_scraper(infohash_to_scrape=infohash_to_scrape, t=t,
                                     hostname=scraper_hostname, port=scraper_http_proxy))
        tup_ = await asyncio.gather(*tasks, return_exceptions=True)
        # keep the highest figure reported by any tracker
        s_most, l_most, c_most = 0, 0, 0
        for u in tup_:
            if u[0] > s_most:
                s_most = u[0]
            if u[1] > l_most:
                l_most = u[1]
            if u[2] > c_most:
                c_most = u[2]
        t_dict[pl]['seeds'] = s_most
        t_dict[pl]['leeches'] = l_most
        t_dict[pl]['completed'] = c_most
    return t_dict
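

# Minimal driver sketch (not part of the original flow): t_dict is assumed to map an
# arbitrary key to a dict carrying a 40-character hex 'infohash'; the hash below is a
# placeholder, not a real torrent.
if __name__ == '__main__':
    example = {'demo': {'infohash': '0123456789abcdef0123456789abcdef01234567'}}
    result = asyncio.run(scrape_trackers(example))
    print(result['demo']['seeds'], result['demo']['leeches'], result['demo']['completed'])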