#!/usr/bin/env python3
"""Scrape BitTorrent trackers for seed/leech/completed counts over an HTTP proxy."""

import asyncio
import binascii
import urllib.parse
from datetime import datetime
from urllib import request

import bencoding

from configload import list_ini, settings_path, scraper_hostname, scraper_http_proxy, base_url


class Scrape:
    """Perform a single HTTP scrape of one tracker for one info hash."""

    def __init__(self, info_hash: str, tracker_url: str, hostname: str, port: str) -> None:
        self.peer_dict = {}
        self.error = True

        base_tracker_url = base_url(tracker_url)
        url_base = f'http://{base_tracker_url}/scrape.php?info_hash'
        # The info hash is sent as raw bytes; urlencode() with an empty key
        # percent-encodes it into "info_hash=%xx%xx...".
        encoded_info_hash = binascii.a2b_hex(info_hash)
        params = {'': encoded_info_hash}
        safe_url = url_base + urllib.parse.urlencode(params)

        def get_decoded_dict(d):
            """Recursively decode the bytes keys of a bdecoded dict to str."""
            generated_dict = {}
            for k, v in d.items():
                if isinstance(k, bytes):
                    try:
                        k = k.decode('utf8')
                    except UnicodeDecodeError:
                        pass
                if isinstance(v, dict):
                    generated_dict[k] = get_decoded_dict(v)
                else:
                    generated_dict[k] = v
            return generated_dict

        proxies = {
            'http': f'http://{hostname}:{port}',
        }
        handle = None
        if '127.0.0.1' not in safe_url:
            # Route the scrape through the configured HTTP proxy. A dedicated
            # opener avoids installing a process-wide proxy handler.
            proxy_support = request.ProxyHandler(proxies)
            opener = request.build_opener(proxy_support)
            req = request.Request(safe_url)
            try:
                handle = opener.open(req, timeout=6)
            except Exception:
                base_tracker_url = f'TIMEOUT {base_tracker_url}'
        else:
            handle = request.urlopen(safe_url)

        timestamp_x = "{:%Y-%m-%d %H:%M:%S.%f}".format(datetime.now())
        try:
            decoded = get_decoded_dict(bencoding.bdecode(handle.read()))
            # The scrape response's 'files' dict maps each info hash to a stats
            # dict whose keys arrive in bencoded (sorted) order:
            # 'complete' (seeds), 'downloaded' (completed), 'incomplete' (leeches).
            for file_hash, stats in decoded['files'].items():
                vals_list = [int(value) for value in stats.values()]
                vals_dict = {
                    'tracker': base_tracker_url,
                    'seeds': vals_list[0],
                    'leeches': vals_list[2],
                    'completed': vals_list[1],
                    'timestamp': timestamp_x,
                }
                self.error = False
                self.peer_dict[info_hash] = vals_dict
        except Exception:
            # Timeout, unreachable tracker, or malformed response: record the
            # attempt without counts so the caller can fall back to zeros.
            self.error = True
            vals_dict = {
                'tracker': base_tracker_url,
                'timestamp': timestamp_x,
            }
            self.peer_dict[info_hash] = vals_dict


async def scrape_trackers(t_dict: dict) -> dict:
    """Scrape every configured tracker for each torrent in t_dict and keep the best counts."""
    print("scraping trackers...")

    async def run_scraper(infohash_to_scrape, t, hostname, port):
        try:
            # Scrape() does blocking network I/O, so run it in a worker thread
            # to keep the event loop free while all trackers are queried.
            s = await asyncio.to_thread(Scrape, infohash_to_scrape, t, hostname, port)
            seeds = s.peer_dict[infohash_to_scrape]['seeds']
            leeches = s.peer_dict[infohash_to_scrape]['leeches']
            completed = s.peer_dict[infohash_to_scrape]['completed']
        except Exception:
            seeds, leeches, completed = 0, 0, 0
        return seeds, leeches, completed

    tracker_list = list_ini(settings_path, 'trackers', 'tracker_list')
    for pl in t_dict:
        infohash_to_scrape = t_dict[pl]['infohash']
        tasks = [
            run_scraper(infohash_to_scrape=infohash_to_scrape, t=t,
                        hostname=scraper_hostname, port=scraper_http_proxy)
            for t in tracker_list
        ]
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Keep the highest count reported by any tracker.
        s_most, l_most, c_most = 0, 0, 0
        for u in results:
            if isinstance(u, Exception):
                continue
            s_most = max(s_most, u[0])
            l_most = max(l_most, u[1])
            c_most = max(c_most, u[2])
        t_dict[pl]['seeds'] = s_most
        t_dict[pl]['leeches'] = l_most
        t_dict[pl]['completed'] = c_most
    return t_dict
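

# Usage sketch (assumption, not part of the original module): a minimal way to
# drive scrape_trackers() directly, assuming t_dict is keyed by torrent name and
# each entry carries an 'infohash' field, as the code above expects. The entry
# name and the 40-character hex hash below are hypothetical placeholders.
if __name__ == '__main__':
    example_t_dict = {
        'example-torrent': {
            # Hypothetical info hash; replace with a real 40-char hex digest.
            'infohash': 'aabbccddeeff00112233445566778899aabbccdd',
        },
    }
    # asyncio.run() drives the scrape loop and returns the same dict with
    # 'seeds', 'leeches', and 'completed' filled in for each entry.
    print(asyncio.run(scrape_trackers(example_t_dict)))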