mirror of http://git.simp.i2p/simp/PyLink.git
408 lines
13 KiB
Python
Executable File
408 lines
13 KiB
Python
Executable File
"""
|
|
Regex Filter Plugin for PyLink
|
|
Comprehensive message filtering with PCRE2 and flood protection
|
|
"""
|
|
import os
|
|
import time
|
|
import hashlib
|
|
import threading
|
|
import pcre2
|
|
from collections import defaultdict, deque
|
|
from pathlib import Path
|
|
|
|
from pylinkirc import utils, conf, world
|
|
from pylinkirc.classes import *
|
|
from pylinkirc.log import log
|
|
|
|
# Plugin metadata
__version__ = "1.0.0"


def _new_flood_history():
    """Return an empty message history capped at 50 entries."""
    return deque(maxlen=50)


def _new_similarity_history():
    """Return an empty history capped at 20 entries."""
    return deque(maxlen=20)


# Module-level state shared by the hook handlers.
# flood_tracker maps a message source to its recent (timestamp, hash) entries.
flood_tracker = defaultdict(_new_flood_history)
similarity_cache = defaultdict(_new_similarity_history)
file_watchers = {}
# Reference point used to decide whether the blacklist file needs reloading.
last_reload = 0
# Re-entrant lock taken while the flood tracker is read and updated.
filter_lock = threading.RLock()
|
|
|
|
class Cr:
    """Namespace holding the compiled blacklist patterns."""

    # Class attribute: one list shared by everything that reads Cr.pattern.
    pattern = list()
|
|
|
|
def _get_config():
    """Return the plugin configuration merged with the built-in defaults.

    The ``regex_filter`` section of ``conf.conf`` is read on every call.
    Values present in that section win; any missing key falls back to the
    default below.

    Returns:
        dict: A fresh dictionary. The live configuration section is never
        modified, so defaults are not written back into ``conf.conf``.
    """
    defaults = {
        'blacklist_file': './data/regex_blacklist.txt',
        'log_file': './logs/regex_filter.log',
        'flood_window': 30,  # seconds
        'flood_threshold': 3,  # messages
        'similarity_threshold': 0.8,  # 80% similar
        'enabled': True,
        'log_blocked': True,
        'auto_reload': True,
        'debug': False,
    }

    # A section that exists but is empty comes back as None; treat it like
    # a missing section instead of failing on it.
    user_config = conf.conf.get('regex_filter') or {}

    # Merge into a copy so the shared configuration object stays untouched.
    merged = dict(defaults)
    merged.update(user_config)
    return merged
|
|
|
|
def _ensure_directories():
    """Create the blacklist/log directories and a starter blacklist file.

    The parent directories of the configured ``blacklist_file`` and
    ``log_file`` are created if needed. When the blacklist file does not
    exist yet, it is written with a commented-out template. Any failure is
    logged rather than raised.
    """
    settings = _get_config()

    try:
        # Make sure both target directories exist.
        for key in ('blacklist_file', 'log_file'):
            Path(settings[key]).parent.mkdir(parents=True, exist_ok=True)

        blacklist_path = Path(settings['blacklist_file'])
        if blacklist_path.exists():
            return

        # Starter content: comments only, so nothing is filtered by default.
        template = (
            "# Regex Blacklist Patterns - One regex per line\n",
            "# Test all patterns thoroughly before deploying\n",
            "# Examples (commented out by default):\n",
            "# \\bhttps?://(?:bit\\.ly|tinyurl\\.com|t\\.co)/\\w+\\b\n",
            "# \\b(?:spam|phishing|malware)\\b\n",
            "\n",
        )
        with open(blacklist_path, 'w') as handle:
            for text in template:
                handle.write(text)
        log.info(f"Created default blacklist file: {blacklist_path}")

    except Exception as e:
        log.error(f"Failed to create directories: {e}")
|
|
|
|
def _compile_regex_filters():
    """Read the blacklist file and rebuild the compiled pattern list.

    Each non-empty line that does not start with ``#`` is compiled with
    ``pcre2.compile(..., jit=True)``. A line that fails to compile is logged
    with its line number and skipped; the remaining lines are still loaded.

    If the blacklist file does not exist, an error is logged and the
    currently loaded patterns are left as they are.
    """
    config = _get_config()
    blacklist_file = config['blacklist_file']

    if not os.path.isfile(blacklist_file):
        log.error(f"{config['blacklist_file']} not found.")
        return

    compiled = []
    with open(blacklist_file, 'r') as file:
        for line_no, line in enumerate(file, start=1):
            expression = line.replace('\n', '')
            # Skip blank lines and comment lines.
            if not expression or expression.startswith('#'):
                continue
            try:
                compiled.append(pcre2.compile(expression, jit=True))
            except Exception as e:
                log.error(f"Failed compiling pattern on line {line_no}: {e}")

    # Swap in the finished list in one step. The previous list is never
    # emptied, so code iterating over it is unaffected and there is no
    # moment where the filter has zero or only some patterns loaded.
    Cr.pattern = compiled
|
|
|
|
def _log_filter_event(event_type, source, target, reason, network_name="unknown"):
    """Append one line describing a filtering event to the filter log file.

    Nothing is written when the ``log_blocked`` setting is false. Failures
    while writing are reported through the PyLink logger and not raised.

    Args:
        event_type: Label for the kind of event (e.g. ``"PRIVMSG"``).
        source: Identifier of the sender, as passed by the caller.
        target: Identifier of the destination, as passed by the caller.
        reason: Human-readable reason the content was blocked.
        network_name: Name of the network the event came from.
    """
    config = _get_config()

    if not config.get('log_blocked', True):
        return

    try:
        log_path = Path(config['log_file'])
        log_path.parent.mkdir(parents=True, exist_ok=True)

        timestamp = time.strftime('%Y-%m-%d %H:%M:%S')
        # Don't log actual content for security reasons.
        # The entry ends with a real newline so each event gets its own line.
        log_entry = f"[{timestamp}] {network_name} {event_type}: {source} -> {target} | {reason}\n"

        with open(log_path, 'a') as f:
            f.write(log_entry)

    except Exception as e:
        log.error(f"Failed to write filter log: {e}")
|
|
|
|
def _check_file_modified(file_path):
|
|
"""Check if file has been modified since last check"""
|
|
global last_reload
|
|
|
|
try:
|
|
mtime = os.path.getmtime(file_path)
|
|
if mtime > last_reload:
|
|
last_reload = time.time()
|
|
_compile_regex_filters()
|
|
return True
|
|
except OSError:
|
|
pass
|
|
|
|
return False
|
|
|
|
def _get_message_hash(content):
|
|
"""Get hash of message content for similarity detection"""
|
|
# Normalize content for similarity checking
|
|
normalized = ''.join(content.lower().split())
|
|
return hashlib.md5(normalized.encode('utf-8')).hexdigest()
|
|
|
|
def _calculate_similarity(hash1, hash2):
|
|
"""Calculate similarity between two message hashes"""
|
|
if len(hash1) != len(hash2):
|
|
return 0.0
|
|
|
|
matches = sum(c1 == c2 for c1, c2 in zip(hash1, hash2))
|
|
return matches / len(hash1)
|
|
|
|
def _check_flood_protection(source, content):
    """Record a message and report whether the source is flooding.

    The message is hashed and stored in ``flood_tracker[source]`` with the
    current time. Entries older than ``flood_window`` seconds are dropped.
    A flood is reported when the last ``flood_threshold`` entries (the new
    message included) all reach ``similarity_threshold`` against the new
    message's hash.

    Per-source history is capped at 50 entries, so a threshold above 50 can
    never trigger. A threshold below 1 disables detection; the message is
    still recorded.

    Args:
        source: Key identifying the sender.
        content: Message text.

    Returns:
        tuple: ``(True, reason)`` on a detected flood, else ``(False, "")``.
    """
    config = _get_config()
    current_time = time.time()
    window = config.get('flood_window', 30)
    threshold = config.get('flood_threshold', 3)
    similarity_threshold = config.get('similarity_threshold', 0.8)

    msg_hash = _get_message_hash(content)

    with filter_lock:
        # Keep only entries inside the time window, then add this message.
        cutoff_time = current_time - window
        history = deque(
            (entry for entry in flood_tracker[source] if entry[0] > cutoff_time),
            maxlen=50,
        )
        history.append((current_time, msg_hash))
        flood_tracker[source] = history

        # A threshold of 0 would make the [-threshold:] slice select the
        # whole history and make "count >= 0" always true, flagging every
        # message. Treat non-positive thresholds as "detection off".
        if threshold < 1 or len(history) < threshold:
            return False, ""

        similar_count = sum(
            1
            for _, old_hash in list(history)[-threshold:]
            if _calculate_similarity(msg_hash, old_hash) >= similarity_threshold
        )
        if similar_count >= threshold:
            return True, "Flood/spam pattern detected"

    return False, ""
|
|
|
|
def _run_regex(content):
    """Test *content* against every compiled pattern in ``Cr.pattern``.

    Patterns are tried in list order using each pattern's ``match`` method.
    An exception raised for one pattern is logged and the next is tried.

    Returns:
        tuple: ``(match_result, reason)`` for the first pattern whose result
        is truthy, otherwise ``(False, '')``. The first element is whatever
        ``match`` returned, not a plain bool.

    NOTE(review): whether ``match`` is anchored at the start of the text,
    and whether it raises when nothing matches, depends on the pcre2
    binding in use; confirm against its documentation.
    """
    blocked_reason = "Content blocked by regex filter"
    for compiled in Cr.pattern:
        try:
            hit = compiled.match(content)
            if not hit:
                continue
            return hit, blocked_reason
        except Exception as e:
            log.error(f"Filter error: {e}")
    return False, ''
|
|
|
|
def _should_filter_content(source, target, content, network_name="unknown"):
    """Decide whether a piece of content must be blocked.

    Order of checks: plugin enabled, content non-blank, optional blacklist
    auto-reload, flood protection, then the regex blacklist.

    Args:
        source: Key identifying the sender (used for flood tracking).
        target: Destination of the content; not used by the checks here.
        content: Text to evaluate.
        network_name: Network name; not used by the checks here.

    Returns:
        tuple: ``(True, reason)`` if the content is blocked, otherwise
        ``(False, "")``.
    """
    settings = _get_config()

    # Plugin switched off: nothing is ever blocked.
    if not settings.get('enabled', True):
        return False, ""

    # Nothing to inspect in missing or whitespace-only content.
    if not content or content.strip() == "":
        return False, ""

    # Pick up blacklist edits before evaluating the content.
    if settings.get('auto_reload', True):
        reloaded = _check_file_modified(settings['blacklist_file'])
        if reloaded:
            log.info("Blacklist file updated, patterns reloaded")

    # Flood protection runs first, so every message is recorded in the tracker.
    flooded, flood_reason = _check_flood_protection(source, content)
    if flooded:
        return True, flood_reason

    matched, regex_reason = _run_regex(content)
    if matched:
        return True, regex_reason

    return False, ""
|
|
|
|
def handle_privmsg(irc, source, command, args):
    """Hook for PRIVMSG: blank the message text if the filter blocks it.

    Args:
        irc: Network object; only its ``name`` attribute is read.
        source: Identifier of the sender.
        command: Hook command name (unused).
        args: Hook payload; reads ``target`` and ``text`` and overwrites
            ``text`` with an empty string when the content is blocked.
    """
    if len(args) < 2:
        return

    recipient, message = args['target'], args['text']
    sender = source

    blocked, why = _should_filter_content(sender, recipient, message, irc.name)
    if not blocked:
        return

    # Record the event in the filter log, then on the PyLink console.
    _log_filter_event("PRIVMSG", sender, recipient, why, irc.name)
    log.warning(f"Regex Filter ({irc.name}): Blocked PRIVMSG from {sender} to {recipient} - {why}")

    # Block the message by emptying its content; per the original author
    # this prevents relay while maintaining protocol compliance.
    args['text'] = ""
|
|
|
|
def handle_notice(irc, source, command, args):
    """Hook for NOTICE: blank the notice text if the filter blocks it.

    Args:
        irc: Network object; only its ``name`` attribute is read.
        source: Identifier of the sender.
        command: Hook command name (unused).
        args: Hook payload; reads ``target`` and ``text`` and overwrites
            ``text`` with an empty string when the content is blocked.
    """
    if len(args) < 2:
        return

    recipient, message = args['target'], args['text']
    sender = source

    blocked, why = _should_filter_content(sender, recipient, message, irc.name)
    if not blocked:
        return

    _log_filter_event("NOTICE", sender, recipient, why, irc.name)
    log.warning(f"Regex Filter ({irc.name}): Blocked NOTICE from {sender} to {recipient} - {why}")
    args['text'] = ""
|
|
|
|
def handle_part(irc, source, command, args):
    """Hook for PART: blank the part message if the filter blocks it.

    The part message is evaluated once, no matter how many channels are
    being left. Each evaluation records the message in the flood tracker,
    so checking it once per channel would make a multi-channel PART look
    like repeated identical messages.

    Args:
        irc: Network object; only its ``name`` attribute is read.
        source: Identifier of the parting user.
        command: Hook command name (unused).
        args: Hook payload; reads ``channels`` (iterable of channel names)
            and ``text``, and overwrites ``text`` with an empty string when
            the content is blocked.
    """
    if len(args) < 2:
        return

    channel_list = args['channels']
    reason = args['text']
    source_id = source

    # With no channels there is nothing to report against.
    if not channel_list:
        return

    channels = ','.join(str(channel) for channel in channel_list)
    should_block, block_reason = _should_filter_content(source_id, channels, reason, irc.name)

    if should_block:
        _log_filter_event("PART", source_id, channels, block_reason, irc.name)
        log.warning(f"Regex Filter ({irc.name}): Blocked PART from {source_id} in {channels} - {block_reason}")
        args['text'] = ""
|
|
|
|
def handle_quit(irc, source, command, args):
    """Hook for QUIT: blank the quit message if the filter blocks it.

    Args:
        irc: Network object; only its ``name`` attribute is read.
        source: Identifier of the quitting user.
        command: Hook command name (unused).
        args: Hook payload; reads ``text`` and overwrites it with an empty
            string when the content is blocked.
    """
    if len(args) < 1:
        return

    quit_message = args['text']
    sender = source

    blocked, why = _should_filter_content(sender, "QUIT", quit_message, irc.name)
    if not blocked:
        return

    _log_filter_event("QUIT", sender, "QUIT", why, irc.name)
    log.warning(f"Regex Filter ({irc.name}): Blocked QUIT from {sender} - {why}")
    args['text'] = ""
|
|
|
|
def handle_topic(irc, source, command, args):
    """Hook for TOPIC: blank the topic text if the filter blocks it.

    Args:
        irc: Network object; only its ``name`` attribute is read.
        source: Identifier of whoever set the topic.
        command: Hook command name (unused).
        args: Hook payload; reads ``channel`` and ``text`` and overwrites
            ``text`` with an empty string when the content is blocked.
    """
    if len(args) < 2:
        return

    channel = args['channel']
    topic = args['text']
    source_id = source
    should_block, reason = _should_filter_content(source_id, channel, topic, irc.name)

    if should_block:
        _log_filter_event("TOPIC", source_id, channel, reason, irc.name)
        log.warning(f"Regex Filter ({irc.name}): Blocked TOPIC from {source_id} in {channel} - {reason}")
        # Clear the same key the topic was read from, as the other handlers do.
        args['text'] = ""
|
|
|
|
def handle_kick(irc, source, command, args):
    """Hook for KICK: blank the kick reason if the filter blocks it.

    Args:
        irc: Network object; only its ``name`` attribute is read.
        source: Identifier of the kicker.
        command: Hook command name (unused).
        args: Hook payload; reads ``channel``, ``target`` and ``text`` and
            overwrites ``text`` with an empty string when blocked.
    """
    if len(args) < 3:
        return

    channel = args['channel']
    kick_reason = args['text']
    sender = source
    # Channel and kicked user are combined into one label for the log.
    label = f"{channel}:{args['target']}"

    blocked, why = _should_filter_content(sender, label, kick_reason, irc.name)
    if not blocked:
        return

    _log_filter_event("KICK", sender, label, why, irc.name)
    log.warning(f"Regex Filter ({irc.name}): Blocked KICK from {sender} in {channel} - {why}")
    args['text'] = ""
|
|
|
|
def handle_nick(irc, source, command, args):
    """Hook for NICK: replace the new nick if the filter blocks it.

    Args:
        irc: Network object; only its ``name`` attribute is read.
        source: Identifier of the user changing nick.
        command: Hook command name (unused).
        args: Hook payload; reads ``newnick`` and, when blocked, overwrites
            it with ``Filtered<unix time>``.
    """
    if len(args) < 1:
        return

    requested_nick = args['newnick']
    sender = source

    blocked, why = _should_filter_content(sender, "NICK", requested_nick, irc.name)
    if not blocked:
        return

    _log_filter_event("NICK", sender, "NICK", why, irc.name)
    log.warning(f"Regex Filter ({irc.name}): Blocked NICK change from {sender} - {why}")
    # Replace with safe nick
    args['newnick'] = f"Filtered{int(time.time())}"
|
|
|
|
def handle_join(irc, source, command, args):
    """Hook for JOIN: log joins whose channel name the filter rejects.

    The payload is not modified; a rejected join is only logged.

    Args:
        irc: Network object; only its ``name`` attribute is read.
        source: Identifier of the joining source.
        command: Hook command name (unused).
        args: Hook payload; reads ``channel``.
    """
    if len(args) < 1:
        return

    joined_channel = args['channel']
    sender = source

    blocked, why = _should_filter_content(sender, "JOIN", joined_channel, irc.name)
    if not blocked:
        return

    # For JOIN, the channel name can't easily be modified without breaking
    # protocol, so the event is logged for monitoring instead.
    _log_filter_event("JOIN", sender, joined_channel, why, irc.name)
    log.warning(f"Regex Filter ({irc.name}): Blocked JOIN from {sender} to {joined_channel} - {why}")
|
|
|
|
def handle_away(irc, source, command, args):
    """Hook for AWAY: blank the away message if the filter blocks it.

    Args:
        irc: Network object; only its ``name`` attribute is read.
        source: Identifier of the user setting the away message.
        command: Hook command name (unused).
        args: Hook payload; reads ``text`` and overwrites it with an empty
            string when the content is blocked.
    """
    if len(args) < 1:
        return

    away_message = args['text']
    sender = source

    blocked, why = _should_filter_content(sender, "AWAY", away_message, irc.name)
    if not blocked:
        return

    _log_filter_event("AWAY", sender, "AWAY", why, irc.name)
    log.warning(f"Regex Filter ({irc.name}): Blocked AWAY from {sender} - {why}")
    args['text'] = ""
|
|
|
|
def main(irc=None):
    """Initialize the plugin: prepare files, load patterns, register hooks.

    Every hook is registered with priority 1050. Any exception raised
    during setup is logged instead of propagated.

    Args:
        irc: Unused.
    """
    try:
        _ensure_directories()
        _compile_regex_filters()

        # One (handler, command) pair per filtered message type, registered
        # in this order.
        hook_table = (
            (handle_privmsg, 'PRIVMSG'),
            (handle_notice, 'NOTICE'),
            (handle_part, 'PART'),
            (handle_quit, 'QUIT'),
            (handle_topic, 'TOPIC'),
            (handle_kick, 'KICK'),
            (handle_nick, 'NICK'),
            (handle_join, 'JOIN'),
            (handle_away, 'AWAY'),
        )
        for handler, command_name in hook_table:
            utils.add_hook(handler, command_name, priority=1050)

        log.info("Regex Filter plugin loaded successfully")

    except Exception as e:
        log.error(f"Failed to initialize Regex Filter plugin: {e}")
|
|
|
|
def die(irc=None):
    """Clean up on unload by emptying the flood and similarity trackers.

    Any exception raised during cleanup is logged instead of propagated.

    Args:
        irc: Unused.
    """
    try:
        # Both containers are cleared in place; the module globals keep
        # pointing at the same objects.
        for tracker in (flood_tracker, similarity_cache):
            tracker.clear()

        log.info("Regex Filter plugin unloaded")

    except Exception as e:
        log.error(f"Error during Regex Filter plugin cleanup: {e}")
|