PyLink/pattern_testing_tools.sh

303 lines
7.6 KiB
Bash

#!/bin/bash
# Regex Pattern Testing Tools
# Author: Anon
# License: BSD 2-Clause
# Strict mode: stop on any failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Absolute directory holding this script; every other path is derived from it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Working area for the generated helper scripts, samples and results.
TEST_DIR="$SCRIPT_DIR/pattern_tests"
# Default blacklist file used when a command is given no explicit file.
BLACKLIST_FILE="$SCRIPT_DIR/data/regex_blacklist.txt"

# Terminal colour escapes, stored unexpanded (the callers print them with
# `echo -e`, which turns \033 into the ESC byte).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # reset / no colour
#######################################
# Build the on-disk testing environment: the working directories plus two
# Perl helper scripts (validate_pattern.pl and test_patterns.pl).
# Globals:   TEST_DIR, BLACKLIST_FILE, GREEN, NC (read)
# Arguments: none
# Outputs:   progress messages on stdout; directories and scripts on disk
# Returns:   1 if the directories cannot be created; otherwise the status of
#            the final message. Under the script's `set -e` any other failing
#            step aborts the run.
#######################################
create_test_environment() {
  echo "Creating secure pattern testing environment..."

  # safe_samples is for safe test content only; the blacklist's parent
  # directory is created so the default blacklist path is writable.
  mkdir -p \
    "${TEST_DIR}" \
    "$(dirname "${BLACKLIST_FILE}")" \
    "${TEST_DIR}/safe_samples" \
    "${TEST_DIR}/results" || return 1

  # Create pattern validation script. The heredoc delimiter is quoted so the
  # shell leaves every $variable for Perl.
  cat > "${TEST_DIR}/validate_pattern.pl" << 'EOF'
#!/usr/bin/env perl
use strict;
use warnings;
use utf8;

# Validate a single regex pattern.
# Test for presence rather than truthiness: "0" is a legal pattern but a
# false value in Perl.
my $pattern = $ARGV[0];
die "Usage: $0 'pattern'\n" unless (defined($pattern) && length($pattern));

# Test if pattern compiles
eval { qr/$pattern/i };
if ($@) {
    print "INVALID: $@\n";
    exit 1;
}
print "VALID: Pattern compiles successfully\n";

# Basic safety checks
if (length($pattern) < 3) {
    print "WARNING: Pattern very short, may cause false positives\n";
}
if ($pattern =~ /\.\*.*\.\*/) {
    print "WARNING: Multiple .* may cause performance issues\n";
}
if ($pattern =~ /^\.\*/ || $pattern =~ /\.\*$/) {
    print "WARNING: Leading/trailing .* may be overly broad\n";
}
exit 0;
EOF
  chmod +x "${TEST_DIR}/validate_pattern.pl"

  # Create batch testing script
  cat > "${TEST_DIR}/test_patterns.pl" << 'EOF'
#!/usr/bin/env perl
use strict;
use warnings;
use utf8;

# Both input files are decoded with the :utf8 layer, so encode on output as
# well. Without this, non-ASCII text is emitted as Latin-1 bytes or triggers
# "Wide character in print" warnings.
binmode(STDOUT, ':utf8');

my $blacklist_file = $ARGV[0] || die "Usage: $0 blacklist_file [test_content_file]\n";
my $test_file = $ARGV[1];

# Load patterns: one per line, blank lines and #-comments skipped.
open my $fh, '<:utf8', $blacklist_file or die "Cannot open $blacklist_file: $!\n";
my @patterns;
my $line_num = 0;
while (my $line = <$fh>) {
    $line_num++;
    chomp $line;
    $line =~ s/^\s+|\s+$//g;
    next if $line eq '' || $line =~ /^#/;

    # Validate pattern; report and skip the ones that do not compile.
    eval { qr/$line/i };
    if ($@) {
        print "ERROR: Line $line_num - Invalid pattern: $line\n";
        print " $@\n";
        next;
    }
    push @patterns, { pattern => $line, line => $line_num };
}
close $fh;
print "Loaded " . @patterns . " valid patterns\n\n";

# Test against content if provided
if ($test_file && -f $test_file) {
    open my $test_fh, '<:utf8', $test_file or die "Cannot open $test_file: $!\n";
    while (my $content = <$test_fh>) {
        chomp $content;
        next if $content eq '';
        my $matched = 0;
        for my $p (@patterns) {
            if ($content =~ /$p->{pattern}/i) {
                print "MATCH: Line $p->{line} - '$p->{pattern}'\n";
                print " Content: " . substr($content, 0, 50) . "...\n\n";
                $matched = 1;
                last;    # only the first matching pattern is reported
            }
        }
        unless ($matched) {
            print "NO MATCH: " . substr($content, 0, 50) . "...\n\n";
        }
    }
    close $test_fh;
}
EOF
  chmod +x "${TEST_DIR}/test_patterns.pl"

  # %b expands the colour escapes; %s keeps the path literal.
  printf '%b%s%b\n' "${GREEN}" "Testing environment created in ${TEST_DIR}" "${NC}"
}
#######################################
# Validate one regex by handing it to the generated validate_pattern.pl.
# Globals:   TEST_DIR, RED, NC (read)
# Arguments: $1 - the regex pattern to validate
# Outputs:   status and validator output on stdout
# Returns:   1 if no pattern was given or the validator script is missing;
#            otherwise the validator's exit status
#######################################
validate_single_pattern() {
  # ${1:-} rather than $1: under `set -u` a bare $1 aborts the script before
  # the friendly "No pattern provided" message below can be printed.
  local pattern="${1:-}"
  local validator="${TEST_DIR}/validate_pattern.pl"

  if [[ -z "$pattern" ]]; then
    echo -e "${RED}Error: No pattern provided${NC}"
    return 1
  fi

  # The validator only exists after `setup`; say so instead of letting perl
  # fail with "Can't open perl script".
  if [[ ! -f "$validator" ]]; then
    printf '%b%s%b\n' "${RED}" "Error: Validator script not found: ${validator}" "${NC}"
    echo "Run '$0 setup' first to create the testing environment"
    return 1
  fi

  printf 'Validating pattern: %s\n' "$pattern"
  perl "$validator" "$pattern"
}
#######################################
# Compile-check every pattern in a blacklist file using test_patterns.pl.
# Globals:   BLACKLIST_FILE, TEST_DIR, RED, NC (read)
# Arguments: $1 - blacklist file (optional; empty/missing falls back to
#                 BLACKLIST_FILE)
# Outputs:   status and helper output on stdout
# Returns:   1 if the blacklist file or the helper script is missing;
#            otherwise the helper's exit status
#######################################
test_blacklist_file() {
  local file="${1:-$BLACKLIST_FILE}"
  local tester="${TEST_DIR}/test_patterns.pl"

  if [[ ! -f "$file" ]]; then
    # %s keeps backslashes in a user-supplied path literal.
    printf '%b%s%b\n' "${RED}" "Error: Blacklist file not found: ${file}" "${NC}"
    return 1
  fi

  # The helper only exists after `setup`; report that instead of a raw
  # "Can't open perl script" error.
  if [[ ! -f "$tester" ]]; then
    printf '%b%s%b\n' "${RED}" "Error: Helper script not found: ${tester}" "${NC}"
    echo "Run '$0 setup' first to create the testing environment"
    return 1
  fi

  printf 'Testing patterns in: %s\n' "$file"
  perl "$tester" "$file"
}
#######################################
# Run every blacklist pattern against a file of sample content using
# test_patterns.pl.
# Globals:   BLACKLIST_FILE, TEST_DIR, RED, YELLOW, NC (read)
# Arguments: $1 - blacklist file (optional; defaults to BLACKLIST_FILE)
#            $2 - sample content file (optional; defaults to
#                 TEST_DIR/safe_samples/test_content.txt)
# Outputs:   status and helper output on stdout
# Returns:   1 if the blacklist, the sample file or the helper script is
#            missing; otherwise the helper's exit status
#######################################
test_against_samples() {
  local file="${1:-$BLACKLIST_FILE}"
  local samples="${2:-${TEST_DIR}/safe_samples/test_content.txt}"
  local tester="${TEST_DIR}/test_patterns.pl"

  if [[ ! -f "$file" ]]; then
    # %s keeps backslashes in a user-supplied path literal.
    printf '%b%s%b\n' "${RED}" "Error: Blacklist file not found: ${file}" "${NC}"
    return 1
  fi

  if [[ ! -f "$samples" ]]; then
    printf '%b%s%b\n' "${YELLOW}" "Warning: Sample file not found: ${samples}" "${NC}"
    echo "Create test samples in: ${TEST_DIR}/safe_samples/"
    return 1
  fi

  # The helper only exists after `setup`; report that instead of a raw
  # "Can't open perl script" error.
  if [[ ! -f "$tester" ]]; then
    printf '%b%s%b\n' "${RED}" "Error: Helper script not found: ${tester}" "${NC}"
    echo "Run '$0 setup' first to create the testing environment"
    return 1
  fi

  echo "Testing patterns against samples..."
  perl "$tester" "$file" "$samples"
}
#######################################
# Time how long the blacklist's valid patterns take to match a fixed sample
# message, using a Perl script generated into TEST_DIR on each call.
# Globals:   BLACKLIST_FILE, TEST_DIR, RED, NC (read)
# Arguments: $1 - blacklist file (optional; defaults to BLACKLIST_FILE)
# Outputs:   timing report on stdout; writes TEST_DIR/benchmark.pl
# Returns:   1 if the blacklist file is missing or TEST_DIR cannot be
#            created; otherwise the benchmark script's exit status
#######################################
benchmark_patterns() {
  local file="${1:-$BLACKLIST_FILE}"

  if [[ ! -f "$file" ]]; then
    # %s keeps backslashes in a user-supplied path literal.
    printf '%b%s%b\n' "${RED}" "Error: Blacklist file not found: ${file}" "${NC}"
    return 1
  fi

  echo "Benchmarking pattern performance..."

  # This command writes its own helper, so it must not depend on `setup`
  # having created TEST_DIR already.
  mkdir -p "${TEST_DIR}" || return 1

  # Create benchmark script. The heredoc delimiter is quoted so the shell
  # leaves every $variable for Perl.
  cat > "${TEST_DIR}/benchmark.pl" << 'EOF'
#!/usr/bin/env perl
use strict;
use warnings;
use Time::HiRes qw(time);

my $blacklist_file = $ARGV[0] || die "Usage: $0 blacklist_file\n";

# Load patterns: blank lines, #-comments and patterns that fail to compile
# are skipped.
open my $fh, '<:utf8', $blacklist_file or die "Cannot open $blacklist_file: $!\n";
my @patterns;
while (my $line = <$fh>) {
    chomp $line;
    $line =~ s/^\s+|\s+$//g;
    next if $line eq '' || $line =~ /^#/;
    eval { qr/$line/i };
    next if $@;
    push @patterns, $line;
}
close $fh;

# Test performance with sample content
my $test_content = "This is a sample message with various words and https://example.com/test links";
my $iterations = 1000;
print "Testing " . @patterns . " patterns over $iterations iterations...\n";

my $start_time = time();
for my $i (1..$iterations) {
    for my $pattern (@patterns) {
        $test_content =~ /$pattern/i;
    }
}
my $end_time = time();

my $total_time = $end_time - $start_time;
my $avg_time = $total_time / $iterations;
printf "Total time: %.4f seconds\n", $total_time;
printf "Average time per message: %.6f seconds\n", $avg_time;
# Guard against division by zero when the run is too fast to measure.
printf "Messages per second: %.0f\n", 1 / $avg_time if $avg_time > 0;
EOF

  perl "${TEST_DIR}/benchmark.pl" "$file"
}
#######################################
# Write a starter file of harmless sample messages for pattern testing.
# Any existing sample file at the same path is overwritten.
# Globals:   TEST_DIR, GREEN, NC (read)
# Arguments: none
# Outputs:   progress messages on stdout; writes
#            TEST_DIR/safe_samples/test_content.txt
# Returns:   1 if the samples directory cannot be created or the file cannot
#            be written
#######################################
create_safe_samples() {
  local samples_dir="${TEST_DIR}/safe_samples"
  local samples_file="${samples_dir}/test_content.txt"

  echo "Creating safe test samples..."

  # `create-samples` is a standalone command, so it cannot assume `setup`
  # already created the directory.
  mkdir -p "$samples_dir" || return 1

  # Fail here rather than reporting success for a file that was not written.
  cat > "$samples_file" << 'EOF' || return 1
# Safe test samples for pattern validation
# Add your own safe test content here
Normal message with no issues
Check out this link: https://legitimate-site.com
User sharing a photo: here's my vacation pic!
Someone posting a normal URL: visit https://example.com/page
Regular conversation about everyday topics
Message with common terms that should not be blocked
EOF

  # %b expands the colour escapes; %s keeps the path literal.
  printf '%b%s%b\n' "${GREEN}" "Created safe samples in: ${samples_file}" "${NC}"
  echo "Edit this file to add your own safe test content"
}
#######################################
# Print the command-line help text.
# Globals:   none
# Arguments: none
# Outputs:   usage summary on stdout; $0 is shown as the program name
#######################################
show_usage() {
  # Unquoted heredoc: $0 is expanded and each \\ collapses to one backslash,
  # exactly as it would inside a double-quoted string.
  cat << USAGE
Regex Pattern Testing Tools

Usage: $0 <command> [options]

Commands:
 setup - Create testing environment
 validate 'pattern' - Validate a single regex pattern
 test [blacklist_file] - Test all patterns in blacklist file
 samples - Test patterns against safe sample content
 benchmark [file] - Benchmark pattern performance
 create-samples - Create template for safe test samples

Examples:
 $0 setup
 $0 validate '\\bhttps?://bit\\.ly/\\w+'
 $0 test ./data/regex_blacklist.txt
 $0 benchmark
USAGE
}
# Main script logic: dispatch on the first command-line argument.
# Optional arguments are forwarded as "${N:-}" so that `set -u` does not trip
# when they are omitted; the callee functions apply their own defaults.
case "${1:-}" in
  setup)
    create_test_environment
    create_safe_samples
    ;;
  validate)
    validate_single_pattern "${2:-}"
    ;;
  test)
    test_blacklist_file "${2:-}"
    ;;
  samples)
    test_against_samples "${2:-}" "${3:-}"
    ;;
  benchmark)
    benchmark_patterns "${2:-}"
    ;;
  create-samples)
    create_safe_samples
    ;;
  "" | help | -h | --help)
    # No command, or an explicit request for help: usage on stdout, success.
    show_usage
    ;;
  *)
    # An unrecognised command is an error. Report it on stderr and exit
    # non-zero so a typo in automation does not look like a successful run.
    printf '%bError: Unknown command: %s%b\n' "${RED}" "$1" "${NC}" >&2
    show_usage >&2
    exit 1
    ;;
esac