mirror of http://git.simp.i2p/simp/PyLink.git
303 lines
7.6 KiB
Bash
303 lines
7.6 KiB
Bash
#!/bin/bash
|
|
# Regex Pattern Testing Tools
|
|
# Author: Anon
|
|
# License: BSD 2-Clause
|
|
|
|
# Strict mode: abort on a failing command, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Absolute path of the directory containing this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Workspace that holds the generated helper scripts and sample data.
TEST_DIR="${SCRIPT_DIR}/pattern_tests"
# Blacklist used when a command is not given an explicit file.
BLACKLIST_FILE="${SCRIPT_DIR}/data/regex_blacklist.txt"

# ANSI colour sequences, kept as unexpanded escapes for `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
|
|
|
|
create_test_environment() {
|
|
echo "Creating secure pattern testing environment..."
|
|
|
|
mkdir -p "${TEST_DIR}"
|
|
mkdir -p "$(dirname "${BLACKLIST_FILE}")"
|
|
|
|
# Create test samples directory (for safe test content only)
|
|
mkdir -p "${TEST_DIR}/safe_samples"
|
|
mkdir -p "${TEST_DIR}/results"
|
|
|
|
# Create pattern validation script
|
|
cat > "${TEST_DIR}/validate_pattern.pl" << 'EOF'
|
|
#!/usr/bin/env perl
|
|
use strict;
|
|
use warnings;
|
|
use utf8;
|
|
|
|
# Validate a single regex pattern
|
|
my $pattern = $ARGV[0] || die "Usage: $0 'pattern'\n";
|
|
|
|
# Test if pattern compiles
|
|
eval { qr/$pattern/i };
|
|
if ($@) {
|
|
print "INVALID: $@\n";
|
|
exit 1;
|
|
}
|
|
|
|
print "VALID: Pattern compiles successfully\n";
|
|
|
|
# Basic safety checks
|
|
if (length($pattern) < 3) {
|
|
print "WARNING: Pattern very short, may cause false positives\n";
|
|
}
|
|
|
|
if ($pattern =~ /\.\*.*\.\*/) {
|
|
print "WARNING: Multiple .* may cause performance issues\n";
|
|
}
|
|
|
|
if ($pattern =~ /^\.\*/ || $pattern =~ /\.\*$/) {
|
|
print "WARNING: Leading/trailing .* may be overly broad\n";
|
|
}
|
|
|
|
exit 0;
|
|
EOF
|
|
|
|
chmod +x "${TEST_DIR}/validate_pattern.pl"
|
|
|
|
# Create batch testing script
|
|
cat > "${TEST_DIR}/test_patterns.pl" << 'EOF'
|
|
#!/usr/bin/env perl
|
|
use strict;
|
|
use warnings;
|
|
use utf8;
|
|
|
|
my $blacklist_file = $ARGV[0] || die "Usage: $0 blacklist_file [test_content_file]\n";
|
|
my $test_file = $ARGV[1];
|
|
|
|
# Load patterns
|
|
open my $fh, '<:utf8', $blacklist_file or die "Cannot open $blacklist_file: $!\n";
|
|
my @patterns;
|
|
my $line_num = 0;
|
|
|
|
while (my $line = <$fh>) {
|
|
$line_num++;
|
|
chomp $line;
|
|
$line =~ s/^\s+|\s+$//g;
|
|
|
|
next if $line eq '' || $line =~ /^#/;
|
|
|
|
# Validate pattern
|
|
eval { qr/$line/i };
|
|
if ($@) {
|
|
print "ERROR: Line $line_num - Invalid pattern: $line\n";
|
|
print " $@\n";
|
|
next;
|
|
}
|
|
|
|
push @patterns, { pattern => $line, line => $line_num };
|
|
}
|
|
close $fh;
|
|
|
|
print "Loaded " . @patterns . " valid patterns\n\n";
|
|
|
|
# Test against content if provided
|
|
if ($test_file && -f $test_file) {
|
|
open my $test_fh, '<:utf8', $test_file or die "Cannot open $test_file: $!\n";
|
|
|
|
while (my $content = <$test_fh>) {
|
|
chomp $content;
|
|
next if $content eq '';
|
|
|
|
my $matched = 0;
|
|
for my $p (@patterns) {
|
|
if ($content =~ /$p->{pattern}/i) {
|
|
print "MATCH: Line $p->{line} - '$p->{pattern}'\n";
|
|
print " Content: " . substr($content, 0, 50) . "...\n\n";
|
|
$matched = 1;
|
|
last;
|
|
}
|
|
}
|
|
|
|
unless ($matched) {
|
|
print "NO MATCH: " . substr($content, 0, 50) . "...\n\n";
|
|
}
|
|
}
|
|
|
|
close $test_fh;
|
|
}
|
|
EOF
|
|
|
|
chmod +x "${TEST_DIR}/test_patterns.pl"
|
|
|
|
echo -e "${GREEN}Testing environment created in ${TEST_DIR}${NC}"
|
|
}
|
|
|
|
validate_single_pattern() {
|
|
local pattern="$1"
|
|
|
|
if [[ -z "$pattern" ]]; then
|
|
echo -e "${RED}Error: No pattern provided${NC}"
|
|
return 1
|
|
fi
|
|
|
|
echo "Validating pattern: $pattern"
|
|
perl "${TEST_DIR}/validate_pattern.pl" "$pattern"
|
|
}
|
|
|
|
test_blacklist_file() {
|
|
local file="${1:-$BLACKLIST_FILE}"
|
|
|
|
if [[ ! -f "$file" ]]; then
|
|
echo -e "${RED}Error: Blacklist file not found: $file${NC}"
|
|
return 1
|
|
fi
|
|
|
|
echo "Testing patterns in: $file"
|
|
perl "${TEST_DIR}/test_patterns.pl" "$file"
|
|
}
|
|
|
|
test_against_samples() {
|
|
local file="${1:-$BLACKLIST_FILE}"
|
|
local samples="${2:-${TEST_DIR}/safe_samples/test_content.txt}"
|
|
|
|
if [[ ! -f "$file" ]]; then
|
|
echo -e "${RED}Error: Blacklist file not found: $file${NC}"
|
|
return 1
|
|
fi
|
|
|
|
if [[ ! -f "$samples" ]]; then
|
|
echo -e "${YELLOW}Warning: Sample file not found: $samples${NC}"
|
|
echo "Create test samples in: ${TEST_DIR}/safe_samples/"
|
|
return 1
|
|
fi
|
|
|
|
echo "Testing patterns against samples..."
|
|
perl "${TEST_DIR}/test_patterns.pl" "$file" "$samples"
|
|
}
|
|
|
|
benchmark_patterns() {
|
|
local file="${1:-$BLACKLIST_FILE}"
|
|
|
|
if [[ ! -f "$file" ]]; then
|
|
echo -e "${RED}Error: Blacklist file not found: $file${NC}"
|
|
return 1
|
|
fi
|
|
|
|
echo "Benchmarking pattern performance..."
|
|
|
|
# Create benchmark script
|
|
cat > "${TEST_DIR}/benchmark.pl" << 'EOF'
|
|
#!/usr/bin/env perl
|
|
use strict;
|
|
use warnings;
|
|
use Time::HiRes qw(time);
|
|
|
|
my $blacklist_file = $ARGV[0] || die "Usage: $0 blacklist_file\n";
|
|
|
|
# Load patterns
|
|
open my $fh, '<:utf8', $blacklist_file or die "Cannot open $blacklist_file: $!\n";
|
|
my @patterns;
|
|
|
|
while (my $line = <$fh>) {
|
|
chomp $line;
|
|
$line =~ s/^\s+|\s+$//g;
|
|
next if $line eq '' || $line =~ /^#/;
|
|
|
|
eval { qr/$line/i };
|
|
next if $@;
|
|
|
|
push @patterns, $line;
|
|
}
|
|
close $fh;
|
|
|
|
# Test performance with sample content
|
|
my $test_content = "This is a sample message with various words and https://example.com/test links";
|
|
my $iterations = 1000;
|
|
|
|
print "Testing " . @patterns . " patterns over $iterations iterations...\n";
|
|
|
|
my $start_time = time();
|
|
|
|
for my $i (1..$iterations) {
|
|
for my $pattern (@patterns) {
|
|
$test_content =~ /$pattern/i;
|
|
}
|
|
}
|
|
|
|
my $end_time = time();
|
|
my $total_time = $end_time - $start_time;
|
|
my $avg_time = $total_time / $iterations;
|
|
|
|
printf "Total time: %.4f seconds\n", $total_time;
|
|
printf "Average time per message: %.6f seconds\n", $avg_time;
|
|
printf "Messages per second: %.0f\n", 1 / $avg_time if $avg_time > 0;
|
|
EOF
|
|
|
|
perl "${TEST_DIR}/benchmark.pl" "$file"
|
|
}
|
|
|
|
# Write a template file of harmless sample messages that the `samples`
# command matches the blacklist against.
#
# Globals: TEST_DIR, GREEN, NC (read only).
# Side effects: creates TEST_DIR/safe_samples if needed and overwrites
#               TEST_DIR/safe_samples/test_content.txt.
create_safe_samples() {
    local samples_file="${TEST_DIR}/safe_samples/test_content.txt"

    echo "Creating safe test samples..."

    # `create-samples` can be invoked without a prior `setup`, so the target
    # directory is not guaranteed to exist yet.
    mkdir -p "$(dirname "$samples_file")"

    cat > "$samples_file" << 'EOF'
# Safe test samples for pattern validation
# Add your own safe test content here

Normal message with no issues
Check out this link: https://legitimate-site.com
User sharing a photo: here's my vacation pic!
Someone posting a normal URL: visit https://example.com/page
Regular conversation about everyday topics
Message with common terms that should not be blocked
EOF

    echo -e "${GREEN}Created safe samples in: $samples_file${NC}"
    echo "Edit this file to add your own safe test content"
}
|
|
|
|
# Print the command summary and a few example invocations to stdout.
# The heredoc is unquoted on purpose: $0 expands to the name the script was
# invoked as, and each "\\" collapses to a single backslash in the output.
show_usage() {
    cat << USAGE
Regex Pattern Testing Tools

Usage: $0 <command> [options]

Commands:
 setup - Create testing environment
 validate 'pattern' - Validate a single regex pattern
 test [blacklist_file] - Test all patterns in blacklist file
 samples - Test patterns against safe sample content
 benchmark [file] - Benchmark pattern performance
 create-samples - Create template for safe test samples

Examples:
 $0 setup
 $0 validate '\\bhttps?://bit\\.ly/\\w+'
 $0 test ./data/regex_blacklist.txt
 $0 benchmark
USAGE
}
|
|
|
|
# Main script logic
|
|
# Main script logic: dispatch on the first command-line argument.
# With no command (or an explicit help request) the usage text is printed
# and the script exits 0, as before. An unrecognised command is now reported
# on stderr and exits 1, so a mistyped command is no longer reported as
# success to the calling shell or CI job.
case "${1:-}" in
    "setup")
        create_test_environment
        create_safe_samples
        ;;
    "validate")
        validate_single_pattern "${2:-}"
        ;;
    "test")
        test_blacklist_file "${2:-}"
        ;;
    "samples")
        test_against_samples "${2:-}" "${3:-}"
        ;;
    "benchmark")
        benchmark_patterns "${2:-}"
        ;;
    "create-samples")
        create_safe_samples
        ;;
    ""|"help"|"-h"|"--help")
        show_usage
        ;;
    *)
        echo -e "${RED}Error: Unknown command: ${1}${NC}" >&2
        show_usage >&2
        exit 1
        ;;
esac
|