#!/usr/bin/env python3
"""
File Upload Security Assessment for Project Starlight

Analyzes file upload functionality for security vulnerabilities.
"""

import json
import re
import base64
import hashlib
import datetime
import mimetypes
import math
from typing import Dict, List, Optional, Any


class FileUploadSecurityAssessment:
    """Assesses file upload security vulnerabilities.

    Scores an upload out of 100, subtracting points per detected risk, and
    flags the upload unsafe when the score drops below 70 or any risk is
    rated 'critical'.
    """

    def __init__(self):
        # Whitelist of extensions the application intends to accept.
        self.allowed_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.pdf',
                                   '.txt', '.doc', '.docx']
        # Extensions servers may execute or browsers may render as code.
        self.dangerous_extensions = [
            '.php', '.php3', '.php4', '.php5', '.phtml', '.pht',
            '.asp', '.aspx', '.cer', '.asa',
            '.jsp', '.jspx', '.jsw', '.jsv',
            '.pl', '.py', '.rb', '.sh', '.bat',
            '.exe', '.com', '.scr', '.pif', '.vbs', '.js',
            '.html', '.htm', '.xhtml', '.shtml'
        ]
        self.dangerous_mime_types = [
            'application/x-php', 'application/x-httpd-php', 'text/x-php',
            'application/x-executable', 'application/x-msdownload',
            'application/x-msdos-program'
        ]
        self.max_file_size = 10 * 1024 * 1024  # 10MB
        # Magic-number signatures. A value may be a tuple of alternatives
        # (bytes.startswith accepts a tuple); GIF needs both 87a and 89a.
        self.magic_numbers = {
            'jpg': b'\xff\xd8\xff',
            'png': b'\x89\x50\x4e\x47\x0d\x0a\x1a\x0a',
            'gif': (b'GIF87a', b'GIF89a'),
            'pdf': b'%PDF-',
            'zip': b'PK\x03\x04'
        }

    def analyze_file_upload(self, file_data: bytes, filename: str,
                            content_type: Optional[str] = None) -> Dict[str, Any]:
        """Analyze uploaded file for security issues.

        Args:
            file_data: Raw file bytes as received from the client.
            filename: Client-supplied filename (untrusted).
            content_type: Client-declared MIME type, if any (untrusted).

        Returns:
            Dict with 'risks' (list of findings), 'security_score' (0-100),
            'is_safe' (bool), and 'recommendations'.
        """
        analysis = {
            'filename': filename,
            'content_type': content_type,
            'file_size': len(file_data),
            'timestamp': datetime.datetime.now().isoformat(),
            'risks': [],
            'is_safe': True,
            'security_score': 100,
            'recommendations': []
        }

        # Check filename security
        filename_analysis = self._analyze_filename(filename)
        analysis['risks'].extend(filename_analysis['risks'])
        analysis['security_score'] -= filename_analysis['score_reduction']

        # Check file size
        if len(file_data) > self.max_file_size:
            analysis['risks'].append({
                'type': 'file_size',
                'severity': 'medium',
                'description': f"File size ({len(file_data)} bytes) exceeds limit ({self.max_file_size} bytes)"
            })
            analysis['security_score'] -= 15

        # Check file content
        content_analysis = self._analyze_file_content(file_data, filename)
        analysis['risks'].extend(content_analysis['risks'])
        analysis['security_score'] -= content_analysis['score_reduction']

        # Check MIME type (only when the client declared one)
        if content_type:
            mime_analysis = self._analyze_mime_type(content_type, file_data)
            analysis['risks'].extend(mime_analysis['risks'])
            analysis['security_score'] -= mime_analysis['score_reduction']

        # Check for embedded scripts
        script_analysis = self._check_embedded_scripts(file_data, filename)
        analysis['risks'].extend(script_analysis['risks'])
        analysis['security_score'] -= script_analysis['score_reduction']

        # Clamp so heavily penalized files don't report a negative score.
        analysis['security_score'] = max(0, analysis['security_score'])

        # Overall safety assessment: any single critical finding is fatal.
        analysis['is_safe'] = analysis['security_score'] >= 70 and not any(
            risk['severity'] == 'critical' for risk in analysis['risks']
        )

        # Generate recommendations
        if analysis['risks']:
            analysis['recommendations'] = self._generate_recommendations(analysis['risks'])

        return analysis

    def _analyze_filename(self, filename: str) -> Dict[str, Any]:
        """Analyze filename for security issues (extension, traversal, null bytes)."""
        result = {'risks': [], 'score_reduction': 0}

        # Check for dangerous extensions
        file_ext = filename.lower().split('.')[-1] if '.' in filename else ''
        if '.' + file_ext in self.dangerous_extensions:
            result['risks'].append({
                'type': 'dangerous_extension',
                'severity': 'critical',
                'description': f"Dangerous file extension: .{file_ext}"
            })
            result['score_reduction'] += 40

        # Check for double extensions (e.g. shell.php.jpg bypass attempts)
        if filename.count('.') > 1:
            parts = filename.lower().split('.')
            for i in range(len(parts) - 1):
                if '.' + parts[i] in self.dangerous_extensions:
                    result['risks'].append({
                        'type': 'double_extension',
                        'severity': 'high',
                        'description': f"Double extension detected: {filename}"
                    })
                    result['score_reduction'] += 25
                    break

        # Check for special characters (path traversal / reserved characters)
        dangerous_chars = ['..', '/', '\\', ':', '*', '?', '"', '<', '>', '|']
        for char in dangerous_chars:
            if char in filename:
                result['risks'].append({
                    'type': 'dangerous_characters',
                    'severity': 'medium',
                    'description': f"Dangerous character in filename: '{char}'"
                })
                result['score_reduction'] += 10
                break

        # Check for very long filenames
        if len(filename) > 255:
            result['risks'].append({
                'type': 'long_filename',
                'severity': 'low',
                'description': f"Filename too long: {len(filename)} characters"
            })
            result['score_reduction'] += 5

        # Check for null bytes (classic extension-check bypass)
        if '\x00' in filename:
            result['risks'].append({
                'type': 'null_byte',
                'severity': 'critical',
                'description': "Null byte injection detected in filename"
            })
            result['score_reduction'] += 35

        return result

    def _analyze_file_content(self, file_data: bytes, filename: str) -> Dict[str, Any]:
        """Analyze file content: magic-number/extension agreement and executable signatures."""
        result = {'risks': [], 'score_reduction': 0}

        # Check magic numbers against the claimed extension
        detected_type = self._detect_file_type(file_data)
        file_ext = filename.lower().split('.')[-1] if '.' in filename else ''
        # '.jpeg' and '.jpg' are the same format; normalize so valid JPEGs
        # named *.jpeg are not falsely flagged.
        if file_ext == 'jpeg':
            file_ext = 'jpg'
        if detected_type and file_ext != detected_type:
            result['risks'].append({
                'type': 'mime_mismatch',
                'severity': 'medium',
                'description': f"File extension (.{file_ext}) doesn't match detected type ({detected_type})"
            })
            result['score_reduction'] += 15

        # Check for executable signatures anywhere in the head of the file
        executable_signatures = [
            b'MZ',          # Windows PE
            b'\x7fELF',     # Linux ELF
            b'#!/bin/',     # Shell script
            b'#!/usr/bin/', # Shell script
            b'<?php',       # PHP code
        ]
        for signature in executable_signatures:
            if signature in file_data[:1024]:
                result['risks'].append({
                    'type': 'executable_content',
                    'severity': 'critical',
                    'description': f"Executable signature detected: {signature!r}"
                })
                result['score_reduction'] += 40
                break

        return result

    def _analyze_mime_type(self, content_type: str, file_data: bytes) -> Dict[str, Any]:
        """Analyze MIME type for security issues."""
        result = {'risks': [], 'score_reduction': 0}

        # Check for dangerous MIME types
        if content_type in self.dangerous_mime_types:
            result['risks'].append({
                'type': 'dangerous_mime',
                'severity': 'critical',
                'description': f"Dangerous MIME type: {content_type}"
            })
            result['score_reduction'] += 40

        # Compare declared MIME with the type detected from magic numbers
        detected_type = self._detect_file_type(file_data)
        if detected_type:
            expected_mime = mimetypes.guess_type(f'test.{detected_type}')[0]
            if expected_mime and content_type != expected_mime:
                result['risks'].append({
                    'type': 'mime_type_mismatch',
                    'severity': 'medium',
                    'description': f"Declared MIME ({content_type}) doesn't match detected ({expected_mime})"
                })
                result['score_reduction'] += 10

        return result

    def _check_embedded_scripts(self, file_data: bytes, filename: str) -> Dict[str, Any]:
        """Check for embedded malicious scripts in the file's leading bytes."""
        result = {'risks': [], 'score_reduction': 0}

        # Convert to text for analysis (limited to first 10KB).
        # errors='ignore' means this decode never raises.
        text_content = file_data[:10240].decode('utf-8', errors='ignore')

        # Check for script patterns
        script_patterns = [
            r'<script[^>]*>.*?</script>',
            r'javascript:',
            r'vbscript:',
            r'on\w+\s*=',        # Event handlers
            r'eval\s*\(',
            r'document\.write',
            r'<iframe[^>]*>',
            r'<object[^>]*>',
            r'<embed[^>]*>',
            r'expression\s*\(',
        ]

        for pattern in script_patterns:
            matches = re.findall(pattern, text_content, re.IGNORECASE | re.DOTALL)
            if matches:
                result['risks'].append({
                    'type': 'embedded_script',
                    'severity': 'high',
                    'description': f"Embedded script pattern detected: {pattern}",
                    'matches': len(matches)
                })
                result['score_reduction'] += 30
                break

        return result

    def _detect_file_type(self, file_data: bytes) -> Optional[str]:
        """Detect file type from magic numbers; None when no signature matches."""
        for file_type, signature in self.magic_numbers.items():
            # signature may be a single bytes value or a tuple of alternatives
            if file_data.startswith(signature):
                return file_type
        return None

    def _generate_recommendations(self, risks: List[Dict]) -> List[str]:
        """Generate security recommendations based on risks."""
        recommendations = []
        risk_types = set(risk['type'] for risk in risks)

        if 'dangerous_extension' in risk_types:
            recommendations.append("Implement strict file extension whitelist")
        if 'double_extension' in risk_types:
            recommendations.append("Check for and reject double extensions")
        if 'mime_mismatch' in risk_types or 'mime_type_mismatch' in risk_types:
            recommendations.append("Validate file content magic numbers match extensions")
        if 'executable_content' in risk_types:
            recommendations.append("Scan file contents for executable signatures")
        if 'embedded_script' in risk_types:
            recommendations.append("Scan files for embedded malicious scripts")
        if 'file_size' in risk_types:
            recommendations.append("Implement reasonable file size limits")

        # General recommendations
        recommendations.extend([
            "Store uploaded files outside web root directory",
            "Generate random filenames for uploaded files",
            "Implement file content scanning",
            "Set proper file permissions on uploaded files",
            "Log all file upload attempts",
            "Consider using virus scanning for all uploads",
            "Implement rate limiting for file uploads"
        ])

        # De-duplicate (note: set() does not preserve order)
        return list(set(recommendations))

    def generate_security_assessment_report(self) -> Dict[str, Any]:
        """Generate comprehensive file upload security assessment report."""
        return {
            'timestamp': datetime.datetime.now().isoformat(),
            'assessment_type': 'File Upload Security Assessment',
            'allowed_extensions': self.allowed_extensions,
            'dangerous_extensions': self.dangerous_extensions,
            'max_file_size': self.max_file_size,
            'security_recommendations': self._get_general_recommendations(),
            'test_cases': self._generate_test_cases()
        }

    def _get_general_recommendations(self) -> List[str]:
        """Get general file upload security recommendations."""
        return [
            "Implement server-side file validation (never trust client-side validation)",
            "Use whitelisting approach for allowed file types and extensions",
            "Validate file content magic numbers against extensions",
            "Store uploaded files outside web-accessible directories",
            "Generate random filenames to prevent directory traversal",
            "Implement proper file permissions (read-only where possible)",
            "Scan all uploads for malware and malicious content",
            "Set reasonable file size limits",
            "Implement rate limiting to prevent DoS attacks",
            "Log all upload attempts with metadata",
            "Consider using dedicated file storage services",
            "Regularly audit uploaded files and clean up unused ones"
        ]

    def _generate_test_cases(self) -> List[Dict[str, Any]]:
        """Generate test cases for file upload security testing."""
        test_cases = [
            {
                'name': 'Malicious PHP Upload',
                'description': 'Test uploading PHP file with webshell',
                'filename': 'image.php',
                'content': '<?php system($_GET["cmd"]); ?>',
                'expected_result': 'Rejected'
            },
            {
                'name': 'Double Extension Bypass',
                'description': 'Test double extension bypass attempt',
                'filename': 'image.php.jpg',
                'content': 'fake_image_content',
                'expected_result': 'Rejected'
            },
            {
                'name': 'MIME Type Manipulation',
                'description': 'Test MIME type spoofing',
                'filename': 'malicious.exe',
                'content': 'fake_content',
                'declared_mime': 'image/jpeg',
                'expected_result': 'Rejected'
            },
            {
                'name': 'Null Byte Injection',
                'description': 'Test null byte injection in filename',
                'filename': 'safe.php\x00.jpg',
                'content': 'fake_content',
                'expected_result': 'Rejected'
            },
            {
                'name': 'Large File Upload',
                'description': 'Test oversized file upload',
                'filename': 'large.jpg',
                'content': 'A' * (self.max_file_size + 1),
                'expected_result': 'Rejected'
            }
        ]
        return test_cases


def main():
    """Test the file upload security assessment."""
    assessor = FileUploadSecurityAssessment()

    # Test with different file scenarios
    test_cases = [
        (b'\xff\xd8\xff\xe0\x00\x10JFIF', 'image.jpg', 'image/jpeg'),              # Safe JPG
        (b'<?php system($_GET["cmd"]); ?>', 'malicious.php', 'application/x-php'),  # Malicious PHP
        (b'fake_content', 'image.php.jpg', 'image/jpeg'),                           # Double extension
        (b'GIF87a<script>alert(1)</script>', 'image.gif', 'image/gif'),             # Script in GIF
    ]

    print("File Upload Security Assessment Results:")
    print("=" * 50)

    test_results = []
    for file_data, filename, content_type in test_cases:
        analysis = assessor.analyze_file_upload(file_data, filename, content_type)
        test_results.append(analysis)

        print(f"\nFile: {filename}")
        print(f"Safe: {analysis['is_safe']}")
        print(f"Score: {analysis['security_score']}")
        if analysis['risks']:
            for risk in analysis['risks']:
                print(f"  - {risk['severity']}: {risk['description']}")

    # Generate assessment report
    report = assessor.generate_security_assessment_report()
    print(f"\nAssessment Report Generated")
    print(f"Test cases: {len(report['test_cases'])}")

    return {
        'test_completed': True,
        'report': report,
        'test_results': test_results,
        'total_files_tested': len(test_cases)
    }


if __name__ == "__main__":
    main()