#!/usr/bin/env python3
"""
File Upload Security Assessment for Project Starlight
Analyzes file upload functionality for security vulnerabilities
"""
import json
import re
import base64
import hashlib
import datetime
import mimetypes
import math
from typing import Dict, List, Optional, Any
class FileUploadSecurityAssessment:
"""Assesses file upload security vulnerabilities."""
def __init__(self):
self.allowed_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.pdf', '.txt', '.doc', '.docx']
self.dangerous_extensions = [
'.php', '.php3', '.php4', '.php5', '.phtml', '.pht',
'.asp', '.aspx', '.cer', '.asa',
'.jsp', '.jspx', '.jsw', '.jsv',
'.pl', '.py', '.rb', '.sh', '.bat',
'.exe', '.com', '.scr', '.pif', '.vbs',
'.js', '.html', '.htm', '.xhtml', '.shtml'
]
self.dangerous_mime_types = [
'application/x-php', 'application/x-httpd-php',
'text/x-php', 'application/x-executable',
'application/x-msdownload', 'application/x-msdos-program'
]
self.max_file_size = 10 * 1024 * 1024 # 10MB
self.magic_numbers = {
'jpg': b'\xff\xd8\xff',
'png': b'\x89\x50\x4e\x47\x0d\x0a\x1a\x0a',
'gif': b'GIF87a',
'pdf': b'%PDF-',
'zip': b'PK\x03\x04'
}
def analyze_file_upload(self, file_data: bytes, filename: str,
                        content_type: Optional[str] = None) -> Dict[str, Any]:
    """Analyze an uploaded file and return a security report.

    The checks run in a fixed order: filename, size, content, MIME type
    (only when ``content_type`` is truthy) and embedded scripts. Each
    check contributes its risks to the report and its penalty is
    subtracted from a score that starts at 100.

    Args:
        file_data: Raw bytes of the uploaded file.
        filename: Name supplied for the upload.
        content_type: Declared MIME type, if any.

    Returns:
        A dict with the keys ``filename``, ``content_type``, ``file_size``,
        ``timestamp``, ``risks``, ``is_safe``, ``security_score`` and
        ``recommendations``. ``security_score`` is clamped so it never
        drops below 0. ``is_safe`` is True only when the score is at
        least 70 and no risk has severity ``'critical'``.
    """
    size = len(file_data)
    analysis = {
        'filename': filename,
        'content_type': content_type,
        'file_size': size,
        'timestamp': datetime.datetime.now().isoformat(),
        'risks': [],
        'is_safe': True,
        'security_score': 100,
        'recommendations': []
    }

    def absorb(partial: Dict[str, Any]) -> None:
        # Fold one sub-check's findings and penalty into the report.
        analysis['risks'].extend(partial['risks'])
        analysis['security_score'] -= partial['score_reduction']

    # Check filename security
    absorb(self._analyze_filename(filename))

    # Check file size
    if size > self.max_file_size:
        absorb({
            'risks': [{
                'type': 'file_size',
                'severity': 'medium',
                'description': f"File size ({size} bytes) exceeds limit ({self.max_file_size} bytes)"
            }],
            'score_reduction': 15,
        })

    # Check file content
    absorb(self._analyze_file_content(file_data, filename))

    # Check MIME type (skipped when no content type was declared)
    if content_type:
        absorb(self._analyze_mime_type(content_type, file_data))

    # Check for embedded scripts
    absorb(self._check_embedded_scripts(file_data, filename))

    # Penalties from several checks can add up to more than 100; keep the
    # reported score at a floor of 0 instead of letting it go negative.
    analysis['security_score'] = max(0, analysis['security_score'])

    # Overall safety assessment
    has_critical = any(
        risk['severity'] == 'critical' for risk in analysis['risks']
    )
    analysis['is_safe'] = analysis['security_score'] >= 70 and not has_critical

    # Generate recommendations only when something was found
    if analysis['risks']:
        analysis['recommendations'] = self._generate_recommendations(analysis['risks'])
    return analysis
def _analyze_filename(self, filename: str) -> Dict[str, Any]:
"""Analyze filename for security issues."""
result = {'risks': [], 'score_reduction': 0}
# Check for dangerous extensions
file_ext = filename.lower().split('.')[-1] if '.' in filename else ''
if '.' + file_ext in self.dangerous_extensions:
result['risks'].append({
'type': 'dangerous_extension',
'severity': 'critical',
'description': f"Dangerous file extension: .{file_ext}"
})
result['score_reduction'] += 40
# Check for double extensions
if filename.count('.') > 1:
parts = filename.lower().split('.')
for i in range(len(parts) - 1):
if '.' + parts[i] in self.dangerous_extensions:
result['risks'].append({
'type': 'double_extension',
'severity': 'high',
'description': f"Double extension detected: {filename}"
})
result['score_reduction'] += 25
break
# Check for special characters
dangerous_chars = ['..', '/', '\\', ':', '*', '?', '"', '<', '>', '|']
for char in dangerous_chars:
if char in filename:
result['risks'].append({
'type': 'dangerous_characters',
'severity': 'medium',
'description': f"Dangerous character in filename: '{char}'"
})
result['score_reduction'] += 10
break
# Check for very long filenames
if len(filename) > 255:
result['risks'].append({
'type': 'long_filename',
'severity': 'low',
'description': f"Filename too long: {len(filename)} characters"
})
result['score_reduction'] += 5
# Check for null bytes
if '\x00' in filename:
result['risks'].append({
'type': 'null_byte',
'severity': 'critical',
'description': "Null byte injection detected in filename"
})
result['score_reduction'] += 35
return result
def _analyze_file_content(self, file_data: bytes, filename: str) -> Dict[str, Any]:
"""Analyze file content for security issues."""
result = {'risks': [], 'score_reduction': 0}
# Check magic numbers
detected_type = self._detect_file_type(file_data)
file_ext = filename.lower().split('.')[-1] if '.' in filename else ''
if detected_type and file_ext != detected_type:
result['risks'].append({
'type': 'mime_mismatch',
'severity': 'medium',
'description': f"File extension (.{file_ext}) doesn't match detected type ({detected_type})"
})
result['score_reduction'] += 15
# Check for executable signatures
executable_signatures = [
b'MZ', # Windows PE
b'\x7fELF', # Linux ELF
b'#!/bin/', # Shell script
b'#!/usr/bin/', # Shell script
b'',
r'javascript:',
r'vbscript:',
r'on\w+\s*=', # Event handlers
r'eval\s*\(',
r'document\.write',
r'