# OP_CAT-IPFS Integration Best Practices & Security Guidelines

## Overview

This document provides comprehensive best practices and security guidelines for
implementing Bitcoin's OP_CAT functionality with Starlight's IPFS architecture.

---

## Security Guidelines

### 1. Input Validation

#### IPFS CID Validation

```python
def validate_ipfs_cid(cid: str) -> Dict:
    """
    Comprehensive IPFS CID validation.

    Args:
        cid: IPFS CID to validate

    Returns:
        Dict: Validation result with details
    """
    result = {"valid": False, "errors": [], "warnings": []}

    # Format validation
    if not isinstance(cid, str):
        result["errors"].append("CID must be a string")
        return result

    # Check CID prefix. Only short multibase/multicodec prefixes belong here;
    # a complete CID is never a valid "prefix" entry.
    valid_prefixes = ["bafy", "bafk"]
    if not any(cid.startswith(prefix) for prefix in valid_prefixes):
        result["errors"].append("Invalid CID prefix")
        return result

    # Length validation (expected CIDv1 base32 string lengths)
    if len(cid) != 49 and len(cid) != 59:
        result["errors"].append(f"Invalid CID length: {len(cid)}")
        return result

    # Character validation: base32 lowercase alphabet (RFC 4648, no padding)
    valid_chars = set("abcdefghijklmnopqrstuvwxyz234567")
    cid_part = cid[4:]  # Remove prefix
    if not all(c in valid_chars for c in cid_part):
        result["errors"].append("CID contains invalid characters")
        return result

    # Checksum validation (simplified)
    if not _verify_cid_checksum(cid):
        result["warnings"].append("CID checksum verification failed")

    result["valid"] = True
    return result


def _verify_cid_checksum(cid: str) -> bool:
    """Verify CID checksum (simplified implementation)."""
    # In production, implement proper multibase checksum verification
    return len(cid) >= 49
```

#### Content Size Validation

```python
def validate_content_size(content: bytes, max_size: int = 10 * 1024 * 1024) -> Dict:
    """
    Validate content size constraints.

    Args:
        content: Content to validate
        max_size: Maximum allowed size in bytes

    Returns:
        Dict: Validation result
    """
    size = len(content)

    return {
        "valid": size <= max_size,
        "size": size,
        "max_size": max_size,
        "size_mb": size / (1024 * 1024),
        "requires_chunking": size > max_size,
        "recommended_chunks": max(1, (size + (1024 * 1024 - 1)) // (1024 * 1024))
    }
```

### 2. Script Security

#### Stack Depth Protection

```python
def validate_stack_depth(script: Dict, max_depth: int = 10) -> Dict:
    """
    Validate script stack depth to prevent overflow attacks.

    Args:
        script: Script structure to validate
        max_depth: Maximum allowed stack depth

    Returns:
        Dict: Stack depth validation result
    """
    operations = script.get("operations", [])
    current_depth = 0
    max_reached = 0

    for op in operations:
        op_name = op.get("op", "")

        # Track stack operations using each opcode's real net stack effect
        if op_name.startswith("OP_PUSH"):
            current_depth += 1
            max_reached = max(max_reached, current_depth)
        elif op_name in ["OP_CAT", "OP_EQUAL"]:
            # Pop two items, push one result: net change -1
            if current_depth >= 2:
                current_depth -= 1
            else:
                return {
                    "valid": False,
                    "error": f"Stack underflow at {op_name}",
                    "current_depth": current_depth
                }
        elif op_name == "OP_EQUALVERIFY":
            # Pops two items and pushes nothing: net change -2
            if current_depth >= 2:
                current_depth -= 2
            else:
                return {
                    "valid": False,
                    "error": f"Stack underflow at {op_name}",
                    "current_depth": current_depth
                }
        elif op_name == "OP_HASH256":
            # Pops one item and pushes its hash: net change 0,
            # but still requires one operand on the stack
            if current_depth < 1:
                return {
                    "valid": False,
                    "error": f"Stack underflow at {op_name}",
                    "current_depth": current_depth
                }
        elif op_name in ["OP_1", "OP_0"]:
            current_depth += 1
            max_reached = max(max_reached, current_depth)
        elif op_name in ["OP_DROP", "OP_2DROP"]:
            current_depth = max(0, current_depth - (2 if op_name == "OP_2DROP" else 1))

    return {
        "valid": max_reached <= max_depth,
        "max_depth": max_reached,
        "allowed_depth": max_depth,
        "final_depth": current_depth
    }
```

#### Script Size Limits

```python
def validate_script_size(script: Dict, max_size: int = 520) -> Dict:
    """
    Validate script size against Bitcoin limits.
Args: script: Script structure to validate max_size: Maximum allowed script size Returns: Dict: Size validation result """ operations = script.get("operations", []) total_size = 0 for op in operations: op_name = op.get("op", "") if op_name.startswith("OP_PUSHBYTES"): # Extract size from operation name try: size = int(op_name.split("_")[2]) total_size += size + 1 # Data + opcode except (IndexError, ValueError): total_size += 1 # Default to opcode only else: total_size += 1 # Just opcode return { "valid": total_size <= max_size, "size": total_size, "max_size": max_size, "size_percentage": (total_size / max_size) * 100, "operations_count": len(operations) } ``` ### 3. Content Integrity #### Multi-Layer Hashing ```python def create_content_integrity_layers(content: bytes) -> Dict: """ Create multiple integrity layers for content verification. Args: content: Content to secure Returns: Dict: Integrity layers """ return { "sha256": hashlib.sha256(content).hexdigest(), "sha512": hashlib.sha512(content).hexdigest(), "double_sha256": hashlib.sha256(hashlib.sha256(content).digest()).hexdigest(), "ripemd160": hashlib.new('ripemd160', content).hexdigest(), "blake2b": hashlib.blake2b(content, digest_size=32).hexdigest(), "metadata": { "content_size": len(content), "created_at": datetime.datetime.now().isoformat(), "algorithms": ["sha256", "sha512", "double_sha256", "ripemd160", "blake2b"] } } def verify_content_integrity(content: bytes, integrity_layers: Dict) -> Dict: """ Verify content against multiple integrity layers. 
Args: content: Content to verify integrity_layers: Expected integrity values Returns: Dict: Verification result """ current_layers = create_content_integrity_layers(content) results = {} for algorithm, expected_hash in integrity_layers.items(): if algorithm in current_layers: actual_hash = current_layers[algorithm] results[algorithm] = { "valid": actual_hash == expected_hash, "expected": expected_hash, "actual": actual_hash } all_valid = all(result["valid"] for result in results.values()) return { "overall_valid": all_valid, "algorithm_results": results, "verified_algorithms": list(results.keys()) } ``` --- ## Best Practices ### 1. Architecture Best Practices #### Modular Design ```python class ModularOPCATIntegration: """ Modular approach to OP_CAT-IPFS integration. """ def __init__(self, config: Dict): self.config = config self.validators = self._initialize_validators() self.processors = self._initialize_processors() self.security = self._initialize_security() def _initialize_validators(self) -> Dict: """Initialize validation modules.""" return { "cid": CIDValidator(), "content": ContentValidator(self.config.get("max_content_size", 10*1024*1024)), "script": ScriptValidator(), "security": SecurityValidator() } def _initialize_processors(self) -> Dict: """Initialize processing modules.""" return { "basic": BasicContentProcessor(), "chunked": ChunkedContentProcessor(self.config.get("chunk_size", 1024*1024)), "conditional": ConditionalContentProcessor(), "aggregation": AggregationContentProcessor() } def _initialize_security(self) -> Dict: """Initialize security modules.""" return { "input": InputSecurity(), "script": ScriptSecurity(), "content": ContentSecurity() } def process_content(self, content: bytes, processing_type: str = "basic") -> Dict: """ Process content with modular security and validation. 
Args: content: Content to process processing_type: Type of processing to apply Returns: Dict: Processing result """ # Security validation security_result = self.security["input"].validate(content) if not security_result["valid"]: return {"success": False, "error": "Security validation failed", "details": security_result} # Content validation content_validation = self.validators["content"].validate(content) if not content_validation["valid"]: return {"success": False, "error": "Content validation failed", "details": content_validation} # Process content processor = self.processors.get(processing_type) if not processor: return {"success": False, "error": f"Unknown processing type: {processing_type}"} result = processor.process(content) # Script validation if "script" in result: script_validation = self.validators["script"].validate(result["script"]) result["script_validation"] = script_validation return result ``` #### Configuration Management ```python class IntegrationConfig: """ Centralized configuration management for OP_CAT-IPFS integration. 
""" DEFAULT_CONFIG = { "network": "mainnet", "max_content_size": 10 * 1024 * 1024, # 10MB "chunk_size": 1024 * 1024, # 1MB "max_chunks": 8, "max_script_size": 520, "max_stack_depth": 10, "security": { "enable_cid_validation": True, "enable_content_validation": True, "enable_script_validation": True, "require_multiple_hashes": True, "max_operations_per_script": 50 }, "performance": { "enable_caching": True, "cache_ttl": 3600, # 1 hour "enable_parallel_processing": True, "max_parallel_tasks": 4 }, "logging": { "level": "INFO", "enable_audit_log": True, "log_retention_days": 30 } } def __init__(self, custom_config: Dict = None): self.config = self._merge_configs(self.DEFAULT_CONFIG, custom_config or {}) self._validate_config() def _merge_configs(self, default: Dict, custom: Dict) -> Dict: """Merge custom config with defaults.""" merged = default.copy() for key, value in custom.items(): if key in merged and isinstance(merged[key], dict) and isinstance(value, dict): merged[key] = self._merge_configs(merged[key], value) else: merged[key] = value return merged def _validate_config(self): """Validate configuration parameters.""" if self.config["max_content_size"] <= 0: raise ValueError("max_content_size must be positive") if self.config["chunk_size"] <= 0: raise ValueError("chunk_size must be positive") if self.config["max_script_size"] > 520: raise ValueError("max_script_size cannot exceed Bitcoin limit of 520") def get(self, key: str, default=None): """Get configuration value.""" keys = key.split(".") value = self.config for k in keys: if isinstance(value, dict) and k in value: value = value[k] else: return default return value ``` ### 2. Performance Best Practices #### Content Caching ```python class ContentCache: """ Intelligent caching for OP_CAT-IPFS operations. 
""" def __init__(self, max_size: int = 1000, ttl: int = 3600): self.max_size = max_size self.ttl = ttl self.cache = {} self.access_times = {} def get(self, key: str) -> Optional[Dict]: """Get cached item if valid.""" if key not in self.cache: return None item = self.cache[key] current_time = datetime.datetime.now().timestamp() # Check TTL if current_time - item["timestamp"] > self.ttl: self.remove(key) return None # Update access time self.access_times[key] = current_time return item["data"] def set(self, key: str, data: Dict): """Cache item with eviction if needed.""" current_time = datetime.datetime.now().timestamp() # Evict if cache is full if len(self.cache) >= self.max_size: self._evict_lru() self.cache[key] = { "data": data, "timestamp": current_time } self.access_times[key] = current_time def _evict_lru(self): """Evict least recently used item.""" if not self.access_times: return lru_key = min(self.access_times, key=self.access_times.get) self.remove(lru_key) def remove(self, key: str): """Remove item from cache.""" self.cache.pop(key, None) self.access_times.pop(key, None) ``` #### Parallel Processing ```python import concurrent.futures from typing import List, Callable class ParallelContentProcessor: """ Parallel processing for multiple content items. """ def __init__(self, max_workers: int = 4): self.max_workers = max_workers def process_multiple_contents( self, contents: List[bytes], processor_func: Callable, **kwargs ) -> List[Dict]: """ Process multiple contents in parallel. 
Args: contents: List of content to process processor_func: Processing function to use **kwargs: Additional arguments for processor Returns: List[Dict]: Processing results """ results = [None] * len(contents) with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor: # Submit all tasks future_to_index = { executor.submit(processor_func, content, **kwargs): i for i, content in enumerate(contents) } # Collect results as they complete for future in concurrent.futures.as_completed(future_to_index): index = future_to_index[future] try: result = future.result() results[index] = result except Exception as e: results[index] = { "success": False, "error": str(e), "index": index } return results ``` ### 3. Error Handling Best Practices #### Comprehensive Error Handling ```python class OPCATErrorHandler: """ Comprehensive error handling for OP_CAT-IPFS operations. """ ERROR_CODES = { "INVALID_CID": 1001, "CONTENT_TOO_LARGE": 1002, "SCRIPT_SIZE_EXCEEDED": 1003, "STACK_DEPTH_EXCEEDED": 1004, "VALIDATION_FAILED": 1005, "PROCESSING_ERROR": 1006, "SECURITY_VIOLATION": 1007, "CONFIGURATION_ERROR": 1008 } def __init__(self): self.error_log = [] def handle_error(self, error_type: str, message: str, context: Dict = None) -> Dict: """ Handle and log error consistently. 
Args: error_type: Type of error message: Error message context: Additional context information Returns: Dict: Structured error response """ error_code = self.ERROR_CODES.get(error_type, 9999) timestamp = datetime.datetime.now().isoformat() error_entry = { "code": error_code, "type": error_type, "message": message, "context": context or {}, "timestamp": timestamp } self.error_log.append(error_entry) return { "success": False, "error": error_entry, "recovery_suggestions": self._get_recovery_suggestions(error_type) } def _get_recovery_suggestions(self, error_type: str) -> List[str]: """Get recovery suggestions for error type.""" suggestions = { "INVALID_CID": [ "Verify CID format and length", "Check CID prefix (bafy, bafk)", "Validate base32 encoding" ], "CONTENT_TOO_LARGE": [ "Use content chunking", "Compress content before processing", "Increase max_content_size limit" ], "SCRIPT_SIZE_EXCEEDED": [ "Reduce number of operations", "Use shorter content identifiers", "Optimize operation sequence" ], "STACK_DEPTH_EXCEEDED": [ "Reduce number of push operations", "Use nested aggregation patterns", "Split into multiple scripts" ] } return suggestions.get(error_type, ["Contact support for assistance"]) ``` --- ## Security Checklist ### Pre-Deployment Security Checklist #### Input Validation - [ ] IPFS CID format validation - [ ] Content size limits enforced - [ ] Character encoding validation - [ ] Malicious input detection #### Script Security - [ ] Stack depth limits enforced - [ ] Script size limits enforced - [ ] Operation count limits - [ ] Opcode validation #### Content Security - [ ] Multi-layer hash verification - [ ] Content type validation - [ ] Malware scanning (if applicable) - [ ] Access control enforcement #### Network Security - [ ] HTTPS/TLS encryption - [ ] API rate limiting - [ ] DDoS protection - [ ] Authentication mechanisms ### Runtime Security Monitoring #### Monitoring Metrics ```python class SecurityMonitor: """ Runtime security monitoring for 
OP_CAT-IPFS operations. """ def __init__(self): self.metrics = { "requests_processed": 0, "validation_failures": 0, "security_violations": 0, "average_processing_time": 0, "error_rate": 0 } self.alerts = [] def record_request(self, processing_time: float, success: bool, security_issues: List[str] = None): """Record request metrics.""" self.metrics["requests_processed"] += 1 if not success: self.metrics["validation_failures"] += 1 if security_issues: self.metrics["security_violations"] += len(security_issues) for issue in security_issues: self._create_alert(issue) # Update average processing time total_time = self.metrics["average_processing_time"] * (self.metrics["requests_processed"] - 1) self.metrics["average_processing_time"] = (total_time + processing_time) / self.metrics["requests_processed"] # Update error rate self.metrics["error_rate"] = self.metrics["validation_failures"] / self.metrics["requests_processed"] def _create_alert(self, issue: str): """Create security alert.""" alert = { "issue": issue, "timestamp": datetime.datetime.now().isoformat(), "severity": self._determine_severity(issue) } self.alerts.append(alert) def _determine_severity(self, issue: str) -> str: """Determine alert severity.""" high_severity_issues = [ "stack_overflow_attempt", "script_size_exceeded", "malicious_cid_detected" ] return "HIGH" if issue in high_severity_issues else "MEDIUM" ``` --- ## Compliance and Auditing ### Audit Trail ```python class AuditTrail: """ Comprehensive audit trail for OP_CAT-IPFS operations. 
""" def __init__(self): self.audit_log = [] def record_operation(self, operation_type: str, input_data: Dict, result: Dict, user_id: str = None): """Record operation in audit trail.""" audit_entry = { "timestamp": datetime.datetime.now().isoformat(), "operation_type": operation_type, "user_id": user_id, "input_hash": hashlib.sha256(str(input_data).encode()).hexdigest(), "result_hash": hashlib.sha256(str(result).encode()).hexdigest(), "success": result.get("success", False), "processing_time_ms": result.get("processing_time_ms", 0), "security_flags": result.get("security_flags", []) } self.audit_log.append(audit_entry) def generate_audit_report(self, start_date: str = None, end_date: str = None) -> Dict: """Generate audit report for date range.""" filtered_log = self._filter_by_date(self.audit_log, start_date, end_date) return { "period": { "start": start_date, "end": end_date, "total_operations": len(filtered_log) }, "statistics": { "successful_operations": sum(1 for entry in filtered_log if entry["success"]), "failed_operations": sum(1 for entry in filtered_log if not entry["success"]), "average_processing_time": sum(entry["processing_time_ms"] for entry in filtered_log) / len(filtered_log) if filtered_log else 0, "security_incidents": sum(len(entry["security_flags"]) for entry in filtered_log) }, "security_summary": self._generate_security_summary(filtered_log) } def _filter_by_date(self, audit_log: List[Dict], start_date: str, end_date: str) -> List[Dict]: """Filter audit log by date range.""" if not start_date and not end_date: return audit_log filtered = [] for entry in audit_log: entry_date = entry["timestamp"][:10] # Extract date part if start_date and entry_date < start_date: continue if end_date and entry_date > end_date: continue filtered.append(entry) return filtered def _generate_security_summary(self, audit_log: List[Dict]) -> Dict: """Generate security summary from audit log.""" security_flags = {} for entry in audit_log: for flag in 
entry["security_flags"]: security_flags[flag] = security_flags.get(flag, 0) + 1 return { "total_flags": sum(security_flags.values()), "flag_counts": security_flags, "most_common_flag": max(security_flags, key=security_flags.get) if security_flags else None } ``` --- ## Implementation Checklist ### Development Phase - [ ] Implement comprehensive input validation - [ ] Add multi-layer content verification - [ ] Configure security monitoring - [ ] Set up audit logging - [ ] Implement error handling - [ ] Add performance optimization ### Testing Phase - [ ] Unit tests for all validation functions - [ ] Integration tests for complete workflows - [ ] Security penetration testing - [ ] Performance load testing - [ ] Error scenario testing - [ ] Compliance verification ### Deployment Phase - [ ] Security configuration review - [ ] Monitoring system setup - [ ] Alert system configuration - [ ] Backup procedures verification - [ ] Documentation completion - [ ] Team training completion ### Maintenance Phase - [ ] Regular security audits - [ ] Performance monitoring - [ ] Log analysis and review - [ ] Security patch updates - [ ] Configuration optimization - [ ] Compliance verification --- This comprehensive security and best practices guide ensures robust, secure, and compliant OP_CAT-IPFS integration implementations for the Starlight project.