"""
Neural Database Management System
Stores and manages neural recording data across species and modalities
"""

import collections
import csv
import datetime
import hashlib
import io
import itertools
import json
import math
from dataclasses import dataclass, asdict
from typing import Dict, List, Optional, Any, Union, Tuple


@dataclass
class RecordingMetadata:
    """Metadata for neural recordings"""
    # Overwritten by NeuralDatabase.add_recording with the generated ID.
    recording_id: str
    species: str
    modality: str
    # Used as part of the generated ID; its first four characters are
    # treated as the year by the usage statistics.
    timestamp: str
    duration: float
    sample_rate: int
    channel_count: int
    data_size: int
    experiment_type: str
    researcher_id: str
    ethical_approval: str
    quantum_enhanced: bool


@dataclass
class DataQualityMetrics:
    """Data quality assessment metrics"""
    signal_to_noise_ratio: float
    artifact_percentage: float
    data_completeness: float
    noise_floor: float
    impedance_stability: float
    quantum_coherence: float


class NeuralDatabase:
    """Comprehensive neural database management system

    Everything is held in memory:

    * ``recordings``     -- recording ID -> stored record dict
    * ``metadata_index`` -- species -> modality -> list of recording IDs
    * ``search_index``   -- lower-cased term (or ``'high_snr'``) -> list of IDs
    * ``stats_cache``    -- aggregate statistics, rebuilt on every add
    """

    def __init__(self):
        self.recordings = {}
        self.metadata_index = {}
        self.quality_metrics = {}
        self.search_index = {}
        self.stats_cache = {}
        self.last_updated = datetime.datetime.now().isoformat()

    def generate_recording_id(self, metadata: RecordingMetadata) -> str:
        """Generate unique recording ID

        The ID is the first 16 hex digits of the SHA-256 of species,
        modality, timestamp and researcher ID, so two recordings that share
        all four of those fields get the same ID.
        """
        id_string = f"{metadata.species}_{metadata.modality}_{metadata.timestamp}_{metadata.researcher_id}"
        return hashlib.sha256(id_string.encode()).hexdigest()[:16]

    def add_recording(self, metadata: RecordingMetadata, data: List[Dict],
                      quality_metrics: DataQualityMetrics) -> str:
        """Add a new recording to the database

        Args:
            metadata: Recording metadata; its ``recording_id`` field is
                overwritten in place with the generated ID.
            data: Recording samples; must be JSON-serialisable because a
                content hash is derived from ``json.dumps(data)``.
            quality_metrics: Quality metrics stored alongside the recording.

        Returns:
            The generated recording ID.  If a recording with the same ID
            already exists it is replaced, and its old index entries are
            dropped so the indices never list an ID twice.
        """
        recording_id = self.generate_recording_id(metadata)
        metadata.recording_id = recording_id

        # Build the full record first: json.dumps may raise, and in that
        # case the database must be left untouched.
        record = {
            'metadata': asdict(metadata),
            'data': data,
            'quality_metrics': asdict(quality_metrics),
            'data_hash': hashlib.sha256(json.dumps(data, sort_keys=True).encode()).hexdigest()[:16],
            'added_timestamp': datetime.datetime.now().isoformat()
        }

        # Replacing an existing recording: forget its previous index entries,
        # otherwise the ID would be indexed (and later counted) twice.
        if recording_id in self.recordings:
            self._remove_from_indices(recording_id)

        # Store recording
        self.recordings[recording_id] = record

        # Update indices
        self._update_metadata_index(recording_id, metadata)
        self._update_search_index(recording_id, metadata, quality_metrics)
        self._update_statistics()

        return recording_id

    def _remove_from_indices(self, recording_id: str) -> None:
        """Drop every metadata/search index entry pointing at recording_id"""
        for modalities in self.metadata_index.values():
            for ids in modalities.values():
                ids[:] = [rid for rid in ids if rid != recording_id]
        for ids in self.search_index.values():
            ids[:] = [rid for rid in ids if rid != recording_id]

    def _update_metadata_index(self, recording_id: str, metadata: RecordingMetadata):
        """Update metadata search indices"""
        by_modality = self.metadata_index.setdefault(metadata.species, {})
        by_modality.setdefault(metadata.modality, []).append(recording_id)

    def _update_search_index(self, recording_id: str, metadata: RecordingMetadata,
                             quality_metrics: DataQualityMetrics):
        """Update search index for fast queries"""
        search_terms = [
            metadata.species.lower(),
            metadata.modality.lower(),
            metadata.experiment_type.lower(),
            metadata.researcher_id.lower()
        ]

        for term in search_terms:
            self.search_index.setdefault(term, []).append(recording_id)

        # Quality-based indexing
        if quality_metrics.signal_to_noise_ratio > 10:
            self.search_index.setdefault('high_snr', []).append(recording_id)

    def _update_statistics(self):
        """Update database statistics

        Rebuilds ``stats_cache`` from all stored recordings and refreshes
        ``last_updated`` with the same timestamp.
        """
        species_count = collections.Counter()
        modality_count = collections.Counter()
        quantum_count = 0
        total_size = 0

        for recording in self.recordings.values():
            metadata = recording['metadata']
            species_count[metadata['species']] += 1
            modality_count[metadata['modality']] += 1
            if metadata['quantum_enhanced']:
                quantum_count += 1
            total_size += metadata['data_size']

        now = datetime.datetime.now().isoformat()
        self.stats_cache = {
            'total_recordings': len(self.recordings),
            'species_distribution': dict(species_count),
            'modality_distribution': dict(modality_count),
            'quantum_enhanced_count': quantum_count,
            'total_data_size': total_size,
            'last_updated': now
        }
        self.last_updated = now

    def search_recordings(self, species: Optional[str] = None, modality: Optional[str] = None,
                          experiment_type: Optional[str] = None, researcher_id: Optional[str] = None,
                          min_snr: Optional[float] = None, quantum_only: bool = False) -> List[str]:
        """Search recordings based on criteria

        All given criteria must match.  String criteria that are ``None`` or
        empty are ignored; ``min_snr`` is ignored only when it is ``None``
        (so ``0.0`` is a real threshold).

        Args:
            species: Exact species to match.
            modality: Exact modality to match (works with or without
                ``species``).
            experiment_type: Exact experiment type to match.
            researcher_id: Exact researcher ID to match.
            min_snr: Keep recordings whose signal-to-noise ratio is
                ``>= min_snr``.
            quantum_only: Keep only recordings flagged quantum-enhanced.

        Returns:
            Matching recording IDs in the order the recordings were added.
        """
        matches = []
        for recording_id, recording in self.recordings.items():
            metadata = recording['metadata']
            if species and metadata['species'] != species:
                continue
            if modality and metadata['modality'] != modality:
                continue
            if experiment_type and metadata['experiment_type'] != experiment_type:
                continue
            if researcher_id and metadata['researcher_id'] != researcher_id:
                continue
            if min_snr is not None:
                snr = recording['quality_metrics']['signal_to_noise_ratio']
                # Written as "not >=" so a NaN SNR is excluded.
                if not snr >= min_snr:
                    continue
            if quantum_only and not metadata['quantum_enhanced']:
                continue
            matches.append(recording_id)
        return matches

    def get_recording(self, recording_id: str) -> Optional[Dict[str, Any]]:
        """Retrieve a specific recording, or None if the ID is unknown"""
        return self.recordings.get(recording_id)

    def get_cross_species_comparison(self, modality: str) -> Dict[str, Any]:
        """Generate cross-species comparison for a modality

        Returns:
            A dict with ``'modality'``, ``'timestamp'`` and ``'species_data'``;
            the latter maps each species that has at least one recording of
            this modality to its recording count, average SNR, total
            duration and percentage of quantum-enhanced recordings.
        """
        comparison = {
            'modality': modality,
            'timestamp': datetime.datetime.now().isoformat(),
            'species_data': {}
        }

        for species, by_modality in self.metadata_index.items():
            species_recordings = by_modality.get(modality)
            # Skip species with no (or an emptied) entry for this modality;
            # this also guards the divisions below.
            if not species_recordings:
                continue

            # Aggregate statistics for this species-modality combination
            total_snr = 0
            total_duration = 0
            quantum_count = 0

            for recording_id in species_recordings:
                recording = self.recordings[recording_id]
                total_snr += recording['quality_metrics']['signal_to_noise_ratio']
                total_duration += recording['metadata']['duration']
                if recording['metadata']['quantum_enhanced']:
                    quantum_count += 1

            count = len(species_recordings)
            comparison['species_data'][species] = {
                'recording_count': count,
                'average_snr': total_snr / count,
                'total_duration': total_duration,
                'quantum_enhanced_percentage': (quantum_count / count) * 100
            }

        return comparison

    def generate_database_report(self) -> Dict[str, Any]:
        """Generate comprehensive database report"""
        report = {
            'report_timestamp': datetime.datetime.now().isoformat(),
            'database_statistics': self.stats_cache,
            'quality_metrics_summary': self._calculate_quality_summary(),
            'usage_statistics': self._calculate_usage_statistics(),
            'recommendations': self._generate_recommendations()
        }

        return report

    def _calculate_quality_summary(self) -> Dict[str, Any]:
        """Calculate overall quality metrics summary

        Returns an empty dict when the database holds no recordings.
        """
        if not self.recordings:
            return {}

        all_snr = []
        all_artifact = []
        all_completeness = []
        all_coherence = []

        for recording in self.recordings.values():
            metrics = recording['quality_metrics']
            all_snr.append(metrics['signal_to_noise_ratio'])
            all_artifact.append(metrics['artifact_percentage'])
            all_completeness.append(metrics['data_completeness'])
            all_coherence.append(metrics['quantum_coherence'])

        def safe_avg(values):
            return sum(values) / len(values) if values else 0

        def safe_std(values):
            # Population standard deviation (divides by N, not N - 1).
            if len(values) < 2:
                return 0
            mean = safe_avg(values)
            variance = sum((x - mean) ** 2 for x in values) / len(values)
            return math.sqrt(variance)

        return {
            'average_snr': safe_avg(all_snr),
            'snr_std': safe_std(all_snr),
            'average_artifact_percentage': safe_avg(all_artifact),
            'average_data_completeness': safe_avg(all_completeness),
            'average_quantum_coherence': safe_avg(all_coherence),
            'high_quality_recordings': sum(1 for snr in all_snr if snr > 15),
            'low_quality_recordings': sum(1 for snr in all_snr if snr < 5)
        }

    def _calculate_usage_statistics(self) -> Dict[str, Any]:
        """Calculate usage statistics"""
        researcher_counts = collections.Counter()
        experiment_counts = collections.Counter()
        temporal_distribution = collections.Counter()

        for recording in self.recordings.values():
            metadata = recording['metadata']
            researcher_counts[metadata['researcher_id']] += 1
            experiment_counts[metadata['experiment_type']] += 1

            # Extract year from timestamp (best effort: a timestamp that
            # cannot be sliced or counted is left out of the distribution).
            try:
                year = metadata['timestamp'][:4]
                temporal_distribution[year] += 1
            except (TypeError, KeyError):
                continue

        return {
            'top_researchers': dict(researcher_counts.most_common(5)),
            'popular_experiment_types': dict(experiment_counts.most_common(5)),
            'temporal_distribution': dict(temporal_distribution)
        }

    def _generate_recommendations(self) -> List[str]:
        """Generate database management recommendations"""
        recommendations = []

        if not self.recordings:
            return ["Database is empty. Add recordings to get recommendations."]

        quality_summary = self._calculate_quality_summary()

        if quality_summary.get('average_snr', 0) < 10:
            recommendations.append("Consider improving recording setup to increase signal-to-noise ratio")

        if quality_summary.get('average_artifact_percentage', 0) > 20:
            recommendations.append("High artifact percentage detected. Review artifact rejection protocols")

        if quality_summary.get('average_quantum_coherence', 0) < 0.8:
            recommendations.append("Low quantum coherence detected. Check quantum enhancement systems")

        stats = self.stats_cache
        if stats.get('quantum_enhanced_count', 0) < stats.get('total_recordings', 1) * 0.5:
            recommendations.append("Consider increasing quantum-enhanced recordings for better data quality")

        return recommendations

    def export_data_summary(self, format_type: str = 'json') -> str:
        """Export data summary in specified format

        Args:
            format_type: ``'csv'`` for one CSV row per recording (with a
                header row, ``\\n`` separated, no trailing newline); any
                other value, including ``'json'``, yields indented JSON.

        Returns:
            The serialised summary.  CSV fields are quoted by the ``csv``
            module when they contain commas, quotes or newlines.
        """
        summary = {
            'export_timestamp': datetime.datetime.now().isoformat(),
            'database_stats': self.stats_cache,
            'recordings_overview': {}
        }

        for recording_id, recording in self.recordings.items():
            metadata = recording['metadata']
            summary['recordings_overview'][recording_id] = {
                'species': metadata['species'],
                'modality': metadata['modality'],
                'duration': metadata['duration'],
                'sample_rate': metadata['sample_rate'],
                'channels': metadata['channel_count'],
                'snr': recording['quality_metrics']['signal_to_noise_ratio'],
                'quantum_enhanced': metadata['quantum_enhanced']
            }

        if format_type != 'csv':
            # 'json' and every unrecognised format fall back to JSON.
            return json.dumps(summary, indent=2)

        columns = ['species', 'modality', 'duration', 'sample_rate',
                   'channels', 'snr', 'quantum_enhanced']
        buffer = io.StringIO()
        writer = csv.writer(buffer, lineterminator='\n')
        writer.writerow(['recording_id'] + columns)
        for recording_id, info in summary['recordings_overview'].items():
            # str() keeps the same rendering an f-string would give.
            writer.writerow([recording_id] + [str(info[column]) for column in columns])
        # The writer terminates every row; drop the final terminator.
        return buffer.getvalue().rstrip('\n')


def test_neural_database():
    """Test the neural database system"""
    print("Testing Neural Database Management System...")

    # Initialize database
    db = NeuralDatabase()

    # Create test metadata
    metadata1 = RecordingMetadata(
        recording_id='',
        species='human',
        modality='eeg',
        timestamp=datetime.datetime.now().isoformat(),
        duration=60.0,
        sample_rate=1000,
        channel_count=32,
        data_size=1000000,
        experiment_type='cognitive_task',
        researcher_id='researcher_001',
        ethical_approval='IRB_2024_001',
        quantum_enhanced=True
    )

    metadata2 = RecordingMetadata(
        recording_id='',
        species='mouse',
        modality='electrophysiology',
        timestamp=datetime.datetime.now().isoformat(),
        duration=30.0,
        sample_rate=30000,
        channel_count=64,
        data_size=2000000,
        experiment_type='behavioral_task',
        researcher_id='researcher_002',
        ethical_approval='IACUC_2024_001',
        quantum_enhanced=False
    )

    # Create test quality metrics
    quality1 = DataQualityMetrics(
        signal_to_noise_ratio=15.5,
        artifact_percentage=5.2,
        data_completeness=98.5,
        noise_floor=0.3,
        impedance_stability=95.0,
        quantum_coherence=0.92
    )

    quality2 = DataQualityMetrics(
        signal_to_noise_ratio=12.3,
        artifact_percentage=8.7,
        data_completeness=96.2,
        noise_floor=1.2,
        impedance_stability=88.5,
        quantum_coherence=0.78
    )

    # Create test data
    test_data1 = [{'timestamp': i/1000, 'channel_1': math.sin(2*math.pi*10*i/1000)} for i in range(1000)]
    test_data2 = [{'timestamp': i/30000, 'channel_1': math.sin(2*math.pi*20*i/30000)} for i in range(1000)]

    # Add recordings
    print("\n1. Adding recordings to database...")
    recording_id1 = db.add_recording(metadata1, test_data1, quality1)
    recording_id2 = db.add_recording(metadata2, test_data2, quality2)
    print(f"Added recording 1: {recording_id1}")
    print(f"Added recording 2: {recording_id2}")

    # Test search functionality
    print("\n2. Testing search functionality...")
    human_recordings = db.search_recordings(species='human')
    print(f"Human recordings: {human_recordings}")

    high_snr_recordings = db.search_recordings(min_snr=15.0)
    print(f"High SNR recordings: {high_snr_recordings}")

    quantum_recordings = db.search_recordings(quantum_only=True)
    print(f"Quantum-enhanced recordings: {quantum_recordings}")

    # Test cross-species comparison
    print("\n3. Testing cross-species comparison...")
    eeg_comparison = db.get_cross_species_comparison('eeg')
    print(f"EEG cross-species data: {eeg_comparison}")

    # Test database statistics
    print("\n4. Testing database statistics...")
    db_stats = db.stats_cache
    print(f"Total recordings: {db_stats['total_recordings']}")
    print(f"Species distribution: {db_stats['species_distribution']}")
    print(f"Modality distribution: {db_stats['modality_distribution']}")

    # Test database report
    print("\n5. Testing database report...")
    report = db.generate_database_report()
    print(f"Quality summary keys: {list(report['quality_metrics_summary'].keys())}")
    print(f"Recommendations: {report['recommendations']}")

    # Test data export
    print("\n6. Testing data export...")
    json_export = db.export_data_summary('json')
    print(f"JSON export length: {len(json_export)} characters")

    csv_export = db.export_data_summary('csv')
    print(f"CSV export preview:\n{csv_export[:200]}...")

    return True


if __name__ == "__main__":
    test_neural_database()