#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Validation Framework
Provides CSV validation, schema checking, and input sanitization
"""

import os
import csv
import logging
from typing import List, Dict, Any, Optional, Callable
from pathlib import Path


class ValidationError(Exception):
    """Signals that a validation check did not pass.

    Behaves exactly like the built-in ``Exception``; it exists only so that
    callers can catch validation failures separately from other errors.
    """


class CSVValidator:
    """Validates CSV files against schemas.

    Files are decoded as UTF-8 (a leading byte-order mark is ignored). The
    delimiter is auto-detected from the first ``_SNIFF_SAMPLE_SIZE``
    characters of the file and falls back to a comma when detection fails.
    The same detection is used for structure checks and row-level checks.
    """

    # 'utf-8-sig' reads plain UTF-8 unchanged but strips a BOM, which would
    # otherwise end up glued to the first column name.
    _ENCODING = 'utf-8-sig'
    # Number of characters handed to csv.Sniffer for delimiter detection.
    _SNIFF_SAMPLE_SIZE = 1024
    # Delimiter used when csv.Sniffer cannot decide (e.g. single-column files).
    _DEFAULT_DELIMITER = ','

    def __init__(self, logger: Optional[logging.Logger] = None):
        """Initialize CSV validator.

        Args:
            logger: Optional logger instance; defaults to this module's logger
        """
        self.logger = logger or logging.getLogger(__name__)

    def validate_file_exists(self, file_path: str) -> bool:
        """Validate that CSV file exists.

        Args:
            file_path: Path to CSV file

        Returns:
            True if file exists, raises ValidationError otherwise

        Raises:
            ValidationError: If file doesn't exist
        """
        if not os.path.exists(file_path):
            raise ValidationError(f"CSV file not found: {file_path}")
        return True

    def validate_file_extension(self, file_path: str) -> bool:
        """Validate that file has .csv extension (case-insensitive).

        Args:
            file_path: Path to CSV file

        Returns:
            True if valid extension, raises ValidationError otherwise

        Raises:
            ValidationError: If file doesn't have .csv extension
        """
        if not file_path.lower().endswith('.csv'):
            raise ValidationError(f"File must have .csv extension: {file_path}")
        return True

    def validate_file_readable(self, file_path: str) -> bool:
        """Validate that CSV file is readable.

        Args:
            file_path: Path to CSV file

        Returns:
            True if readable, raises ValidationError otherwise

        Raises:
            ValidationError: If file is not readable
        """
        if not os.access(file_path, os.R_OK):
            raise ValidationError(f"CSV file is not readable: {file_path}")
        return True

    def _detect_delimiter(self, handle: Any) -> str:
        """Guess the delimiter of an open text file and rewind it to the start.

        Args:
            handle: Seekable text file object positioned at the beginning

        Returns:
            The sniffed delimiter, or ``_DEFAULT_DELIMITER`` if sniffing fails
        """
        sample = handle.read(self._SNIFF_SAMPLE_SIZE)
        handle.seek(0)
        try:
            return csv.Sniffer().sniff(sample).delimiter
        except csv.Error as exc:
            # Sniffer gives up on empty or single-column input; a comma is
            # the conventional default and lets later checks report the
            # actual problem (e.g. "no data rows").
            self.logger.debug(
                "Could not detect CSV delimiter (%s); using %r",
                exc, self._DEFAULT_DELIMITER,
            )
            return self._DEFAULT_DELIMITER

    def _load_rows(self, file_path: str) -> tuple:
        """Read the whole CSV file with the auto-detected delimiter.

        Args:
            file_path: Path to CSV file

        Returns:
            ``(columns, numbered_rows)`` where ``columns`` is the header list
            and ``numbered_rows`` is a list of ``(line_number, row_dict)``.
            The line number is the reader's physical line count after the row
            was read, so it stays accurate when blank lines are skipped.
        """
        with open(file_path, 'r', encoding=self._ENCODING) as handle:
            delimiter = self._detect_delimiter(handle)
            reader = csv.DictReader(handle, delimiter=delimiter)
            columns = list(reader.fieldnames or [])
            numbered_rows = [(reader.line_num, row) for row in reader]
        return columns, numbered_rows

    def validate_csv_structure(self, file_path: str, required_columns: Optional[List[str]] = None) -> Dict[str, Any]:
        """Validate CSV file structure.

        Args:
            file_path: Path to CSV file
            required_columns: List of required column names (optional)

        Returns:
            Dictionary with validation results:
            - valid: bool
            - columns: list of column names
            - row_count: number of data rows
            - errors: list of error messages

        Raises:
            ValidationError: If the file cannot be read or parsed, a required
                column is missing, there are no data rows, or a data row has
                no non-empty values
        """
        errors = []

        try:
            columns, numbered_rows = self._load_rows(file_path)

            # Check for required columns (reported in the caller's order so
            # the message is deterministic)
            if required_columns:
                present = set(columns)
                missing_columns = [
                    name for name in dict.fromkeys(required_columns)
                    if name not in present
                ]
                if missing_columns:
                    errors.append(f"Missing required columns: {', '.join(missing_columns)}")

            # Check for empty file
            if not numbered_rows:
                errors.append("CSV file is empty (no data rows)")

            # Check for empty rows
            empty_rows = [
                line_num for line_num, row in numbered_rows
                if not any(row.values())
            ]
            if empty_rows:
                errors.append(f"Found {len(empty_rows)} empty rows at lines: {', '.join(map(str, empty_rows[:10]))}")

            if errors:
                raise ValidationError(f"CSV validation failed: {'; '.join(errors)}")

            return {
                'valid': True,
                'columns': columns,
                'row_count': len(numbered_rows),
                'errors': []
            }

        except ValidationError:
            # Already a well-formed validation failure; do not wrap it again.
            raise
        except csv.Error as e:
            raise ValidationError(f"CSV parsing error: {e}") from e
        except Exception as e:
            raise ValidationError(f"Error validating CSV: {e}") from e

    def validate_row_data(self, row: Dict[str, str], required_fields: List[str],
                         field_validators: Optional[Dict[str, Callable]] = None) -> List[str]:
        """Validate a single CSV row.

        Args:
            row: Dictionary representing a CSV row
            required_fields: List of required field names
            field_validators: Optional dictionary mapping field names to validation functions

        Returns:
            List of error messages (empty if valid)
        """
        errors = []

        # Check required fields. csv.DictReader fills fields missing from a
        # short row with None, so a None value counts as missing too.
        for field in required_fields:
            value = (row.get(field) or '').strip()
            if not value:
                errors.append(f"Missing required field: {field}")

        # Apply custom validators; any exception a validator raises is
        # recorded as a validation error for that field.
        if field_validators:
            for field, validator in field_validators.items():
                if field in row:
                    try:
                        validator(row[field])
                    except Exception as e:
                        errors.append(f"Validation failed for field '{field}': {e}")

        return errors

    def validate_complete(self, file_path: str, required_columns: Optional[List[str]] = None,
                        required_fields: Optional[List[str]] = None,
                        field_validators: Optional[Dict[str, Callable]] = None) -> Dict[str, Any]:
        """Perform complete CSV validation.

        Runs the file checks, the structure check and, when ``required_fields``
        or ``field_validators`` is given, the per-row checks.

        Args:
            file_path: Path to CSV file
            required_columns: List of required column names
            required_fields: List of required field names per row
            field_validators: Optional dictionary mapping field names to validation functions

        Returns:
            Dictionary with validation results:
            - valid: bool
            - columns: list of column names
            - row_count: number of data rows
            - row_errors: list of ``{'row': line_number, 'errors': [...]}``
            - errors: list of structure-level error messages

        Raises:
            ValidationError: If any file, structure or row check fails
        """
        # File existence, extension and readability
        self.validate_file_exists(file_path)
        self.validate_file_extension(file_path)
        self.validate_file_readable(file_path)

        # Structure validation
        structure_result = self.validate_csv_structure(file_path, required_columns)

        # Row-level validation, parsed with the same detected delimiter as
        # the structure check so both passes see identical rows.
        row_errors = []
        if required_fields or field_validators:
            _, numbered_rows = self._load_rows(file_path)
            for row_num, row in numbered_rows:
                errors = self.validate_row_data(row, required_fields or [], field_validators)
                if errors:
                    row_errors.append({
                        'row': row_num,
                        'errors': errors
                    })

        result = {
            'valid': not row_errors,
            'columns': structure_result['columns'],
            'row_count': structure_result['row_count'],
            'row_errors': row_errors,
            'errors': structure_result['errors']
        }

        if not result['valid']:
            error_messages = result['errors'].copy()
            for row_error in row_errors:
                error_messages.append(f"Row {row_error['row']}: {', '.join(row_error['errors'])}")
            raise ValidationError(f"CSV validation failed: {'; '.join(error_messages)}")

        return result


class InputSanitizer:
    """Sanitizes and normalizes input data."""

    @staticmethod
    def sanitize_string(value: str, max_length: Optional[int] = None,
                       strip_whitespace: bool = True) -> str:
        """Sanitize a string value.

        Non-string input is converted with ``str()``. Whitespace is stripped
        before the length limit is applied.

        Args:
            value: String value to sanitize
            max_length: Optional maximum length; ``None`` means no limit and
                ``0`` truncates to the empty string
            strip_whitespace: Whether to strip leading/trailing whitespace

        Returns:
            Sanitized string

        Raises:
            ValueError: If max_length is negative
        """
        if not isinstance(value, str):
            value = str(value)

        if strip_whitespace:
            value = value.strip()

        # Compare against None explicitly: a limit of 0 is a real limit, and
        # a negative limit would silently chop characters off the end.
        if max_length is not None:
            if max_length < 0:
                raise ValueError(f"max_length must be non-negative, got {max_length}")
            value = value[:max_length]

        return value

    @staticmethod
    def sanitize_number(value: Any, min_value: Optional[float] = None,
                       max_value: Optional[float] = None) -> float:
        """Sanitize a number value.

        Args:
            value: Number value to sanitize
            min_value: Optional minimum value (inclusive)
            max_value: Optional maximum value (inclusive)

        Returns:
            Sanitized number as a float

        Raises:
            ValueError: If value cannot be converted to number or is out of range
        """
        import math

        try:
            num = float(value)
        except (ValueError, TypeError, OverflowError) as exc:
            # OverflowError: integers too large to be represented as a float.
            raise ValueError(f"Cannot convert to number: {value}") from exc

        # NaN compares false against everything, so it would slip through
        # both range checks below; reject it whenever a range was requested.
        has_bounds = min_value is not None or max_value is not None
        if has_bounds and math.isnan(num):
            raise ValueError(f"Value {num} cannot satisfy the requested range")

        if min_value is not None and num < min_value:
            raise ValueError(f"Value {num} is below minimum {min_value}")

        if max_value is not None and num > max_value:
            raise ValueError(f"Value {num} is above maximum {max_value}")

        return num

    @staticmethod
    def sanitize_path(value: str, must_exist: bool = False,
                     must_be_file: bool = False, must_be_dir: bool = False) -> str:
        """Sanitize a file path.

        Args:
            value: Path to sanitize
            must_exist: Whether path must exist
            must_be_file: Whether path must be a file
            must_be_dir: Whether path must be a directory

        Returns:
            Sanitized absolute path (resolved via ``Path.resolve()``)

        Raises:
            ValueError: If path validation fails
        """
        path = Path(value).resolve()

        if must_exist and not path.exists():
            raise ValueError(f"Path does not exist: {path}")

        if must_be_file and not path.is_file():
            raise ValueError(f"Path is not a file: {path}")

        if must_be_dir and not path.is_dir():
            raise ValueError(f"Path is not a directory: {path}")

        return str(path)


def create_field_validator(field_type: str, **kwargs) -> Callable:
    """Create a field validator function.

    The returned callable takes a single field value, returns ``None`` when
    the value is acceptable and raises ``ValueError`` otherwise. ``None``
    values (as produced by ``csv.DictReader`` for short rows) are treated as
    empty text, and other non-string values are converted with ``str()``.

    Args:
        field_type: Type of validator ('required', 'numeric', 'email', 'url', 'length')
        **kwargs: Additional parameters for validator. Only the 'length'
            validator reads any: ``min_length`` (default 0) and
            ``max_length`` (default None, meaning no upper limit).

    Returns:
        Validator function

    Raises:
        ValueError: If field_type is not one of the known validator types
    """
    import re

    email_pattern = re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$')
    url_pattern = re.compile(r'^https?://.+')

    def as_text(value) -> str:
        # Normalise whatever the caller hands over to text so every validator
        # fails with a clean ValueError rather than a TypeError.
        if value is None:
            return ''
        return value if isinstance(value, str) else str(value)

    def required_validator(value: str):
        if not as_text(value).strip():
            raise ValueError("Field is required")

    def numeric_validator(value: str):
        try:
            float(value)
        except (ValueError, TypeError):
            raise ValueError("Field must be numeric") from None

    def email_validator(value: str):
        # fullmatch: with match(), '$' would also accept a trailing newline.
        if not email_pattern.fullmatch(as_text(value)):
            raise ValueError("Field must be a valid email address")

    def url_validator(value: str):
        # fullmatch: rejects values carrying extra lines after the URL.
        if not url_pattern.fullmatch(as_text(value)):
            raise ValueError("Field must be a valid URL")

    def length_validator(value: str):
        min_len = kwargs.get('min_length', 0)
        max_len = kwargs.get('max_length', None)
        size = len(as_text(value))
        if size < min_len:
            raise ValueError(f"Field must be at least {min_len} characters")
        # Explicit None check so that max_length=0 is honoured.
        if max_len is not None and size > max_len:
            raise ValueError(f"Field must be at most {max_len} characters")

    validators = {
        'required': required_validator,
        'numeric': numeric_validator,
        'email': email_validator,
        'url': url_validator,
        'length': length_validator
    }

    if field_type not in validators:
        raise ValueError(f"Unknown validator type: {field_type}")

    return validators[field_type]

