diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000000000000000000000000000000000000..575ccb2c6f36a774d317fede5400f1c69beaefae
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,56 @@
+# Git
+.git
+.gitignore
+.github
+
+# Python
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+.Python
+*.so
+*.egg
+*.egg-info
+dist
+build
+.pytest_cache
+.coverage
+htmlcov
+.mypy_cache
+
+# Environment
+.env
+.venv
+venv
+env
+
+# IDE
+.vscode
+.idea
+*.swp
+*.swo
+*~
+
+# Documentation (large files)
+*.md
+!README.md
+FASE_*.md
+
+# Tests
+tests/
+test_*.py
+
+# Logs
+logs/
+*.log
+
+# Temporary
+tmp/
+temp/
+.DS_Store
+
+# Old versions
+*_backup.py
+*_old.py
+*.old
diff --git a/Dockerfile.spaces b/Dockerfile.spaces
new file mode 100644
index 0000000000000000000000000000000000000000..7aae758eab7761ada6230fe7e319c9a998cafdbc
--- /dev/null
+++ b/Dockerfile.spaces
@@ -0,0 +1,53 @@
+# Dockerfile optimized for HuggingFace Spaces
+# Uses the new v3 architecture
+
+FROM python:3.10-slim
+
+# Environment variables
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PORT=7860 \
+    MODEL_ID=amazon/chronos-2 \
+    DEVICE_MAP=cpu
+
+# Working directory
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    build-essential \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy requirements
+COPY requirements.txt .
+
+# Install Python dependencies
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+
+# Copy application code (new v3 architecture)
+COPY app/ ./app/
+
+# Copy static files (Excel Add-in)
+COPY static/ ./static/
+
+# Copy docs (note: COPY cannot be made conditional with shell syntax, so docs/ must exist in the build context)
+COPY docs/ ./docs/
+
+# Create non-root user
+RUN useradd -m -u 1000 user && \
+    chown -R user:user /app
+
+USER user
+
+# Expose port (HF Spaces uses 7860)
+EXPOSE 7860
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=90s --retries=3 \
+    CMD curl -f http://localhost:7860/health || exit 1
+
+# Start command - uses the new main_v3 entry point
+CMD ["uvicorn", "app.main_v3:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
diff --git a/README.md b/README.md
index e73294dffcb513aaf39bcfd1a8bec460a4586677..c86042d746a0f5ff9dcc391689222922fa5471eb 100644
--- a/README.md
+++ b/README.md
@@ -1,195 +1,74 @@
 ---
-title: Chronos2 Excel Forecasting API
+title: Chronos2 Forecasting API
 emoji: 📊
 colorFrom: blue
 colorTo: green
 sdk: docker
+app_file: Dockerfile.spaces
 app_port: 7860
-pinned: false
-license: mit
 ---
 
-# 📊 Chronos2 Excel Forecasting API
+# Chronos2 Excel Forecasting API
 
-API de pronósticos con IA para Microsoft Excel usando [Amazon Chronos-2](https://huggingface.co/amazon/chronos-t5-large).
+Time series forecasting API powered by the Amazon Chronos-2 model, with Excel Add-in support.
 
-🔗 **Úsalo directamente desde Excel** con nuestro Office Add-in
+## Features
 
-## 🚀 Características
+- ✅ **Univariate & Multi-Series Forecasting** - Forecast one or many series at once
+- ✅ **Anomaly Detection** - Detect outliers in your data
+- ✅ **Backtesting** - Validate forecast accuracy
+- ✅ **Excel Add-in** - Direct integration with Microsoft Excel
+- ✅ **Interactive Charts** - Visualize forecasts and anomalies
+- ✅ **REST API** - Easy integration with any platform
 
-- ✅ **Pronósticos univariados**: Series temporales simples
-- ✅ **Detección de anomalías**: Identifica valores atípicos automáticamente
-- ✅ **Backtesting**: Valida la precisión de tus modelos
-- ✅ **API REST con FastAPI**: Fácil integración
-- ✅ **Documentación interactiva**: Swagger UI incluido
+## Quick Start
 
-## 📖 Documentación
+### API Endpoints
 
-Accede a la documentación interactiva:
-- **Swagger UI**: `/docs`
-- **ReDoc**: `/redoc`
-- **Health Check**: `/health`
+- **Health Check**: `GET /health`
+- **Documentation**: `GET /docs`
+- **Univariate Forecast**: `POST /forecast/univariate`
+- **Multi-Series Forecast**: `POST /forecast/multi-series`
+- **Anomaly Detection**: `POST /anomaly/detect`
+- **Backtesting**: `POST /backtest/simple`
 
-## 🧪 Prueba Rápida
+### Excel Add-in
 
-### Pronóstico Simple
+Load the add-in in Excel:
+1. Insert → Add-ins → Upload My Add-in
+2. Paste URL: `https://ttzzs-chronos2-excel-forecasting-api.hf.space/manifest.xml`
 
-```bash
-curl -X POST https://YOUR-USERNAME-chronos2-excel-forecasting-api.hf.space/forecast_univariate \
-  -H "Content-Type: application/json" \
-  -d '{
-    "series": {"values": [100, 102, 105, 103, 108, 112, 115]},
-    "prediction_length": 3,
-    "freq": "D"
-  }'
-```
-
-**Respuesta esperada:**
-```json
-{
-  "timestamps": ["t+1", "t+2", "t+3"],
-  "median": [117.5, 119.2, 121.0],
-  "quantiles": {
-    "0.1": [112.3, 113.8, 115.5],
-    "0.5": [117.5, 119.2, 121.0],
-    "0.9": [122.7, 124.6, 126.5]
-  }
-}
-```
-
-### Detección de Anomalías
+### Example API Call
 
 ```bash
-curl -X POST https://YOUR-USERNAME-chronos2-excel-forecasting-api.hf.space/detect_anomalies \
+curl -X POST https://ttzzs-chronos2-excel-forecasting-api.hf.space/forecast/univariate \
   -H "Content-Type: application/json" \
   -d '{
-    "context": {"values": [100, 102, 105, 103, 108]},
-    "recent_observed": [107, 200, 106],
-    "prediction_length": 3
-  }'
-```
-
-### Backtesting
-
-```bash
-curl -X POST https://YOUR-USERNAME-chronos2-excel-forecasting-api.hf.space/backtest_simple \
-  -H "Content-Type: application/json" \
-  -d '{
-    "series": {"values": [100, 102, 105, 103, 108, 112, 115, 118, 120, 122, 125, 128]},
-    "prediction_length": 7,
-    "test_length": 4
+    "series": {"values": [100, 102, 105, 108, 110]},
+    "prediction_length": 3,
+    "freq": "D"
   }'
 ```
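+
+The response shape below is illustrative (the numbers are invented; actual
+values depend on the model). The fields follow `ForecastUnivariateResponse`:
+forecast timestamps, the median path, and one list per requested quantile:
+
+```json
+{
+  "timestamps": ["2024-01-06", "2024-01-07", "2024-01-08"],
+  "median": [112.1, 113.8, 115.2],
+  "quantiles": {
+    "0.1": [108.4, 109.6, 110.7],
+    "0.5": [112.1, 113.8, 115.2],
+    "0.9": [115.9, 118.1, 119.8]
+  },
+  "series_id": "series_0"
+}
+```
+
+Anomaly detection works the same way; this request sketch follows the field
+names that `AnomalyMapper` reads from `AnomalyDetectionRequest`:
+
+```bash
+curl -X POST https://ttzzs-chronos2-excel-forecasting-api.hf.space/anomaly/detect \
+  -H "Content-Type: application/json" \
+  -d '{
+    "context": {"values": [100, 102, 105, 103, 108]},
+    "recent_values": [107, 200, 106],
+    "quantile_low": 0.05,
+    "quantile_high": 0.95,
+    "freq": "D"
+  }'
+```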
 
-## 🔗 Endpoints Disponibles
-
-| Endpoint | Método | Descripción |
-|----------|--------|-------------|
-| `/` | GET | Información de la API |
-| `/health` | GET | Health check del servicio |
-| `/docs` | GET | Documentación Swagger |
-| `/forecast_univariate` | POST | Pronóstico de serie simple |
-| `/detect_anomalies` | POST | Detectar valores atípicos |
-| `/backtest_simple` | POST | Validar precisión del modelo |
-| `/simple_forecast` | POST | Pronóstico rápido (testing) |
-
-## 💻 Uso con Excel
-
-Este API funciona perfectamente con nuestro **Office Add-in para Excel**:
-
-1. Descarga el Add-in desde [GitHub](https://github.com/tu-usuario/chronos2-server)
-2. Configura la URL de este Space en el Add-in
-3. ¡Realiza pronósticos directamente desde tus hojas de cálculo!
-
-### Ejemplo en Excel
-
-```javascript
-// En el Excel Add-in, configura:
-const API_BASE_URL = 'https://YOUR-USERNAME-chronos2-excel-forecasting-api.hf.space';
-```
-
-## 🛠️ Tecnologías
+## Architecture
 
-- **Modelo**: [Amazon Chronos-2 T5-Large](https://huggingface.co/amazon/chronos-t5-large)
-- **Framework**: [FastAPI](https://fastapi.tiangolo.com/)
-- **Inference**: [Hugging Face Inference API](https://huggingface.co/docs/api-inference)
-- **Deployment**: Hugging Face Spaces (Docker)
+Built with Clean Architecture principles:
+- **Domain Layer** - Business logic and entities
+- **Application Layer** - Use cases and services
+- **Infrastructure Layer** - External dependencies (ML models, storage)
+- **API Layer** - FastAPI routes and DTOs
 
-## 📊 Casos de Uso
+## Technology Stack
 
-- 📈 **Ventas**: Predice demanda futura de productos
-- 💰 **Finanzas**: Proyecta ingresos y gastos
-- 📦 **Inventario**: Optimiza stock y reposición
-- 🌡️ **Sensores**: Anticipa valores de sensores IoT
-- 🏪 **Retail**: Planifica recursos y personal
+- **Framework**: FastAPI 0.115.5
+- **ML Model**: Amazon Chronos-2 (Transformer-based forecasting)
+- **Python**: 3.10+
+- **Docker**: Optimized single-stage slim image
 
-## ⚙️ Configuración
+## License
 
-### Variables de Entorno
-
-Para desplegar tu propia instancia, configura:
-
-- `HF_TOKEN`: Tu token de Hugging Face (requerido)
-- `CHRONOS_MODEL_ID`: ID del modelo (default: `amazon/chronos-t5-large`)
-- `PORT`: Puerto del servidor (default: `7860`)
-
-### Crear tu propio Space
-
-1. Fork este repositorio
-2. Crea un nuevo Space en Hugging Face
-3. Selecciona **Docker** como SDK
-4. Conecta tu repositorio
-5. Configura `HF_TOKEN` en los Secrets del Space
-6. ¡Listo!
-
-## 🔒 Seguridad
-
-- ✅ CORS configurado para orígenes permitidos
-- ✅ Validación de entrada con Pydantic
-- ✅ Rate limiting en HuggingFace Inference API
-- ✅ Timeouts configurados para evitar bloqueos
-
-## 📚 Recursos
-
-- [Documentación de Chronos-2](https://huggingface.co/amazon/chronos-t5-large)
-- [API de HuggingFace Inference](https://huggingface.co/docs/api-inference)
-- [FastAPI Docs](https://fastapi.tiangolo.com/)
-- [Tutorial de Office Add-ins](https://docs.microsoft.com/en-us/office/dev/add-ins/)
-
-## 🐛 Solución de Problemas
-
-### "Model is loading"
-
-La primera request puede tardar 30-60 segundos mientras el modelo se carga. Reintenta después.
-
-### "HF_TOKEN not configured"
-
-Asegúrate de configurar `HF_TOKEN` en los Secrets de tu Space.
-
-### Errores de timeout
-
-El modelo puede estar frío. Espera unos segundos y reintenta.
-
-## 📝 Licencia
-
-MIT License - Ver [LICENSE](LICENSE) para más detalles.
-
-## 🤝 Contribuir
-
-¿Quieres mejorar este proyecto?
-
-1. Fork el repositorio
-2. Crea una branch para tu feature (`git checkout -b feature/amazing`)
-3. Commit tus cambios (`git commit -m 'Add amazing feature'`)
-4. Push a la branch (`git push origin feature/amazing`)
-5. Abre un Pull Request
-
-## 📧 Contacto
-
-¿Preguntas o sugerencias? Abre un [issue en GitHub](https://github.com/tu-usuario/chronos2-server/issues).
-
----
+MIT License
 
-**Desarrollado con ❤️ usando [Chronos-2](https://huggingface.co/amazon/chronos-t5-large) y [FastAPI](https://fastapi.tiangolo.com/)**
+## Support
 
-🌟 Si te gusta este proyecto, ¡dale una estrella en [GitHub](https://github.com/tu-usuario/chronos2-server)!
+For issues and questions, visit the [GitHub repository](https://github.com/vargasjosej/aprender_ai/tree/refactor/solid-architecture/chronos2-server).
diff --git a/app/api/__init__.py b/app/api/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/app/api/dependencies.py b/app/api/dependencies.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a6c5e47a60a23064469cf63b9a900bc821939d2
--- /dev/null
+++ b/app/api/dependencies.py
@@ -0,0 +1,194 @@
+"""
+Dependency injection for FastAPI.
+
+Provides instances of services, repositories, and use cases
+through FastAPI's dependency injection system.
+"""
+
+from typing import Optional
+
+from fastapi import Depends
+
+# Infrastructure
+from app.infrastructure.ml.model_factory import ModelFactory
+from app.infrastructure.config.settings import get_settings
+
+# Domain
+from app.domain.interfaces.forecast_model import IForecastModel
+from app.domain.interfaces.data_transformer import IDataTransformer
+from app.domain.services.forecast_service import ForecastService
+from app.domain.services.anomaly_service import AnomalyService
+
+# Application
+from app.application.use_cases.forecast_use_case import (
+    ForecastUnivariateUseCase,
+    ForecastMultiSeriesUseCase
+)
+from app.application.use_cases.anomaly_use_case import DetectAnomaliesUseCase
+from app.application.use_cases.backtest_use_case import BacktestUseCase
+
+# Utils
+from app.utils.dataframe_builder import DataFrameBuilder
+from app.utils.logger import setup_logger
+
+# Get settings instance
+settings = get_settings()
+
+logger = setup_logger(__name__)
+
+# ============================================================================
+# Infrastructure Layer Dependencies
+# ============================================================================
+
+# Singleton holding the forecasting model
+_model_instance: Optional[IForecastModel] = None
+
+
+def get_forecast_model() -> IForecastModel:
+    """
+    Dependency: forecasting model (singleton).
+
+    Uses Chronos-2 by default. The model is loaded once and reused
+    across all requests.
+
+    Returns:
+        IForecastModel: Model instance
+    """
+    global _model_instance
+
+    if _model_instance is None:
+        logger.info("Initializing forecast model (first time)")
+        _model_instance = ModelFactory.create(
+            model_type="chronos2",
+            model_id=settings.model_id,
+            device_map=settings.device_map
+        )
+        logger.info(f"Model loaded: {_model_instance.get_model_info()}")
+
+    return _model_instance
+
+
+def get_data_transformer() -> IDataTransformer:
+    """
+    Dependency: data transformer.
+
+    Returns:
+        IDataTransformer: Transformer instance
+    """
+    return DataFrameBuilder()
+
+
+# ============================================================================
+# Domain Layer Dependencies
+# ============================================================================
+
+def get_forecast_service(
+    model: IForecastModel = Depends(get_forecast_model),
+    transformer: IDataTransformer = Depends(get_data_transformer)
+) -> ForecastService:
+    """
+    Dependency: domain service for forecasting.
+
+    Args:
+        model: Forecasting model
+        transformer: Data transformer
+
+    Returns:
+        ForecastService: Forecasting service
+    """
+    return ForecastService(model=model, transformer=transformer)
+
+
+def get_anomaly_service(
+    model: IForecastModel = Depends(get_forecast_model),
+    transformer: IDataTransformer = Depends(get_data_transformer)
+) -> AnomalyService:
+    """
+    Dependency: domain service for anomaly detection.
+
+    Args:
+        model: Forecasting model
+        transformer: Data transformer
+
+    Returns:
+        AnomalyService: Anomaly service
+    """
+    return AnomalyService(model=model, transformer=transformer)
+
+
+# ============================================================================
+# Application Layer Dependencies (Use Cases)
+# ============================================================================
+
+def get_forecast_univariate_use_case(
+    service: ForecastService = Depends(get_forecast_service)
+) -> ForecastUnivariateUseCase:
+    """
+    Dependency: univariate forecast use case.
+
+    Args:
+        service: Forecasting service
+
+    Returns:
+        ForecastUnivariateUseCase: Use case
+    """
+    return ForecastUnivariateUseCase(forecast_service=service)
+
+
+def get_forecast_multi_series_use_case(
+    service: ForecastService = Depends(get_forecast_service)
+) -> ForecastMultiSeriesUseCase:
+    """
+    Dependency: multi-series forecast use case.
+
+    Args:
+        service: Forecasting service
+
+    Returns:
+        ForecastMultiSeriesUseCase: Use case
+    """
+    return ForecastMultiSeriesUseCase(forecast_service=service)
+
+
+def get_detect_anomalies_use_case(
+    service: AnomalyService = Depends(get_anomaly_service)
+) -> DetectAnomaliesUseCase:
+    """
+    Dependency: anomaly detection use case.
+
+    Args:
+        service: Anomaly service
+
+    Returns:
+        DetectAnomaliesUseCase: Use case
+    """
+    return DetectAnomaliesUseCase(anomaly_service=service)
+
+
+def get_backtest_use_case(
+    service: ForecastService = Depends(get_forecast_service)
+) -> BacktestUseCase:
+    """
+    Dependency: backtesting use case.
+
+    Args:
+        service: Forecasting service
+
+    Returns:
+        BacktestUseCase: Use case
+    """
+    return BacktestUseCase(forecast_service=service)
+
+
+# ============================================================================
+# Utility Functions
+# ============================================================================
+
+def reset_model():
+    """
+    Resets the model singleton (useful for testing).
+
+    WARNING: use only in tests, never in production.
+    """
+    global _model_instance
+    _model_instance = None
+    logger.warning("Model instance reset")
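+
+# Example (sketch, for tests only): the model dependency can be overridden so
+# that no real Chronos-2 weights are loaded. FakeModel is a hypothetical
+# stand-in implementing IForecastModel; `app` is the FastAPI instance from
+# app.main_v3 (the entry point wired up in Dockerfile.spaces):
+#
+#     from fastapi.testclient import TestClient
+#     from app.main_v3 import app
+#     from app.api.dependencies import get_forecast_model, reset_model
+#
+#     app.dependency_overrides[get_forecast_model] = lambda: FakeModel()
+#     client = TestClient(app)
+#     # ...exercise the endpoints...
+#     app.dependency_overrides.clear()
+#     reset_model()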
+""" + +from fastapi import APIRouter, Depends, HTTPException, status + +from app.api.dependencies import get_detect_anomalies_use_case +from app.application.use_cases.anomaly_use_case import DetectAnomaliesUseCase +from app.application.dtos.anomaly_dtos import ( + DetectAnomaliesRequestDTO, + DetectAnomaliesResponseDTO +) +from app.utils.logger import setup_logger + +logger = setup_logger(__name__) + +router = APIRouter(prefix="/anomaly", tags=["Anomaly Detection"]) + + +@router.post( + "/detect", + response_model=DetectAnomaliesResponseDTO, + status_code=status.HTTP_200_OK, + summary="Detectar anomalías", + description="Detecta anomalías comparando valores observados con pronóstico" +) +async def detect_anomalies( + request: DetectAnomaliesRequestDTO, + use_case: DetectAnomaliesUseCase = Depends(get_detect_anomalies_use_case) +): + """ + Detecta anomalías en serie temporal. + + Compara valores observados recientes con pronóstico basado + en contexto histórico. Marca como anomalías los valores que + caen fuera de intervalos de confianza. + + Args: + request: Contexto histórico y valores recientes a evaluar + use_case: Caso de uso inyectado + + Returns: + Lista de puntos con indicador de anomalía + + Example: + ```json + { + "context_values": [100, 102, 105, 103, 108], + "recent_observed": [112, 150, 115], + "quantile_low": 0.05, + "quantile_high": 0.95, + "freq": "D" + } + ``` + """ + try: + logger.info( + f"Anomaly detection request: {len(request.context_values)} context, " + f"{len(request.recent_observed)} to evaluate" + ) + + # Ejecutar use case + response = use_case.execute(request) + + num_anomalies = sum(1 for p in response.anomaly_points if p.is_anomaly) + logger.info( + f"Anomaly detection completed: {num_anomalies} anomalies found" + ) + + return response + + except ValueError as e: + logger.error(f"Validation error: {e}") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e) + ) + except Exception as e: + logger.error(f"Unexpected error in anomaly detection: {e}", exc_info=True) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Error interno en detección de anomalías" + ) diff --git a/app/api/routes/backtest.py b/app/api/routes/backtest.py new file mode 100644 index 0000000000000000000000000000000000000000..9b9e2d1a125cb243c72667a7ccbda3348e30adcc --- /dev/null +++ b/app/api/routes/backtest.py @@ -0,0 +1,83 @@ +""" +Backtesting API endpoints. + +Responsabilidad: Manejar requests de backtesting (evaluación de modelos). +""" + +from fastapi import APIRouter, Depends, HTTPException, status + +from app.api.dependencies import get_backtest_use_case +from app.application.use_cases.backtest_use_case import BacktestUseCase +from app.application.dtos.backtest_dtos import ( + BacktestRequestDTO, + BacktestResponseDTO +) +from app.utils.logger import setup_logger + +logger = setup_logger(__name__) + +router = APIRouter(prefix="/backtest", tags=["Backtesting"]) + + +@router.post( + "/simple", + response_model=BacktestResponseDTO, + status_code=status.HTTP_200_OK, + summary="Backtesting simple", + description="Evalúa pronóstico comparando con valores reales" +) +async def backtest_simple( + request: BacktestRequestDTO, + use_case: BacktestUseCase = Depends(get_backtest_use_case) +): + """ + Backtesting simple (hold-out). + + Divide la serie en train/test, genera pronóstico con train, + y compara con test para calcular métricas de error. 
+
+    Args:
+        request: Full series and backtest parameters
+        use_case: Injected use case
+
+    Returns:
+        Error metrics (MAE, MAPE, RMSE) and a forecast-vs-actual comparison
+
+    Example:
+        ```json
+        {
+            "series": {"values": [100, 102, 105, 103, 108, 112, 115, 118]},
+            "test_size": 3,
+            "freq": "D",
+            "quantile_levels": [0.1, 0.5, 0.9]
+        }
+        ```
+    """
+    try:
+        logger.info(
+            f"Backtest request: {len(request.series.values)} values, "
+            f"test_size={request.test_size}"
+        )
+
+        # Map the request to a DTO, run the use case, and map the result back
+        input_dto = BacktestMapper.to_input_dto(request)
+        output_dto = use_case.execute(input_dto)
+        response = BacktestMapper.from_output_dto(output_dto)
+
+        logger.info(
+            f"Backtest completed: MAE={response.metrics.mae:.2f}, "
+            f"MAPE={response.metrics.mape:.2f}%"
+        )
+
+        return response
+
+    except ValueError as e:
+        logger.error(f"Validation error: {e}")
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=str(e)
+        )
+    except Exception as e:
+        logger.error(f"Unexpected error in backtest: {e}", exc_info=True)
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Internal error during backtesting"
+        )
diff --git a/app/api/routes/forecast.py b/app/api/routes/forecast.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8ebfeb72106db929de5fae9c3f8ea8c61c792e8
--- /dev/null
+++ b/app/api/routes/forecast.py
@@ -0,0 +1,158 @@
+"""
+Forecast API endpoints.
+
+Responsibility: handle forecasting requests and delegate to use cases.
+"""
+
+from fastapi import APIRouter, Depends, HTTPException, status
+
+from app.api.dependencies import (
+    get_forecast_univariate_use_case,
+    get_forecast_multi_series_use_case
+)
+from app.application.use_cases.forecast_use_case import (
+    ForecastUnivariateUseCase,
+    ForecastMultiSeriesUseCase
+)
+from app.application.mappers import ForecastMapper
+from app.schemas.requests.forecast import (
+    ForecastUnivariateRequest,
+    ForecastMultiSeriesRequest
+)
+from app.schemas.responses.forecast import (
+    ForecastUnivariateResponse,
+    ForecastMultiSeriesResponse
+)
+from app.utils.logger import setup_logger
+
+logger = setup_logger(__name__)
+
+router = APIRouter(prefix="/forecast", tags=["Forecast"])
+
+
+@router.post(
+    "/univariate",
+    response_model=ForecastUnivariateResponse,
+    status_code=status.HTTP_200_OK,
+    summary="Univariate forecast",
+    description="Generates a forecast for a single time series without covariates"
+)
+async def forecast_univariate(
+    request: ForecastUnivariateRequest,
+    use_case: ForecastUnivariateUseCase = Depends(get_forecast_univariate_use_case)
+):
+    """
+    Univariate forecast.
+
+    Generates a probabilistic forecast for a single time series,
+    with no exogenous variables.
+
+    Args:
+        request: Series data and prediction parameters
+        use_case: Injected use case
+
+    Returns:
+        Forecast with median and quantiles
+
+    Raises:
+        HTTPException: If the prediction fails
+
+    Example:
+        ```json
+        {
+            "series": {"values": [100, 102, 105, 103, 108, 112]},
+            "prediction_length": 3,
+            "freq": "D",
+            "quantile_levels": [0.1, 0.5, 0.9]
+        }
+        ```
+    """
+    try:
+        logger.info(
+            f"Forecast univariate request: {len(request.series.values)} values, "
+            f"{request.prediction_length} steps ahead"
+        )
+
+        # Map the request to a DTO, run the use case, and map the result back
+        input_dto = ForecastMapper.to_univariate_input_dto(request)
+        output_dto = use_case.execute(input_dto)
+        response = ForecastMapper.from_univariate_output_dto(output_dto)
+
+        logger.info(f"Forecast completed: {len(response.timestamps)} predictions")
+        return response
+
+    except ValueError as e:
+        logger.error(f"Validation error: {e}")
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=str(e)
+        )
+    except Exception as e:
+        logger.error(f"Unexpected error in forecast: {e}", exc_info=True)
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Internal error while generating the forecast"
+        )
+
+
+@router.post(
+    "/multi-series",
+    response_model=ForecastMultiSeriesResponse,
+    status_code=status.HTTP_200_OK,
+    summary="Multi-series forecast",
+    description="Generates forecasts for multiple series in a single call"
+)
+async def forecast_multi_series(
+    request: ForecastMultiSeriesRequest,
+    use_case: ForecastMultiSeriesUseCase = Depends(get_forecast_multi_series_use_case)
+):
+    """
+    Forecast for multiple series.
+
+    Generates independent forecasts for several time series
+    in a single call.
+
+    Args:
+        request: List of series and parameters
+        use_case: Injected use case
+
+    Returns:
+        List of forecasts, one per series
+
+    Example:
+        ```json
+        {
+            "series_list": [
+                {"series_id": "sales", "values": [100, 102, 105]},
+                {"series_id": "revenue", "values": [200, 205, 210]}
+            ],
+            "prediction_length": 3,
+            "freq": "D"
+        }
+        ```
+    """
+    try:
+        logger.info(
+            f"Forecast multi-series request: {len(request.series_list)} series"
+        )
+
+        # Map the request to a DTO, run the use case, and map the result back
+        input_dto = ForecastMapper.to_multi_series_input_dto(request)
+        output_dto = use_case.execute(input_dto)
+        response = ForecastMapper.from_multi_series_output_dto(output_dto)
+
+        logger.info(
+            f"Multi-series forecast completed: "
+            f"{len(response.results)} forecasts"
+        )
+        return response
+
+    except ValueError as e:
+        logger.error(f"Validation error: {e}")
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=str(e)
+        )
+    except Exception as e:
+        logger.error(
+            f"Unexpected error in multi-series forecast: {e}",
+            exc_info=True
+        )
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Internal error while generating forecasts"
+        )
diff --git a/app/api/routes/health.py b/app/api/routes/health.py
new file mode 100644
index 0000000000000000000000000000000000000000..0fd9053a2e178b4b39ee6d4e6900a4f3ba46931d
--- /dev/null
+++ b/app/api/routes/health.py
@@ -0,0 +1,87 @@
+"""
+Health check and system info endpoints.
+
+Responsibility: report the status of the API and its services.
+"""
+
+from fastapi import APIRouter, Depends
+from typing import Dict, Any
+
+from app.api.dependencies import get_forecast_model
+from app.domain.interfaces.forecast_model import IForecastModel
+from app.infrastructure.config.settings import get_settings
+from app.utils.logger import setup_logger
+
+logger = setup_logger(__name__)
+settings = get_settings()
+
+router = APIRouter(prefix="/health", tags=["Health"])
+
+
+@router.get("", response_model=Dict[str, Any])
+async def health_check(
+    model: IForecastModel = Depends(get_forecast_model)
+):
+    """
+    Health check endpoint.
+
+    Verifies that the API is running and the model is loaded.
+
+    Returns:
+        API status and model information
+    """
+    try:
+        model_info = model.get_model_info()
+
+        return {
+            "status": "ok",
+            "version": settings.api_version,
+            "model": model_info,
+            "message": "Chronos-2 API is running"
+        }
+    except Exception as e:
+        logger.error(f"Health check failed: {e}")
+        return {
+            "status": "error",
+            "version": settings.api_version,
+            "error": str(e),
+            "message": "API is running but model is not available"
+        }
+
+
+@router.get("/info", response_model=Dict[str, Any])
+async def system_info():
+    """
+    System information endpoint.
+
+    Returns:
+        Information about the architecture and configuration
+    """
+    return {
+        "api": {
+            "title": settings.api_title,
+            "version": settings.api_version,
+            "description": settings.api_description
+        },
+        "architecture": {
+            "style": "Clean Architecture",
+            "principles": "SOLID",
+            "layers": [
+                "Presentation (API)",
+                "Application (Use Cases)",
+                "Domain (Business Logic)",
+                "Infrastructure (External Services)"
+            ]
+        },
+        "model": {
+            "id": settings.model_id,
+            "device": settings.device_map
+        },
+        "endpoints": {
+            "docs": "/docs",
+            "health": "/health",
+            "forecast": "/forecast",
+            "anomaly": "/anomaly",
+            "backtest": "/backtest"
+        }
+    }
diff --git a/app/application/__init__.py b/app/application/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..77b18d7d6902f63c0787b4573ce8431e7369b36c
--- /dev/null
+++ b/app/application/__init__.py
@@ -0,0 +1,7 @@
+"""
+Application Layer - use cases and DTOs.
+
+This layer contains the application logic (use cases) that orchestrates
+domain entities and services to fulfil the application's requirements.
+"""
diff --git a/app/application/dtos/__init__.py b/app/application/dtos/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..419cc3b4f2631cbee7240388b5fe2eba1f13a19f
--- /dev/null
+++ b/app/application/dtos/__init__.py
@@ -0,0 +1,36 @@
+"""
+Data Transfer Objects (DTOs).
+
+DTOs move data between layers, avoiding coupling between the
+presentation layer and the domain.
+"""
+
+from .forecast_dtos import (
+    ForecastInputDTO,
+    ForecastOutputDTO,
+    MultiForecastInputDTO,
+    MultiForecastOutputDTO
+)
+from .anomaly_dtos import (
+    AnomalyDetectionInputDTO,
+    AnomalyDetectionOutputDTO,
+    AnomalyPointDTO
+)
+from .backtest_dtos import (
+    BacktestInputDTO,
+    BacktestOutputDTO,
+    BacktestMetricsDTO
+)
+
+__all__ = [
+    "ForecastInputDTO",
+    "ForecastOutputDTO",
+    "MultiForecastInputDTO",
+    "MultiForecastOutputDTO",
+    "AnomalyDetectionInputDTO",
+    "AnomalyDetectionOutputDTO",
+    "AnomalyPointDTO",
+    "BacktestInputDTO",
+    "BacktestOutputDTO",
+    "BacktestMetricsDTO",
+]
diff --git a/app/application/dtos/anomaly_dtos.py b/app/application/dtos/anomaly_dtos.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d71faec94940394cd2bfd1bd105029182e6c52e
--- /dev/null
+++ b/app/application/dtos/anomaly_dtos.py
@@ -0,0 +1,86 @@
+"""
+DTOs for the anomaly detection use cases.
+""" + +from dataclasses import dataclass +from typing import List, Optional, Dict + + +@dataclass +class AnomalyPointDTO: + """DTO para un punto de anomalía.""" + + index: int + value: float + expected: float + lower_bound: float + upper_bound: float + is_anomaly: bool + z_score: float = 0.0 + severity: str = "normal" # normal, low, medium, high + + def to_dict(self) -> Dict: + """Convierte a diccionario.""" + return { + "index": self.index, + "value": self.value, + "expected": self.expected, + "lower_bound": self.lower_bound, + "upper_bound": self.upper_bound, + "is_anomaly": self.is_anomaly, + "z_score": round(self.z_score, 2), + "severity": self.severity + } + + +@dataclass +class AnomalyDetectionInputDTO: + """DTO de entrada para detección de anomalías.""" + + context_values: List[float] + recent_values: List[float] + quantile_low: float = 0.05 + quantile_high: float = 0.95 + context_timestamps: Optional[List[str]] = None + freq: str = "D" + + def validate(self) -> None: + """Valida los datos de entrada.""" + if not self.context_values: + raise ValueError("context_values no puede estar vacío") + + if not self.recent_values: + raise ValueError("recent_values no puede estar vacío") + + if len(self.context_values) < 3: + raise ValueError("context_values debe tener al menos 3 puntos") + + if not (0 < self.quantile_low < 0.5): + raise ValueError("quantile_low debe estar en (0, 0.5)") + + if not (0.5 < self.quantile_high < 1): + raise ValueError("quantile_high debe estar en (0.5, 1)") + + if self.context_timestamps and len(self.context_timestamps) != len(self.context_values): + raise ValueError("context_timestamps y context_values deben tener la misma longitud") + + +@dataclass +class AnomalyDetectionOutputDTO: + """DTO de salida para detección de anomalías.""" + + anomalies: List[AnomalyPointDTO] + total_points: int + anomaly_count: int + anomaly_rate: float + summary: Dict + + def to_dict(self) -> Dict: + """Convierte a diccionario.""" + return { + "anomalies": [a.to_dict() for a in self.anomalies], + "total_points": self.total_points, + "anomaly_count": self.anomaly_count, + "anomaly_rate": round(self.anomaly_rate, 3), + "summary": self.summary + } diff --git a/app/application/dtos/backtest_dtos.py b/app/application/dtos/backtest_dtos.py new file mode 100644 index 0000000000000000000000000000000000000000..a354a2e6294b062e2be05497bf9971a96b743afb --- /dev/null +++ b/app/application/dtos/backtest_dtos.py @@ -0,0 +1,84 @@ +""" +DTOs para casos de uso de Backtesting. 
+""" + +from dataclasses import dataclass +from typing import List, Optional, Dict + + +@dataclass +class BacktestMetricsDTO: + """DTO para métricas de backtest.""" + + mae: float + mape: float + rmse: float + mse: float + + def to_dict(self) -> Dict: + """Convierte a diccionario.""" + return { + "mae": round(self.mae, 4), + "mape": round(self.mape, 4), + "rmse": round(self.rmse, 4), + "mse": round(self.mse, 4) + } + + +@dataclass +class BacktestInputDTO: + """DTO de entrada para backtest.""" + + values: List[float] + test_size: int + quantile_levels: List[float] + timestamps: Optional[List[str]] = None + freq: str = "D" + + def validate(self) -> None: + """Valida los datos de entrada.""" + if not self.values: + raise ValueError("values no puede estar vacío") + + if self.test_size < 1: + raise ValueError("test_size debe ser >= 1") + + if self.test_size >= len(self.values): + raise ValueError("test_size debe ser menor que la longitud de values") + + train_size = len(self.values) - self.test_size + if train_size < 3: + raise ValueError("train_size debe ser al menos 3 puntos") + + if not all(0 <= q <= 1 for q in self.quantile_levels): + raise ValueError("quantile_levels debe estar en [0, 1]") + + if self.timestamps and len(self.timestamps) != len(self.values): + raise ValueError("timestamps y values deben tener la misma longitud") + + +@dataclass +class BacktestOutputDTO: + """DTO de salida para backtest.""" + + forecast_values: List[float] + actual_values: List[float] + errors: List[float] + metrics: BacktestMetricsDTO + timestamps: List[str] + quantiles: Optional[Dict[str, List[float]]] = None + + def to_dict(self) -> Dict: + """Convierte a diccionario.""" + result = { + "forecast_values": self.forecast_values, + "actual_values": self.actual_values, + "errors": self.errors, + "metrics": self.metrics.to_dict(), + "timestamps": self.timestamps + } + + if self.quantiles: + result["quantiles"] = self.quantiles + + return result diff --git a/app/application/dtos/forecast_dtos.py b/app/application/dtos/forecast_dtos.py new file mode 100644 index 0000000000000000000000000000000000000000..734f2c3c38508f3587ef5dafbea63847fc6ad7ca --- /dev/null +++ b/app/application/dtos/forecast_dtos.py @@ -0,0 +1,111 @@ +""" +DTOs para casos de uso de Forecasting. 
+""" + +from dataclasses import dataclass +from typing import List, Optional, Dict + + +@dataclass +class ForecastInputDTO: + """DTO de entrada para pronóstico univariado.""" + + values: List[float] + prediction_length: int + quantile_levels: List[float] + timestamps: Optional[List[str]] = None + series_id: str = "series_0" + freq: str = "D" + + def validate(self) -> None: + """Valida los datos de entrada.""" + if not self.values: + raise ValueError("values no puede estar vacío") + + if self.prediction_length < 1: + raise ValueError("prediction_length debe ser >= 1") + + if not all(0 <= q <= 1 for q in self.quantile_levels): + raise ValueError("quantile_levels debe estar en [0, 1]") + + if self.timestamps and len(self.timestamps) != len(self.values): + raise ValueError("timestamps y values deben tener la misma longitud") + + +@dataclass +class ForecastOutputDTO: + """DTO de salida para pronóstico univariado.""" + + timestamps: List[str] + median: List[float] + quantiles: Dict[str, List[float]] + series_id: str = "series_0" + metadata: Optional[Dict] = None + + def to_dict(self) -> Dict: + """Convierte a diccionario.""" + result = { + "timestamps": self.timestamps, + "median": self.median, + "quantiles": self.quantiles, + "series_id": self.series_id + } + + if self.metadata: + result["metadata"] = self.metadata + + return result + + +@dataclass +class SeriesInputDTO: + """DTO para una serie individual en pronóstico múltiple.""" + + series_id: str + values: List[float] + timestamps: Optional[List[str]] = None + + +@dataclass +class MultiForecastInputDTO: + """DTO de entrada para pronóstico múltiple.""" + + series_list: List[SeriesInputDTO] + prediction_length: int + quantile_levels: List[float] + freq: str = "D" + + def validate(self) -> None: + """Valida los datos de entrada.""" + if not self.series_list: + raise ValueError("series_list no puede estar vacío") + + if self.prediction_length < 1: + raise ValueError("prediction_length debe ser >= 1") + + if not all(0 <= q <= 1 for q in self.quantile_levels): + raise ValueError("quantile_levels debe estar en [0, 1]") + + # Validar cada serie + for series in self.series_list: + if not series.values: + raise ValueError(f"Serie {series.series_id} está vacía") + + +@dataclass +class MultiForecastOutputDTO: + """DTO de salida para pronóstico múltiple.""" + + results: List[ForecastOutputDTO] + total_series: int + successful: int + failed: int + + def to_dict(self) -> Dict: + """Convierte a diccionario.""" + return { + "results": [r.to_dict() for r in self.results], + "total_series": self.total_series, + "successful": self.successful, + "failed": self.failed + } diff --git a/app/application/mappers/__init__.py b/app/application/mappers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5cf5aa6c54ed931db97146f600d5b29112758969 --- /dev/null +++ b/app/application/mappers/__init__.py @@ -0,0 +1,17 @@ +""" +Mappers - Conversión entre API Schemas y DTOs. + +Los mappers se encargan de convertir entre la capa de presentación +(API schemas) y la capa de aplicación (DTOs), manteniendo las capas +desacopladas. 
+""" + +from .forecast_mapper import ForecastMapper +from .anomaly_mapper import AnomalyMapper +from .backtest_mapper import BacktestMapper + +__all__ = [ + "ForecastMapper", + "AnomalyMapper", + "BacktestMapper", +] diff --git a/app/application/mappers/anomaly_mapper.py b/app/application/mappers/anomaly_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..6ab69d655fdf8783d10a7d504a5ef8292314b1bb --- /dev/null +++ b/app/application/mappers/anomaly_mapper.py @@ -0,0 +1,73 @@ +""" +Mapper para casos de uso de Detección de Anomalías. + +Convierte entre API schemas (Pydantic) y DTOs de aplicación. +""" + +from app.schemas.requests.anomaly import AnomalyDetectionRequest +from app.schemas.responses.anomaly import AnomalyDetectionResponse, AnomalyPoint +from app.application.dtos.anomaly_dtos import ( + AnomalyDetectionInputDTO, + AnomalyDetectionOutputDTO, + AnomalyPointDTO +) + + +class AnomalyMapper: + """ + Mapper para convertir entre API schemas y DTOs de anomalías. + """ + + @staticmethod + def to_input_dto(request: AnomalyDetectionRequest) -> AnomalyDetectionInputDTO: + """ + Convierte API request a DTO de entrada. + + Args: + request: Request de la API + + Returns: + AnomalyDetectionInputDTO: DTO para el caso de uso + """ + return AnomalyDetectionInputDTO( + context_values=request.context.values, + recent_values=request.recent_values, + quantile_low=request.quantile_low, + quantile_high=request.quantile_high, + context_timestamps=request.context.timestamps, + freq=request.freq + ) + + @staticmethod + def from_output_dto(dto: AnomalyDetectionOutputDTO) -> AnomalyDetectionResponse: + """ + Convierte DTO de salida a API response. + + Args: + dto: DTO del caso de uso + + Returns: + AnomalyDetectionResponse: Response para la API + """ + # Convertir cada punto de anomalía + anomaly_points = [ + AnomalyPoint( + index=ap.index, + value=ap.value, + expected=ap.expected, + lower_bound=ap.lower_bound, + upper_bound=ap.upper_bound, + is_anomaly=ap.is_anomaly, + z_score=ap.z_score, + severity=ap.severity + ) + for ap in dto.anomalies + ] + + return AnomalyDetectionResponse( + anomalies=anomaly_points, + total_points=dto.total_points, + anomaly_count=dto.anomaly_count, + anomaly_rate=dto.anomaly_rate, + summary=dto.summary + ) diff --git a/app/application/mappers/backtest_mapper.py b/app/application/mappers/backtest_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..43e5d403da81f3c94155d86aa10322471168f455 --- /dev/null +++ b/app/application/mappers/backtest_mapper.py @@ -0,0 +1,66 @@ +""" +Mapper para casos de uso de Backtesting. + +Convierte entre API schemas (Pydantic) y DTOs de aplicación. +""" + +from app.schemas.requests.backtest import BacktestRequest +from app.schemas.responses.backtest import BacktestResponse, BacktestMetrics +from app.application.dtos.backtest_dtos import ( + BacktestInputDTO, + BacktestOutputDTO, + BacktestMetricsDTO +) + + +class BacktestMapper: + """ + Mapper para convertir entre API schemas y DTOs de backtest. + """ + + @staticmethod + def to_input_dto(request: BacktestRequest) -> BacktestInputDTO: + """ + Convierte API request a DTO de entrada. 
+
+        Args:
+            request: API request
+
+        Returns:
+            BacktestInputDTO: DTO for the use case
+        """
+        return BacktestInputDTO(
+            values=request.series.values,
+            test_size=request.test_size,
+            quantile_levels=request.quantile_levels,
+            timestamps=request.series.timestamps,
+            freq=request.freq
+        )
+
+    @staticmethod
+    def from_output_dto(dto: BacktestOutputDTO) -> BacktestResponse:
+        """
+        Converts an output DTO into an API response.
+
+        Args:
+            dto: DTO from the use case
+
+        Returns:
+            BacktestResponse: Response for the API
+        """
+        # Convert metrics
+        metrics = BacktestMetrics(
+            mae=dto.metrics.mae,
+            mape=dto.metrics.mape,
+            rmse=dto.metrics.rmse,
+            mse=dto.metrics.mse
+        )
+
+        return BacktestResponse(
+            forecast_values=dto.forecast_values,
+            actual_values=dto.actual_values,
+            errors=dto.errors,
+            metrics=metrics,
+            timestamps=dto.timestamps,
+            quantiles=dto.quantiles
+        )
diff --git a/app/application/mappers/forecast_mapper.py b/app/application/mappers/forecast_mapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..c82a91bba873e2313959bcebdcfff89fe49e845f
--- /dev/null
+++ b/app/application/mappers/forecast_mapper.py
@@ -0,0 +1,127 @@
+"""
+Mapper for the forecasting use cases.
+
+Converts between API schemas (Pydantic) and application DTOs.
+"""
+
+from app.schemas.requests.forecast import (
+    ForecastUnivariateRequest,
+    ForecastMultiSeriesRequest
+)
+from app.schemas.responses.forecast import (
+    ForecastUnivariateResponse,
+    ForecastMultiSeriesResponse
+)
+from app.application.dtos.forecast_dtos import (
+    ForecastInputDTO,
+    ForecastOutputDTO,
+    MultiForecastInputDTO,
+    MultiForecastOutputDTO,
+    SeriesInputDTO
+)
+
+
+class ForecastMapper:
+    """
+    Mapper converting between API schemas and forecasting DTOs.
+    """
+
+    @staticmethod
+    def to_univariate_input_dto(
+        request: ForecastUnivariateRequest
+    ) -> ForecastInputDTO:
+        """
+        Converts an API request into an input DTO.
+
+        Args:
+            request: API request
+
+        Returns:
+            ForecastInputDTO: DTO for the use case
+        """
+        return ForecastInputDTO(
+            values=request.series.values,
+            prediction_length=request.prediction_length,
+            quantile_levels=request.quantile_levels,
+            timestamps=request.series.timestamps,
+            series_id=getattr(request.series, 'series_id', 'series_0'),
+            freq=request.freq
+        )
+
+    @staticmethod
+    def from_univariate_output_dto(
+        dto: ForecastOutputDTO
+    ) -> ForecastUnivariateResponse:
+        """
+        Converts an output DTO into an API response.
+
+        Args:
+            dto: DTO from the use case
+
+        Returns:
+            ForecastUnivariateResponse: Response for the API
+        """
+        return ForecastUnivariateResponse(
+            timestamps=dto.timestamps,
+            median=dto.median,
+            quantiles=dto.quantiles,
+            series_id=dto.series_id,
+            metadata=dto.metadata
+        )
+
+    @staticmethod
+    def to_multi_series_input_dto(
+        request: ForecastMultiSeriesRequest
+    ) -> MultiForecastInputDTO:
+        """
+        Converts a multi-series API request into an input DTO.
+
+        Args:
+            request: API request
+
+        Returns:
+            MultiForecastInputDTO: DTO for the use case
+        """
+        series_list = []
+        for series_data in request.series_list:
+            series_dto = SeriesInputDTO(
+                series_id=series_data.series_id,
+                values=series_data.values,
+                timestamps=series_data.timestamps
+            )
+            series_list.append(series_dto)
+
+        return MultiForecastInputDTO(
+            series_list=series_list,
+            prediction_length=request.prediction_length,
+            quantile_levels=request.quantile_levels,
+            freq=request.freq
+        )
+
+    @staticmethod
+    def from_multi_series_output_dto(
+        dto: MultiForecastOutputDTO
+    ) -> ForecastMultiSeriesResponse:
+        """
+        Converts a multi-series output DTO into an API response.
+
+        Args:
+            dto: DTO from the use case
+
+        Returns:
+            ForecastMultiSeriesResponse: Response for the API
+        """
+        # Convert each individual result
+        results = [
+            ForecastMapper.from_univariate_output_dto(result)
+            for result in dto.results
+        ]
+
+        return ForecastMultiSeriesResponse(
+            results=results,
+            total_series=dto.total_series,
+            successful=dto.successful,
+            failed=dto.failed
+        )
diff --git a/app/application/use_cases/__init__.py b/app/application/use_cases/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..fdccd37d26288e0d1637603fb5d0336ba0157592
--- /dev/null
+++ b/app/application/use_cases/__init__.py
@@ -0,0 +1,17 @@
+"""
+Use Cases - application logic.
+
+Use cases orchestrate domain operations to fulfil the application's
+requirements. They implement the Command/Query pattern.
+"""
+
+from .forecast_use_case import ForecastUnivariateUseCase, ForecastMultiSeriesUseCase
+from .anomaly_use_case import DetectAnomaliesUseCase
+from .backtest_use_case import BacktestUseCase
+
+__all__ = [
+    "ForecastUnivariateUseCase",
+    "ForecastMultiSeriesUseCase",
+    "DetectAnomaliesUseCase",
+    "BacktestUseCase",
+]
diff --git a/app/application/use_cases/anomaly_use_case.py b/app/application/use_cases/anomaly_use_case.py
new file mode 100644
index 0000000000000000000000000000000000000000..400344d9a783ac9b071fefb55e53fc0479531145
--- /dev/null
+++ b/app/application/use_cases/anomaly_use_case.py
@@ -0,0 +1,198 @@
+"""
+Use case for anomaly detection.
+
+Implements the application logic for detecting anomalies in
+time series using probabilistic forecasts.
+"""
+
+from app.domain.services.anomaly_service import AnomalyService
+from app.domain.models.time_series import TimeSeries
+from app.domain.models.forecast_config import ForecastConfig
+from app.application.dtos.anomaly_dtos import (
+    AnomalyDetectionInputDTO,
+    AnomalyDetectionOutputDTO,
+    AnomalyPointDTO
+)
+from app.utils.logger import setup_logger
+
+logger = setup_logger(__name__)
+
+
+class DetectAnomaliesUseCase:
+    """
+    Use case: anomaly detection.
+
+    Responsibility: detect anomalies by comparing observed values
+    against probabilistic forecasts.
+    """
+
+    def __init__(self, anomaly_service: AnomalyService):
+        """
+        Initializes the use case.
+
+        Args:
+            anomaly_service: Domain service for anomaly detection
+        """
+        self.anomaly_service = anomaly_service
+        logger.info("DetectAnomaliesUseCase initialized")
+
+    def execute(self, input_dto: AnomalyDetectionInputDTO) -> AnomalyDetectionOutputDTO:
+        """
+        Executes the anomaly detection use case.
+
+        Args:
+            input_dto: Input data with context and recent values
+
+        Returns:
+            AnomalyDetectionOutputDTO: Detected anomaly points
+
+        Raises:
+            ValueError: If the data is invalid
+            RuntimeError: If detection fails
+        """
+        logger.info(
+            f"Detecting anomalies: {len(input_dto.context_values)} context points, "
+            f"{len(input_dto.recent_values)} recent points"
+        )
+
+        # Validate input
+        input_dto.validate()
+
+        # Convert the DTO to domain models
+        context = TimeSeries(
+            values=input_dto.context_values,
+            timestamps=input_dto.context_timestamps,
+            freq=input_dto.freq
+        )
+
+        config = ForecastConfig(
+            prediction_length=len(input_dto.recent_values),
+            quantile_levels=[input_dto.quantile_low, 0.5, input_dto.quantile_high],
+            freq=input_dto.freq
+        )
+
+        # Run the domain service
+        try:
+            anomaly_points = self.anomaly_service.detect_anomalies(
+                context=context,
+                recent_observed=input_dto.recent_values,
+                config=config,
+                quantile_low=input_dto.quantile_low,
+                quantile_high=input_dto.quantile_high
+            )
+            logger.info("Anomaly detection completed")
+        except Exception as e:
+            logger.error(f"Anomaly detection failed: {e}", exc_info=True)
+            raise RuntimeError(f"Anomaly detection failed: {str(e)}") from e
+
+        # Convert to DTOs and compute severity
+        anomaly_dtos = []
+        for ap in anomaly_points:
+            severity = self._calculate_severity(ap.z_score, ap.is_anomaly)
+
+            dto = AnomalyPointDTO(
+                index=ap.index,
+                value=ap.value,
+                expected=ap.expected,
+                lower_bound=ap.lower_bound,
+                upper_bound=ap.upper_bound,
+                is_anomaly=ap.is_anomaly,
+                z_score=ap.z_score,
+                severity=severity
+            )
+            anomaly_dtos.append(dto)
+
+        # Compute statistics
+        anomaly_count = sum(1 for a in anomaly_dtos if a.is_anomaly)
+        total_points = len(anomaly_dtos)
+        anomaly_rate = anomaly_count / total_points if total_points > 0 else 0.0
+
+        # Build the summary
+        summary = self._create_summary(anomaly_dtos, input_dto)
+
+        logger.info(
+            f"Anomalies detected: {anomaly_count}/{total_points} "
+            f"({anomaly_rate*100:.1f}%)"
+        )
+
+        # Build the output DTO
+        output_dto = AnomalyDetectionOutputDTO(
+            anomalies=anomaly_dtos,
+            total_points=total_points,
+            anomaly_count=anomaly_count,
+            anomaly_rate=anomaly_rate,
+            summary=summary
+        )
+
+        return output_dto
+
+    def _calculate_severity(self, z_score: float, is_anomaly: bool) -> str:
+        """
+        Computes an anomaly's severity from its z-score.
+
+        Args:
+            z_score: Z-score
+            is_anomaly: Whether the point is an anomaly
+
+        Returns:
+            str: Severity level (normal, low, medium, high)
+        """
+        if not is_anomaly:
+            return "normal"
+
+        if z_score < 1.5:
+            return "low"
+        elif z_score < 2.5:
+            return "medium"
+        else:
+            return "high"
+
+    def _create_summary(
+        self,
+        anomaly_dtos: list,
+        input_dto: AnomalyDetectionInputDTO
+    ) -> dict:
+        """
+        Builds a summary of the anomaly detection run.
+
+        Args:
+            anomaly_dtos: List of evaluated points
+            input_dto: Original input data
+
+        Returns:
+            dict: Summary statistics
+        """
+        anomalies_only = [a for a in anomaly_dtos if a.is_anomaly]
+
+        if not anomalies_only:
+            return {
+                "has_anomalies": False,
+                "severity_distribution": {"normal": len(anomaly_dtos)},
+                "max_z_score": 0.0,
+                "avg_deviation": 0.0
+            }
+
+        # Severity distribution
+        severity_dist = {
+            "normal": sum(1 for a in anomaly_dtos if a.severity == "normal"),
+            "low": sum(1 for a in anomaly_dtos if a.severity == "low"),
+            "medium": sum(1 for a in anomaly_dtos if a.severity == "medium"),
+            "high": sum(1 for a in anomaly_dtos if a.severity == "high")
+        }
+
+        # Anomaly statistics
+        max_z_score = max(a.z_score for a in anomalies_only)
+        avg_deviation = sum(
+            abs(a.value - a.expected) for a in anomalies_only
+        ) / len(anomalies_only)
+
+        return {
+            "has_anomalies": True,
+            "severity_distribution": severity_dist,
+            "max_z_score": round(max_z_score, 2),
+            "avg_deviation": round(avg_deviation, 2),
+            "quantile_range": {
+                "low": input_dto.quantile_low,
+                "high": input_dto.quantile_high
+            }
+        }
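+
+# Example (sketch): the severity mapping above yields z=1.2 -> "low",
+# z=2.0 -> "medium", z=3.4 -> "high"; points with is_anomaly=False stay
+# "normal" regardless of z-score.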
diff --git a/app/application/use_cases/backtest_use_case.py b/app/application/use_cases/backtest_use_case.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0293bbc87d05e0d7cbda6e3bb49550fcf7f48ae
--- /dev/null
+++ b/app/application/use_cases/backtest_use_case.py
@@ -0,0 +1,172 @@
+"""
+Use case for backtesting.
+
+Implements the application logic for evaluating forecast
+accuracy on historical data.
+"""
+
+import math
+from typing import List
+from app.domain.services.forecast_service import ForecastService
+from app.domain.models.time_series import TimeSeries
+from app.domain.models.forecast_config import ForecastConfig
+from app.application.dtos.backtest_dtos import (
+    BacktestInputDTO,
+    BacktestOutputDTO,
+    BacktestMetricsDTO
+)
+from app.utils.logger import setup_logger
+
+logger = setup_logger(__name__)
+
+
+class BacktestUseCase:
+    """
+    Use case: backtesting.
+
+    Responsibility: evaluate model accuracy on historical data by
+    splitting it into train/test and computing error metrics.
+    """
+
+    def __init__(self, forecast_service: ForecastService):
+        """
+        Initializes the use case.
+
+        Args:
+            forecast_service: Domain service for forecasting
+        """
+        self.forecast_service = forecast_service
+        logger.info("BacktestUseCase initialized")
+
+    def execute(self, input_dto: BacktestInputDTO) -> BacktestOutputDTO:
+        """
+        Executes the backtesting use case.
+
+        Args:
+            input_dto: Input data with the full series and test size
+
+        Returns:
+            BacktestOutputDTO: Backtest results with metrics
+
+        Raises:
+            ValueError: If the data is invalid
+            RuntimeError: If the backtest fails
+        """
+        logger.info(
+            f"Executing backtest: {len(input_dto.values)} total points, "
+            f"{input_dto.test_size} test points"
+        )
+
+        # Validate input
+        input_dto.validate()
+
+        # Split into train/test
+        train_values = input_dto.values[:-input_dto.test_size]
+        test_values = input_dto.values[-input_dto.test_size:]
+
+        train_timestamps = None
+        test_timestamps = None
+
+        if input_dto.timestamps:
+            train_timestamps = input_dto.timestamps[:-input_dto.test_size]
+            test_timestamps = input_dto.timestamps[-input_dto.test_size:]
+
+        logger.info(f"Train size: {len(train_values)}, Test size: {len(test_values)}")
+
+        # Build domain models for the train split
+        train_series = TimeSeries(
+            values=train_values,
+            timestamps=train_timestamps,
+            freq=input_dto.freq
+        )
+
+        config = ForecastConfig(
+            prediction_length=input_dto.test_size,
+            quantile_levels=input_dto.quantile_levels,
+            freq=input_dto.freq
+        )
+
+        # Forecast from the train split
+        try:
+            result = self.forecast_service.forecast_univariate(train_series, config)
+            logger.info(f"Forecast completed: {len(result.median)} predictions")
+        except Exception as e:
+            logger.error(f"Backtest forecast failed: {e}", exc_info=True)
+            raise RuntimeError(f"Backtest forecast failed: {str(e)}") from e
+
+        # Compare against actual values
+        forecast_values = result.median
+        actual_values = test_values
+
+        # Compute errors
+        errors = [
+            actual - forecast
+            for actual, forecast in zip(actual_values, forecast_values)
+        ]
+
+        # Compute metrics
+        metrics = self._calculate_metrics(actual_values, forecast_values)
+
+        logger.info(
+            f"Backtest metrics - MAE: {metrics.mae:.2f}, "
+            f"MAPE: {metrics.mape:.2f}%, RMSE: {metrics.rmse:.2f}"
+        )
+
+        # Prepare output timestamps
+        if test_timestamps:
+            output_timestamps = test_timestamps
+        else:
+            output_timestamps = result.timestamps
+
+        # Build the output DTO
+        output_dto = BacktestOutputDTO(
+            forecast_values=forecast_values,
+            actual_values=actual_values,
+            errors=errors,
+            metrics=metrics,
+            timestamps=output_timestamps,
+            quantiles=result.quantiles if result.quantiles else None
+        )
+
+        return output_dto
+
+    def _calculate_metrics(
+        self,
+        actual: List[float],
+        forecast: List[float]
+    ) -> BacktestMetricsDTO:
+        """
+        Computes error metrics for the backtest.
+
+        Args:
+            actual: Actual values
+            forecast: Forecast values
+
+        Returns:
+            BacktestMetricsDTO: Computed metrics
+        """
+        n = len(actual)
+
+        # Mean Absolute Error
+        mae = sum(abs(a - f) for a, f in zip(actual, forecast)) / n
+
+        # Mean Absolute Percentage Error
+        mape_values = []
+        for a, f in zip(actual, forecast):
+            if a != 0:
+                mape_values.append(abs((a - f) / a))
+
+        mape = (sum(mape_values) / len(mape_values) * 100) if mape_values else 0.0
+
+        # Mean Squared Error
+        mse = sum((a - f) ** 2 for a, f in zip(actual, forecast)) / n
+
+        # Root Mean Squared Error
+        rmse = math.sqrt(mse)
+
+        return BacktestMetricsDTO(
+            mae=mae,
+            mape=mape,
+            rmse=rmse,
+            mse=mse
+        )
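+
+# Worked example (sketch) for the metrics above, with actual=[100, 110] and
+# forecast=[90, 121]:
+#   errors = [10, -11]
+#   MAE  = (10 + 11) / 2          = 10.5
+#   MAPE = (10/100 + 11/110) / 2  = 0.10  -> 10.0%
+#   MSE  = (10^2 + 11^2) / 2      = 110.5
+#   RMSE = sqrt(110.5)            ≈ 10.51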
diff --git a/app/application/use_cases/forecast_use_case.py b/app/application/use_cases/forecast_use_case.py
new file mode 100644
index 0000000000000000000000000000000000000000..337865bcce2eaa4984c9914d720550f5e03495e6
--- /dev/null
+++ b/app/application/use_cases/forecast_use_case.py
@@ -0,0 +1,186 @@
+"""
+Use cases for forecasting.
+
+They implement the application logic for forecasts, orchestrating
+domain services and transforming DTOs.
+"""
+
+from typing import List
+from app.domain.services.forecast_service import ForecastService
+from app.domain.models.time_series import TimeSeries
+from app.domain.models.forecast_config import ForecastConfig
+from app.application.dtos.forecast_dtos import (
+    ForecastInputDTO,
+    ForecastOutputDTO,
+    MultiForecastInputDTO,
+    MultiForecastOutputDTO,
+    SeriesInputDTO
+)
+from app.utils.logger import setup_logger
+
+logger = setup_logger(__name__)
+
+
+class ForecastUnivariateUseCase:
+    """
+    Use case: univariate forecast.
+
+    Responsibility: run a forecast for a single time series.
+    """
+
+    def __init__(self, forecast_service: ForecastService):
+        """
+        Initializes the use case.
+
+        Args:
+            forecast_service: Domain service for forecasting
+        """
+        self.forecast_service = forecast_service
+        logger.info("ForecastUnivariateUseCase initialized")
+
+    def execute(self, input_dto: ForecastInputDTO) -> ForecastOutputDTO:
+        """
+        Executes the use case.
+
+        Args:
+            input_dto: Validated input data
+
+        Returns:
+            ForecastOutputDTO: Forecast result
+
+        Raises:
+            ValueError: If the data is invalid
+            RuntimeError: If the forecast fails
+        """
+        logger.info(f"Executing forecast for series: {input_dto.series_id}")
+
+        # Validate input
+        input_dto.validate()
+
+        # Convert the DTO to domain models
+        series = TimeSeries(
+            values=input_dto.values,
+            timestamps=input_dto.timestamps,
+            series_id=input_dto.series_id,
+            freq=input_dto.freq
+        )
+
+        config = ForecastConfig(
+            prediction_length=input_dto.prediction_length,
+            quantile_levels=input_dto.quantile_levels,
+            freq=input_dto.freq
+        )
+
+        # Run the domain service
+        try:
+            result = self.forecast_service.forecast_univariate(series, config)
+            logger.info(f"Forecast completed: {len(result.timestamps)} periods")
+        except Exception as e:
+            logger.error(f"Forecast failed: {e}", exc_info=True)
+            raise RuntimeError(f"Forecast execution failed: {str(e)}") from e
+
+        # Convert the result to a DTO
+        output_dto = ForecastOutputDTO(
+            timestamps=result.timestamps,
+            median=result.median,
+            quantiles=result.quantiles,
+            series_id=result.series_id,
+            metadata={
+                "prediction_length": config.prediction_length,
+                "freq": config.freq,
+                "context_length": len(series.values)
+            }
+        )
+
+        return output_dto
+
+
+class ForecastMultiSeriesUseCase:
+    """
+    Use case: multi-series forecast.
+
+    Responsibility: run forecasts for multiple series.
+    """
+
+    def __init__(self, forecast_service: ForecastService):
+        """
+        Initializes the use case.
+
+        Args:
+            forecast_service: Domain service for forecasting
+        """
+        self.forecast_service = forecast_service
+        logger.info("ForecastMultiSeriesUseCase initialized")
+
+    def execute(self, input_dto: MultiForecastInputDTO) -> MultiForecastOutputDTO:
+        """
+        Executes the use case for multiple series.
+ + Args: + input_dto: Datos de entrada con múltiples series + + Returns: + MultiForecastOutputDTO: Resultados de todos los pronósticos + """ + logger.info(f"Executing forecast for {len(input_dto.series_list)} series") + + # Validar entrada + input_dto.validate() + + # Configuración compartida + config = ForecastConfig( + prediction_length=input_dto.prediction_length, + quantile_levels=input_dto.quantile_levels, + freq=input_dto.freq + ) + + # Convertir DTOs a modelos de dominio + time_series_list: List[TimeSeries] = [] + for series_dto in input_dto.series_list: + series = TimeSeries( + values=series_dto.values, + timestamps=series_dto.timestamps, + series_id=series_dto.series_id, + freq=input_dto.freq + ) + time_series_list.append(series) + + # Ejecutar servicio de dominio + results = [] + successful = 0 + failed = 0 + + for ts in time_series_list: + try: + result = self.forecast_service.forecast_univariate(ts, config) + + output_dto = ForecastOutputDTO( + timestamps=result.timestamps, + median=result.median, + quantiles=result.quantiles, + series_id=result.series_id, + metadata={ + "prediction_length": config.prediction_length, + "freq": config.freq, + "context_length": len(ts.values) + } + ) + results.append(output_dto) + successful += 1 + + except Exception as e: + logger.error(f"Forecast failed for series {ts.series_id}: {e}") + failed += 1 + # Continuar con las siguientes series + + logger.info(f"Multi-series forecast completed: {successful} successful, {failed} failed") + + # Crear DTO de salida + multi_output = MultiForecastOutputDTO( + results=results, + total_series=len(input_dto.series_list), + successful=successful, + failed=failed + ) + + return multi_output diff --git a/app/domain/__init__.py b/app/domain/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/domain/interfaces/__init__.py b/app/domain/interfaces/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/domain/interfaces/data_transformer.py b/app/domain/interfaces/data_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..fb7af00206727bcf6d5623344d720cf83f7045ac --- /dev/null +++ b/app/domain/interfaces/data_transformer.py @@ -0,0 +1,65 @@ +""" +Interface para transformación de datos. + +Define la abstracción para convertir datos entre diferentes formatos, +cumpliendo con ISP (Interface Segregation Principle). +""" + +from abc import ABC, abstractmethod +from typing import List, Optional, Dict, Any +import pandas as pd + + +class IDataTransformer(ABC): + """ + Interface para transformación de datos de series temporales. + + Esta interface está segregada para contener solo métodos relacionados + con transformación de datos (ISP). + """ + + @abstractmethod + def build_context_df( + self, + values: List[float], + timestamps: Optional[List[str]] = None, + series_id: str = "series_0", + freq: str = "D" + ) -> pd.DataFrame: + """ + Construye un DataFrame de contexto para forecasting. + + Args: + values: Lista de valores históricos + timestamps: Lista de timestamps (opcional, se generan si es None) + series_id: Identificador de la serie + freq: Frecuencia temporal (D=daily, H=hourly, etc.) 
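+
+        Example output shape (hypothetical dates, freq="D"):
+            id        timestamp   target
+            series_0  2025-01-01  100.0
+            series_0  2025-01-02  102.0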
+ + Returns: + pd.DataFrame: DataFrame con columnas id, timestamp, target + + Raises: + ValueError: Si valores y timestamps tienen longitudes diferentes + """ + pass + + @abstractmethod + def parse_prediction_result( + self, + pred_df: pd.DataFrame, + quantile_levels: List[float] + ) -> Dict[str, Any]: + """ + Parsea el resultado de predicción a un formato estándar. + + Args: + pred_df: DataFrame con predicciones del modelo + quantile_levels: Cuantiles calculados + + Returns: + Dict con: + - timestamps: Lista de timestamps + - median: Lista de valores medianos + - quantiles: Dict {cuantil: [valores]} + """ + pass diff --git a/app/domain/interfaces/forecast_model.py b/app/domain/interfaces/forecast_model.py new file mode 100644 index 0000000000000000000000000000000000000000..31df6257741ae8c1af2a93a79fd5064cd5051279 --- /dev/null +++ b/app/domain/interfaces/forecast_model.py @@ -0,0 +1,104 @@ +""" +Interface para modelos de forecasting. + +Este módulo define la abstracción IForecastModel que permite +diferentes implementaciones de modelos (Chronos, Prophet, ARIMA, etc.) +cumpliendo con DIP (Dependency Inversion Principle). +""" + +from abc import ABC, abstractmethod +from typing import List, Dict, Any, Optional +import pandas as pd + + +class IForecastModel(ABC): + """ + Interface para modelos de forecasting. + + Esta abstracción permite que diferentes implementaciones de modelos + sean intercambiables sin modificar el código que las usa (DIP + LSP). + + Ejemplos de implementaciones: + - ChronosModel (Chronos-2) + - ProphetModel (Facebook Prophet) + - ARIMAModel (ARIMA tradicional) + """ + + @abstractmethod + def predict( + self, + context_df: pd.DataFrame, + prediction_length: int, + quantile_levels: List[float], + **kwargs + ) -> pd.DataFrame: + """ + Genera pronósticos probabilísticos. + + Args: + context_df: DataFrame con datos históricos. + Debe contener columnas: id, timestamp, target + prediction_length: Número de pasos a predecir + quantile_levels: Lista de cuantiles a calcular (ej: [0.1, 0.5, 0.9]) + **kwargs: Parámetros adicionales específicos del modelo + + Returns: + pd.DataFrame: Pronósticos con columnas: + - id: Identificador de serie + - timestamp: Timestamp de predicción + - predictions: Valor mediano + - {q}: Valor para cada cuantil q + + Raises: + ValueError: Si los datos de entrada son inválidos + RuntimeError: Si el modelo falla al predecir + """ + pass + + @abstractmethod + def get_model_info(self) -> Dict[str, Any]: + """ + Retorna información del modelo. + + Returns: + Dict con información del modelo: + - type: Tipo de modelo (ej: "Chronos2", "Prophet") + - model_id: ID del modelo + - version: Versión del modelo + - device: Dispositivo usado (cpu/cuda) + - otros campos específicos del modelo + """ + pass + + def validate_context(self, context_df: pd.DataFrame) -> bool: + """ + Valida que el DataFrame de contexto tenga el formato correcto. + + Args: + context_df: DataFrame a validar + + Returns: + bool: True si es válido + + Raises: + ValueError: Si el DataFrame es inválido + """ + required_columns = {"id", "timestamp", "target"} + + if not isinstance(context_df, pd.DataFrame): + raise ValueError("context_df debe ser un pandas DataFrame") + + missing_columns = required_columns - set(context_df.columns) + if missing_columns: + raise ValueError( + f"Faltan columnas requeridas: {missing_columns}. 
" + f"Se encontraron: {set(context_df.columns)}" + ) + + if context_df.empty: + raise ValueError("context_df no puede estar vacío") + + if context_df["target"].isnull().any(): + raise ValueError("La columna 'target' contiene valores nulos") + + return True diff --git a/app/domain/models/__init__.py b/app/domain/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/domain/models/anomaly.py b/app/domain/models/anomaly.py new file mode 100644 index 0000000000000000000000000000000000000000..b45b1dacaaa8b4b3351a19e86a4faa837d92363c --- /dev/null +++ b/app/domain/models/anomaly.py @@ -0,0 +1,115 @@ +""" +Modelo de dominio para anomalías detectadas. + +Este módulo define la entidad AnomalyPoint, cumpliendo con SRP. +""" + +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class AnomalyPoint: + """ + Representa un punto con posible anomalía detectada. + + Attributes: + index: Índice del punto en la serie + value: Valor observado + expected: Valor esperado (mediana del pronóstico) + lower_bound: Límite inferior del intervalo de confianza + upper_bound: Límite superior del intervalo de confianza + is_anomaly: Indica si el punto es una anomalía + z_score: Puntuación Z del punto (opcional) + severity: Severidad de la anomalía (low, medium, high) + + Example: + >>> point = AnomalyPoint( + ... index=5, + ... value=200.0, + ... expected=120.0, + ... lower_bound=115.0, + ... upper_bound=125.0, + ... is_anomaly=True, + ... z_score=4.5 + ... ) + >>> point.deviation + 80.0 + >>> point.severity + 'high' + """ + + index: int + value: float + expected: float + lower_bound: float + upper_bound: float + is_anomaly: bool + z_score: float = 0.0 + severity: Optional[str] = None + + def __post_init__(self): + """Cálculo automático de severidad""" + if self.severity is None and self.is_anomaly: + self.severity = self._calculate_severity() + + @property + def deviation(self) -> float: + """ + Calcula la desviación del valor respecto al esperado. + + Returns: + float: Diferencia absoluta entre valor y esperado + """ + return abs(self.value - self.expected) + + @property + def deviation_percentage(self) -> float: + """ + Calcula el porcentaje de desviación. + + Returns: + float: Desviación como porcentaje del valor esperado + """ + if self.expected == 0: + return float('inf') if self.value != 0 else 0.0 + return (self.deviation / abs(self.expected)) * 100 + + def _calculate_severity(self) -> str: + """ + Calcula la severidad de la anomalía basada en z_score. 
+ + Returns: + str: "low", "medium" o "high" + """ + abs_z = abs(self.z_score) + + if abs_z >= 4.0: + return "high" + elif abs_z >= 3.0: + return "medium" + else: + return "low" + + def is_above_expected(self) -> bool: + """Retorna True si el valor está por encima del esperado""" + return self.value > self.expected + + def is_below_expected(self) -> bool: + """Retorna True si el valor está por debajo del esperado""" + return self.value < self.expected + + def to_dict(self) -> dict: + """Serializa el punto a diccionario""" + return { + "index": self.index, + "value": self.value, + "expected": self.expected, + "lower_bound": self.lower_bound, + "upper_bound": self.upper_bound, + "is_anomaly": self.is_anomaly, + "z_score": self.z_score, + "severity": self.severity, + "deviation": self.deviation, + "deviation_percentage": self.deviation_percentage + } diff --git a/app/domain/models/forecast_config.py b/app/domain/models/forecast_config.py new file mode 100644 index 0000000000000000000000000000000000000000..d3c92344b7f88b9763fd47785491fc2baab66e45 --- /dev/null +++ b/app/domain/models/forecast_config.py @@ -0,0 +1,118 @@ +""" +Modelo de dominio para configuración de forecasting. + +Este módulo define la entidad ForecastConfig, cumpliendo con SRP. +""" + +from dataclasses import dataclass, field +from typing import List + + +@dataclass +class ForecastConfig: + """ + Configuración para operaciones de forecasting. + + Define los parámetros necesarios para realizar un pronóstico, + incluyendo horizonte de predicción, cuantiles y frecuencia. + + Attributes: + prediction_length: Número de períodos a pronosticar + quantile_levels: Cuantiles a calcular (ej: [0.1, 0.5, 0.9]) + freq: Frecuencia temporal (D, H, M, etc.) + + Example: + >>> config = ForecastConfig( + ... prediction_length=7, + ... quantile_levels=[0.1, 0.5, 0.9], + ... freq="D" + ... ) + >>> config.has_median + True + """ + + prediction_length: int + quantile_levels: List[float] = field(default_factory=lambda: [0.1, 0.5, 0.9]) + freq: str = "D" + + def __post_init__(self): + """Validación y normalización automática""" + self.validate() + self._ensure_median() + self._sort_quantiles() + + @property + def has_median(self) -> bool: + """Verifica si el cuantil 0.5 (mediana) está incluido""" + return 0.5 in self.quantile_levels + + def validate(self) -> bool: + """ + Valida la configuración. + + Returns: + bool: True si es válida + + Raises: + ValueError: Si la configuración es inválida + """ + # Validar prediction_length + if self.prediction_length < 1: + raise ValueError( + f"prediction_length debe ser >= 1, recibido: {self.prediction_length}" + ) + + # Validar quantile_levels + if not self.quantile_levels: + raise ValueError("quantile_levels no puede estar vacío") + + # Verificar que los cuantiles estén en [0, 1] + for q in self.quantile_levels: + if not 0 <= q <= 1: + raise ValueError( + f"Todos los cuantiles deben estar en [0, 1], encontrado: {q}" + ) + + # Validar freq + valid_freqs = {"D", "H", "M", "W", "Y", "Q", "S", "T", "min"} + if self.freq not in valid_freqs: + raise ValueError( + f"Frecuencia '{self.freq}' no reconocida. 
" + f"Válidas: {valid_freqs}" + ) + + return True + + def _ensure_median(self): + """Asegura que la mediana (0.5) esté incluida""" + if not self.has_median: + self.quantile_levels.append(0.5) + + def _sort_quantiles(self): + """Ordena los cuantiles de menor a mayor""" + self.quantile_levels = sorted(set(self.quantile_levels)) + + @classmethod + def default(cls) -> "ForecastConfig": + """ + Crea una configuración con valores por defecto. + + Returns: + ForecastConfig: Configuración por defecto + - prediction_length: 7 + - quantile_levels: [0.1, 0.5, 0.9] + - freq: "D" + """ + return cls( + prediction_length=7, + quantile_levels=[0.1, 0.5, 0.9], + freq="D" + ) + + def to_dict(self) -> dict: + """Serializa la configuración a diccionario""" + return { + "prediction_length": self.prediction_length, + "quantile_levels": self.quantile_levels, + "freq": self.freq + } diff --git a/app/domain/models/forecast_result.py b/app/domain/models/forecast_result.py new file mode 100644 index 0000000000000000000000000000000000000000..678dbd489e73484722c1144cf988d4d8b855f903 --- /dev/null +++ b/app/domain/models/forecast_result.py @@ -0,0 +1,147 @@ +""" +Modelo de dominio para resultados de forecasting. + +Este módulo define la entidad ForecastResult, cumpliendo con SRP. +""" + +from dataclasses import dataclass +from typing import List, Dict, Any + + +@dataclass +class ForecastResult: + """ + Resultado de una operación de forecasting. + + Encapsula los pronósticos generados, incluyendo timestamps, + valores medianos y cuantiles. + + Attributes: + timestamps: Lista de timestamps pronosticados + median: Lista de valores medianos (cuantil 0.5) + quantiles: Dict de cuantil -> valores (ej: {"0.1": [...], "0.9": [...]}) + series_id: Identificador de la serie + metadata: Información adicional del forecast + + Example: + >>> result = ForecastResult( + ... timestamps=["2025-11-10", "2025-11-11"], + ... median=[120.5, 122.3], + ... quantiles={"0.1": [115.2, 116.8], "0.9": [125.8, 127.8]}, + ... series_id="sales_A" + ... ) + >>> result.length + 2 + """ + + timestamps: List[str] + median: List[float] + quantiles: Dict[str, List[float]] + series_id: str = "series_0" + metadata: Dict[str, Any] = None + + def __post_init__(self): + """Validación automática al crear la instancia""" + if self.metadata is None: + self.metadata = {} + self.validate() + + @property + def length(self) -> int: + """Retorna el número de períodos pronosticados""" + return len(self.timestamps) + + def validate(self) -> bool: + """ + Valida la consistencia del resultado. 
+ + Returns: + bool: True si es válido + + Raises: + ValueError: Si el resultado es inválido + """ + n = len(self.timestamps) + + # Validar que no esté vacío + if n == 0: + raise ValueError("El resultado no puede estar vacío") + + # Validar longitud de median + if len(self.median) != n: + raise ValueError( + f"Median ({len(self.median)}) debe tener la misma longitud " + f"que timestamps ({n})" + ) + + # Validar longitud de cada cuantil + for q, values in self.quantiles.items(): + if len(values) != n: + raise ValueError( + f"Cuantil {q} ({len(values)}) debe tener la misma longitud " + f"que timestamps ({n})" + ) + + # Validar que todos los valores sean numéricos + if not all(isinstance(v, (int, float)) for v in self.median): + raise ValueError("Median debe contener solo valores numéricos") + + for q, values in self.quantiles.items(): + if not all(isinstance(v, (int, float)) for v in values): + raise ValueError(f"Cuantil {q} debe contener solo valores numéricos") + + return True + + def get_quantile(self, level: float) -> List[float]: + """ + Obtiene los valores de un cuantil específico. + + Args: + level: Nivel del cuantil (ej: 0.1, 0.5, 0.9) + + Returns: + List[float]: Valores del cuantil + + Raises: + KeyError: Si el cuantil no existe + """ + key = f"{level:.3g}" + if key not in self.quantiles: + available = list(self.quantiles.keys()) + raise KeyError( + f"Cuantil {level} no encontrado. Disponibles: {available}" + ) + return self.quantiles[key] + + def get_interval(self, lower: float = 0.1, upper: float = 0.9) -> Dict[str, List[float]]: + """ + Obtiene un intervalo de predicción. + + Args: + lower: Cuantil inferior (default: 0.1) + upper: Cuantil superior (default: 0.9) + + Returns: + Dict con "lower", "median", "upper" + """ + return { + "lower": self.get_quantile(lower), + "median": self.median, + "upper": self.get_quantile(upper) + } + + def to_dict(self) -> Dict[str, Any]: + """ + Serializa el resultado a diccionario. + + Returns: + Dict con la representación del resultado + """ + return { + "timestamps": self.timestamps, + "median": self.median, + "quantiles": self.quantiles, + "series_id": self.series_id, + "length": self.length, + "metadata": self.metadata + } diff --git a/app/domain/models/time_series.py b/app/domain/models/time_series.py new file mode 100644 index 0000000000000000000000000000000000000000..bd90ac81253f2e898fafcc52626f426e57fe54c2 --- /dev/null +++ b/app/domain/models/time_series.py @@ -0,0 +1,124 @@ +""" +Modelo de dominio para series temporales. + +Este módulo define la entidad TimeSeries, cumpliendo con SRP. +""" + +from dataclasses import dataclass, field +from typing import List, Optional, Dict, Any + + +@dataclass +class TimeSeries: + """ + Modelo de dominio para una serie temporal. + + Representa una serie temporal con sus valores, timestamps opcionales + y metadata asociada. Esta clase es inmutable después de la validación. + + Attributes: + values: Lista de valores numéricos de la serie + timestamps: Lista opcional de timestamps (strings ISO o índices) + series_id: Identificador único de la serie + freq: Frecuencia temporal (D=daily, H=hourly, M=monthly, etc.) + metadata: Diccionario con información adicional + + Example: + >>> series = TimeSeries( + ... values=[100, 102, 105, 103, 108], + ... series_id="sales_product_a", + ... freq="D" + ... 
) + >>> series.length + 5 + >>> series.validate() + True + """ + + values: List[float] + timestamps: Optional[List[str]] = None + series_id: str = "series_0" + freq: str = "D" + metadata: Dict[str, Any] = field(default_factory=dict) + + def __post_init__(self): + """Validación automática al crear la instancia""" + self.validate() + + @property + def length(self) -> int: + """Retorna la longitud de la serie""" + return len(self.values) + + def validate(self) -> bool: + """ + Valida la consistencia de la serie temporal. + + Returns: + bool: True si la serie es válida + + Raises: + ValueError: Si la serie es inválida + """ + # Verificar que no esté vacía + if not self.values or len(self.values) == 0: + raise ValueError("La serie temporal no puede estar vacía") + + # Verificar que todos sean números + if not all(isinstance(v, (int, float)) for v in self.values): + raise ValueError("Todos los valores deben ser numéricos") + + # Verificar que no haya None/NaN + if any(v is None or (isinstance(v, float) and v != v) for v in self.values): + raise ValueError("La serie contiene valores nulos o NaN") + + # Si hay timestamps, verificar longitud + if self.timestamps is not None: + if len(self.timestamps) != len(self.values): + raise ValueError( + f"Timestamps ({len(self.timestamps)}) y values ({len(self.values)}) " + "deben tener la misma longitud" + ) + + return True + + def get_subset(self, start: int, end: int) -> "TimeSeries": + """ + Retorna un subset de la serie temporal. + + Args: + start: Índice de inicio (inclusive) + end: Índice de fin (exclusive) + + Returns: + TimeSeries: Nueva instancia con el subset + """ + subset_values = self.values[start:end] + subset_timestamps = None + + if self.timestamps: + subset_timestamps = self.timestamps[start:end] + + return TimeSeries( + values=subset_values, + timestamps=subset_timestamps, + series_id=self.series_id, + freq=self.freq, + metadata=self.metadata.copy() + ) + + def to_dict(self) -> Dict[str, Any]: + """ + Serializa la serie a diccionario. + + Returns: + Dict con la representación de la serie + """ + return { + "values": self.values, + "timestamps": self.timestamps, + "series_id": self.series_id, + "freq": self.freq, + "length": self.length, + "metadata": self.metadata + } diff --git a/app/domain/services/__init__.py b/app/domain/services/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/domain/services/anomaly_service.py b/app/domain/services/anomaly_service.py new file mode 100644 index 0000000000000000000000000000000000000000..ea283b03cb42963ddcac3e57fe2956796a85fba1 --- /dev/null +++ b/app/domain/services/anomaly_service.py @@ -0,0 +1,191 @@ +""" +Servicio de dominio para detección de anomalías. + +Este servicio encapsula la lógica de detección de anomalías, +cumpliendo con SRP y DIP. +""" + +from typing import List +from app.domain.interfaces.forecast_model import IForecastModel +from app.domain.interfaces.data_transformer import IDataTransformer +from app.domain.models.time_series import TimeSeries +from app.domain.models.forecast_config import ForecastConfig +from app.domain.models.anomaly import AnomalyPoint +from app.utils.logger import setup_logger + +logger = setup_logger(__name__) + + +class AnomalyService: + """ + Servicio de dominio para detección de anomalías. + + Detecta puntos anómalos comparando valores observados con + pronósticos del modelo, usando intervalos de predicción. 
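+
+    A point is flagged as anomalous when the observed value falls outside
+    the model's [quantile_low, quantile_high] prediction interval for that
+    step.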
+ + Attributes: + model: Modelo de forecasting + transformer: Transformador de datos + + Example: + >>> service = AnomalyService(model, transformer) + >>> context = TimeSeries(values=[100, 102, 105, 103, 108]) + >>> recent = [107, 200, 106] # 200 es anomalía + >>> anomalies = service.detect_anomalies(context, recent, config) + >>> sum(1 for a in anomalies if a.is_anomaly) + 1 + """ + + def __init__( + self, + model: IForecastModel, + transformer: IDataTransformer + ): + """ + Inicializa el servicio. + + Args: + model: Implementación de IForecastModel + transformer: Implementación de IDataTransformer + """ + self.model = model + self.transformer = transformer + logger.info("AnomalyService initialized") + + def detect_anomalies( + self, + context: TimeSeries, + recent_observed: List[float], + config: ForecastConfig, + quantile_low: float = 0.05, + quantile_high: float = 0.95 + ) -> List[AnomalyPoint]: + """ + Detecta anomalías comparando observaciones con pronóstico. + + Un punto se considera anómalo si cae fuera del intervalo + [quantile_low, quantile_high] del pronóstico. + + Args: + context: Serie temporal histórica (contexto) + recent_observed: Valores recientes a evaluar + config: Configuración del forecast + quantile_low: Cuantil inferior del intervalo (default: 0.05) + quantile_high: Cuantil superior del intervalo (default: 0.95) + + Returns: + List[AnomalyPoint]: Lista de puntos con indicador de anomalía + + Raises: + ValueError: Si las longitudes no coinciden + + Example: + >>> context = TimeSeries(values=[100, 102, 105]) + >>> recent = [106, 250, 104] # 250 es anomalía + >>> config = ForecastConfig(prediction_length=3) + >>> anomalies = service.detect_anomalies(context, recent, config) + >>> anomalies[1].is_anomaly + True + """ + logger.info( + f"Detecting anomalies in {len(recent_observed)} points " + f"(interval: [{quantile_low}, {quantile_high}])" + ) + + # Validar longitudes + if len(recent_observed) != config.prediction_length: + raise ValueError( + f"recent_observed length ({len(recent_observed)}) must equal " + f"prediction_length ({config.prediction_length})" + ) + + # Preparar config con cuantiles necesarios + quantiles = sorted(set([quantile_low, 0.5, quantile_high])) + config_anomaly = ForecastConfig( + prediction_length=config.prediction_length, + quantile_levels=quantiles, + freq=config.freq + ) + + # Construir DataFrame de contexto + context_df = self.transformer.build_context_df( + values=context.values, + timestamps=context.timestamps, + series_id=context.series_id, + freq=config.freq + ) + + # Predecir + pred_df = self.model.predict( + context_df=context_df, + prediction_length=config_anomaly.prediction_length, + quantile_levels=config_anomaly.quantile_levels + ) + + # Parsear resultado + result = self.transformer.parse_prediction_result( + pred_df=pred_df, + quantile_levels=quantiles + ) + + # Detectar anomalías + anomalies = [] + q_low_key = f"{quantile_low:.3g}" + q_high_key = f"{quantile_high:.3g}" + + for i, obs in enumerate(recent_observed): + expected = result["median"][i] + lower = result["quantiles"][q_low_key][i] + upper = result["quantiles"][q_high_key][i] + + # Verificar si está fuera del intervalo + is_anom = (obs < lower) or (obs > upper) + + # Calcular z-score aproximado + spread = (upper - lower) / 2 + z_score = abs(obs - expected) / (spread + 1e-8) if spread > 0 else 0 + + anomalies.append(AnomalyPoint( + index=i, + value=obs, + expected=expected, + lower_bound=lower, + upper_bound=upper, + is_anomaly=is_anom, + z_score=z_score + )) + + 
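# NOTE: this z_score is a heuristic rather than a true standard score:
+        # it divides the deviation by half the prediction-interval width.
+        # For a roughly Gaussian forecast with a [0.05, 0.95] interval that
+        # half-width is ~1.645 sigma, so |z| is understated and the severity
+        # thresholds in AnomalyPoint act conservatively.
+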
num_anomalies = sum(1 for a in anomalies if a.is_anomaly) + logger.info( + f"Anomaly detection completed: {num_anomalies}/{len(anomalies)} " + "anomalies detected" + ) + + return anomalies + + def get_anomaly_summary(self, anomalies: List[AnomalyPoint]) -> dict: + """ + Genera un resumen de las anomalías detectadas. + + Args: + anomalies: Lista de anomalías + + Returns: + Dict con estadísticas de las anomalías + """ + total = len(anomalies) + detected = sum(1 for a in anomalies if a.is_anomaly) + + severities = {"low": 0, "medium": 0, "high": 0} + for a in anomalies: + if a.is_anomaly and a.severity: + severities[a.severity] += 1 + + return { + "total_points": total, + "anomalies_detected": detected, + "anomaly_rate": (detected / total * 100) if total > 0 else 0, + "severities": severities, + "max_deviation": max((a.deviation for a in anomalies), default=0), + "max_z_score": max((abs(a.z_score) for a in anomalies), default=0) + } diff --git a/app/domain/services/backtest_service.py b/app/domain/services/backtest_service.py new file mode 100644 index 0000000000000000000000000000000000000000..caf4c6c2fda41cd69ddc43167103efed06e5e911 --- /dev/null +++ b/app/domain/services/backtest_service.py @@ -0,0 +1,243 @@ +""" +Servicio de dominio para backtesting. + +Este servicio encapsula la lógica de validación de modelos, +cumpliendo con SRP y DIP. +""" + +import numpy as np +from dataclasses import dataclass +from typing import List +from app.domain.interfaces.forecast_model import IForecastModel +from app.domain.interfaces.data_transformer import IDataTransformer +from app.domain.models.time_series import TimeSeries +from app.domain.models.forecast_config import ForecastConfig +from app.utils.logger import setup_logger + +logger = setup_logger(__name__) + + +@dataclass +class BacktestMetrics: + """ + Métricas de evaluación de un backtest. + + Attributes: + mae: Mean Absolute Error + mape: Mean Absolute Percentage Error (%) + rmse: Root Mean Squared Error + wql: Weighted Quantile Loss (para cuantil 0.5) + """ + mae: float + mape: float + rmse: float + wql: float + + def to_dict(self) -> dict: + """Serializa las métricas""" + return { + "mae": self.mae, + "mape": self.mape, + "rmse": self.rmse, + "wql": self.wql + } + + +@dataclass +class BacktestResult: + """ + Resultado completo de un backtest. + + Attributes: + metrics: Métricas de evaluación + forecast: Valores pronosticados + actuals: Valores reales + timestamps: Timestamps del período de prueba + """ + metrics: BacktestMetrics + forecast: List[float] + actuals: List[float] + timestamps: List[str] + + def to_dict(self) -> dict: + """Serializa el resultado""" + return { + "metrics": self.metrics.to_dict(), + "forecast": self.forecast, + "actuals": self.actuals, + "timestamps": self.timestamps + } + + +class BacktestService: + """ + Servicio de dominio para backtesting de modelos. + + Realiza validación de modelos separando la serie en train/test + y comparando pronósticos con valores reales. + + Attributes: + model: Modelo de forecasting + transformer: Transformador de datos + + Example: + >>> service = BacktestService(model, transformer) + >>> series = TimeSeries(values=[100, 102, 105, 103, 108, 112, 115]) + >>> result = service.simple_backtest(series, test_length=3) + >>> result.metrics.mae < 5 # Buen modelo + True + """ + + def __init__( + self, + model: IForecastModel, + transformer: IDataTransformer + ): + """ + Inicializa el servicio. 
+ + Args: + model: Implementación de IForecastModel + transformer: Implementación de IDataTransformer + """ + self.model = model + self.transformer = transformer + logger.info("BacktestService initialized") + + def simple_backtest( + self, + series: TimeSeries, + test_length: int, + config: ForecastConfig = None + ) -> BacktestResult: + """ + Realiza un backtest simple: train/test split. + + Separa la serie en train (histórico) y test (validación), + genera pronóstico para el período test y calcula métricas. + + Args: + series: Serie temporal completa + test_length: Número de puntos para test (final de la serie) + config: Configuración del forecast (opcional) + + Returns: + BacktestResult: Resultado con métricas y pronósticos + + Raises: + ValueError: Si test_length >= longitud de la serie + + Example: + >>> series = TimeSeries(values=[100, 102, 105, 103, 108]) + >>> result = service.simple_backtest(series, test_length=2) + >>> len(result.forecast) + 2 + """ + logger.info( + f"Running simple backtest for series '{series.series_id}' " + f"(total_length={series.length}, test_length={test_length})" + ) + + # Validar + if test_length >= series.length: + raise ValueError( + f"test_length ({test_length}) debe ser menor que " + f"la longitud de la serie ({series.length})" + ) + + if test_length < 1: + raise ValueError(f"test_length debe ser >= 1, recibido: {test_length}") + + # Configuración por defecto si no se proporciona + if config is None: + config = ForecastConfig( + prediction_length=test_length, + quantile_levels=[0.5], # Solo mediana para backtest + freq=series.freq + ) + else: + # Ajustar prediction_length + config.prediction_length = test_length + + # Separar train/test + train_length = series.length - test_length + train_series = series.get_subset(0, train_length) + test_values = series.values[train_length:] + + logger.debug(f"Train length: {train_length}, Test length: {test_length}") + + # Construir DataFrame de train + context_df = self.transformer.build_context_df( + values=train_series.values, + timestamps=train_series.timestamps, + series_id=series.series_id, + freq=config.freq + ) + + # Predecir + pred_df = self.model.predict( + context_df=context_df, + prediction_length=test_length, + quantile_levels=[0.5] + ) + + # Parsear resultado + result = self.transformer.parse_prediction_result( + pred_df=pred_df, + quantile_levels=[0.5] + ) + + forecast = np.array(result["median"], dtype=float) + actuals = np.array(test_values, dtype=float) + + # Calcular métricas + metrics = self._calculate_metrics(forecast, actuals) + + logger.info( + f"Backtest completed: MAE={metrics.mae:.2f}, " + f"MAPE={metrics.mape:.2f}%, RMSE={metrics.rmse:.2f}" + ) + + return BacktestResult( + metrics=metrics, + forecast=forecast.tolist(), + actuals=actuals.tolist(), + timestamps=result["timestamps"] + ) + + def _calculate_metrics( + self, + forecast: np.ndarray, + actuals: np.ndarray + ) -> BacktestMetrics: + """ + Calcula métricas de evaluación. 
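+
+        Formulas, with d_i = actual_i - forecast_i over n points:
+            MAE  = mean(|d_i|)
+            MAPE = 100 * mean(|d_i / (actual_i + eps)|), eps = 1e-8
+            RMSE = sqrt(mean(d_i ** 2))
+            WQL(tau=0.5) = mean(max(tau * d_i, (tau - 1) * d_i)) = MAE / 2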
+ + Args: + forecast: Valores pronosticados + actuals: Valores reales + + Returns: + BacktestMetrics: Métricas calculadas + """ + # MAE: Mean Absolute Error + mae = float(np.mean(np.abs(actuals - forecast))) + + # MAPE: Mean Absolute Percentage Error + eps = 1e-8 + mape = float(np.mean(np.abs((actuals - forecast) / (actuals + eps)))) * 100.0 + + # RMSE: Root Mean Squared Error + rmse = float(np.sqrt(np.mean((actuals - forecast) ** 2))) + + # WQL: Weighted Quantile Loss (para cuantil 0.5 = MAE/2) + tau = 0.5 + diff = actuals - forecast + wql = float(np.mean(np.maximum(tau * diff, (tau - 1) * diff))) + + return BacktestMetrics( + mae=mae, + mape=mape, + rmse=rmse, + wql=wql + ) diff --git a/app/domain/services/forecast_service.py b/app/domain/services/forecast_service.py new file mode 100644 index 0000000000000000000000000000000000000000..b6786f45ff9964bb9f6fffee92a5dd36acba86f6 --- /dev/null +++ b/app/domain/services/forecast_service.py @@ -0,0 +1,194 @@ +""" +Servicio de dominio para forecasting. + +Este servicio orquesta la lógica de negocio de forecasting, +cumpliendo con SRP y DIP. +""" + +from typing import List +from app.domain.interfaces.forecast_model import IForecastModel +from app.domain.interfaces.data_transformer import IDataTransformer +from app.domain.models.time_series import TimeSeries +from app.domain.models.forecast_config import ForecastConfig +from app.domain.models.forecast_result import ForecastResult +from app.utils.logger import setup_logger + +logger = setup_logger(__name__) + + +class ForecastService: + """ + Servicio de dominio para operaciones de forecasting. + + Este servicio encapsula la lógica de negocio para generar pronósticos, + dependiendo de abstracciones (IForecastModel, IDataTransformer) en lugar + de implementaciones concretas (DIP). + + Attributes: + model: Modelo de forecasting (implementa IForecastModel) + transformer: Transformador de datos (implementa IDataTransformer) + + Example: + >>> from app.infrastructure.ml.chronos_model import ChronosModel + >>> from app.utils.dataframe_builder import DataFrameBuilder + >>> + >>> model = ChronosModel("amazon/chronos-2") + >>> transformer = DataFrameBuilder() + >>> service = ForecastService(model, transformer) + >>> + >>> series = TimeSeries(values=[100, 102, 105]) + >>> config = ForecastConfig(prediction_length=3) + >>> result = service.forecast_univariate(series, config) + """ + + def __init__( + self, + model: IForecastModel, + transformer: IDataTransformer + ): + """ + Inicializa el servicio con sus dependencias. + + Args: + model: Implementación de IForecastModel + transformer: Implementación de IDataTransformer + """ + self.model = model + self.transformer = transformer + + model_info = self.model.get_model_info() + logger.info( + f"ForecastService initialized with model: {model_info.get('type', 'unknown')}" + ) + + def forecast_univariate( + self, + series: TimeSeries, + config: ForecastConfig + ) -> ForecastResult: + """ + Genera pronóstico para una serie univariada. 
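+
+        Flow: validate series and config, build the context DataFrame via
+        the transformer, run model.predict, then parse the raw predictions
+        into a ForecastResult.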
+ + Args: + series: Serie temporal a pronosticar + config: Configuración del forecast + + Returns: + ForecastResult: Resultado con pronósticos + + Raises: + ValueError: Si la serie o configuración son inválidas + RuntimeError: Si el modelo falla al predecir + + Example: + >>> series = TimeSeries(values=[100, 102, 105, 103, 108]) + >>> config = ForecastConfig(prediction_length=3) + >>> result = service.forecast_univariate(series, config) + >>> len(result.median) + 3 + """ + logger.info( + f"Forecasting univariate series '{series.series_id}' " + f"(length={series.length}, horizon={config.prediction_length})" + ) + + # Validar entrada + series.validate() + config.validate() + + # Transformar serie a DataFrame + context_df = self.transformer.build_context_df( + values=series.values, + timestamps=series.timestamps, + series_id=series.series_id, + freq=config.freq + ) + + logger.debug(f"Context DataFrame shape: {context_df.shape}") + + # Validar DataFrame + self.model.validate_context(context_df) + + # Predecir + try: + pred_df = self.model.predict( + context_df=context_df, + prediction_length=config.prediction_length, + quantile_levels=config.quantile_levels + ) + except Exception as e: + logger.error(f"Model prediction failed: {e}", exc_info=True) + raise RuntimeError(f"Error al predecir: {e}") from e + + logger.debug(f"Prediction DataFrame shape: {pred_df.shape}") + + # Parsear resultado + result_dict = self.transformer.parse_prediction_result( + pred_df=pred_df, + quantile_levels=config.quantile_levels + ) + + # Crear ForecastResult + result = ForecastResult( + timestamps=result_dict["timestamps"], + median=result_dict["median"], + quantiles=result_dict["quantiles"], + series_id=series.series_id, + metadata={ + "prediction_length": config.prediction_length, + "quantile_levels": config.quantile_levels, + "freq": config.freq, + "model": self.model.get_model_info() + } + ) + + logger.info( + f"Forecast completed: {result.length} periods generated " + f"for series '{series.series_id}'" + ) + + return result + + def forecast_multi_series( + self, + series_list: List[TimeSeries], + config: ForecastConfig + ) -> List[ForecastResult]: + """ + Genera pronósticos para múltiples series. 
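+
+        Unlike ForecastMultiSeriesUseCase, this method fails fast: the first
+        series that errors re-raises and aborts the whole batch.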
+ + Args: + series_list: Lista de series temporales + config: Configuración del forecast (misma para todas) + + Returns: + List[ForecastResult]: Lista de resultados (uno por serie) + + Example: + >>> series1 = TimeSeries(values=[100, 102], series_id="A") + >>> series2 = TimeSeries(values=[200, 205], series_id="B") + >>> results = service.forecast_multi_series([series1, series2], config) + >>> len(results) + 2 + """ + logger.info(f"Forecasting {len(series_list)} series") + + if not series_list: + raise ValueError("series_list no puede estar vacía") + + results = [] + for i, series in enumerate(series_list): + logger.debug(f"Processing series {i+1}/{len(series_list)}: {series.series_id}") + + try: + result = self.forecast_univariate(series, config) + results.append(result) + except Exception as e: + logger.error( + f"Failed to forecast series '{series.series_id}': {e}", + exc_info=True + ) + raise + + logger.info(f"Multi-series forecast completed: {len(results)} series processed") + return results diff --git a/app/infrastructure/__init__.py b/app/infrastructure/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/infrastructure/config/__init__.py b/app/infrastructure/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/infrastructure/config/settings.py b/app/infrastructure/config/settings.py new file mode 100644 index 0000000000000000000000000000000000000000..f14ea62541af8f5dc0e71fe66a2d1765df1b28cd --- /dev/null +++ b/app/infrastructure/config/settings.py @@ -0,0 +1,75 @@ +""" +Configuración centralizada del proyecto usando Pydantic Settings. + +Este módulo implementa el patrón Singleton para la configuración, +cumpliendo con el principio SRP (Single Responsibility Principle). +""" + +from typing import List +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class Settings(BaseSettings): + """ + Configuración centralizada de la aplicación. + + Todas las configuraciones se cargan desde variables de entorno + o valores por defecto. Esto permite fácil configuración en + diferentes ambientes (dev, staging, production). + """ + + # API Configuration + api_title: str = "Chronos-2 Forecasting API" + api_version: str = "3.0.0" + api_description: str = ( + "API de pronósticos con Chronos-2 + Excel Add-in. " + "Refactorizado con Clean Architecture y principios SOLID." + ) + api_port: int = 8000 + + # Model Configuration + model_id: str = "amazon/chronos-2" + device_map: str = "cpu" + + # CORS Configuration + cors_origins: List[str] = [ + "https://localhost:3000", + "https://localhost:3001", + "https://ttzzs-chronos2-excel-forecasting-api.hf.space", + "*" # Permitir todos los orígenes para Office Add-ins + ] + + # Logging + log_level: str = "INFO" + log_format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + + # Static Files + static_dir: str = "static" + + model_config = SettingsConfigDict( + env_file=".env", + env_file_encoding="utf-8", + case_sensitive=False, + extra="ignore" + ) + + +# Singleton instance +_settings_instance = None + + +def get_settings() -> Settings: + """ + Obtiene la instancia singleton de Settings. 
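+
+    Example (the instance is created once and then cached):
+        >>> get_settings() is get_settings()
+        True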
+ + Returns: + Settings: Instancia de configuración + """ + global _settings_instance + if _settings_instance is None: + _settings_instance = Settings() + return _settings_instance + + +# Exportar instancia por defecto para uso directo +settings = get_settings() diff --git a/app/infrastructure/ml/__init__.py b/app/infrastructure/ml/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/infrastructure/ml/chronos_model.py b/app/infrastructure/ml/chronos_model.py new file mode 100644 index 0000000000000000000000000000000000000000..b4acd541431935570198cdce540cf0682c81d5b9 --- /dev/null +++ b/app/infrastructure/ml/chronos_model.py @@ -0,0 +1,129 @@ +""" +Implementación concreta del modelo Chronos-2. + +Este módulo implementa la interfaz IForecastModel usando Chronos2Pipeline, +aplicando el principio DIP (Dependency Inversion Principle). +""" + +from typing import List, Dict, Any +import pandas as pd +from chronos import Chronos2Pipeline + +from app.domain.interfaces.forecast_model import IForecastModel +from app.utils.logger import setup_logger + +logger = setup_logger(__name__) + + +class ChronosModel(IForecastModel): + """ + Implementación concreta de IForecastModel usando Chronos-2. + + Esta clase puede ser reemplazada por otra implementación + (Prophet, ARIMA, etc.) sin modificar el resto del código, + gracias al principio DIP. + + Attributes: + model_id: ID del modelo en HuggingFace + device_map: Dispositivo para inferencia (cpu/cuda) + pipeline: Pipeline de Chronos2 + """ + + def __init__(self, model_id: str = "amazon/chronos-2", device_map: str = "cpu"): + """ + Inicializa el modelo Chronos-2. + + Args: + model_id: ID del modelo en HuggingFace + device_map: Dispositivo para inferencia (cpu/cuda) + """ + self.model_id = model_id + self.device_map = device_map + + logger.info(f"Loading Chronos model: {model_id} on {device_map}") + + try: + self.pipeline = Chronos2Pipeline.from_pretrained( + model_id, + device_map=device_map + ) + logger.info("Chronos model loaded successfully") + except Exception as e: + logger.error(f"Failed to load Chronos model: {e}") + raise + + def predict( + self, + context_df: pd.DataFrame, + prediction_length: int, + quantile_levels: List[float], + **kwargs + ) -> pd.DataFrame: + """ + Genera pronósticos probabilísticos usando Chronos-2. + + Args: + context_df: DataFrame con columnas [id, timestamp, target] + prediction_length: Horizonte de predicción + quantile_levels: Cuantiles a calcular (ej: [0.1, 0.5, 0.9]) + **kwargs: Argumentos adicionales para el pipeline + + Returns: + DataFrame con pronósticos y cuantiles + + Raises: + ValueError: Si el context_df no tiene el formato correcto + RuntimeError: Si falla la inferencia + """ + logger.debug( + f"Predicting {prediction_length} steps with " + f"{len(quantile_levels)} quantiles" + ) + + # Validar formato del DataFrame + required_cols = {"id", "timestamp", "target"} + if not required_cols.issubset(context_df.columns): + raise ValueError( + f"context_df debe tener columnas: {required_cols}. 
" + f"Encontradas: {set(context_df.columns)}" + ) + + try: + # Realizar predicción + pred_df = self.pipeline.predict_df( + context_df, + prediction_length=prediction_length, + quantile_levels=quantile_levels, + id_column="id", + timestamp_column="timestamp", + target="target", + **kwargs + ) + + # Ordenar resultado + result = pred_df.sort_values(["id", "timestamp"]) + + logger.debug(f"Prediction completed: {len(result)} rows") + return result + + except Exception as e: + logger.error(f"Prediction failed: {e}") + raise RuntimeError(f"Error en predicción: {e}") from e + + def get_model_info(self) -> Dict[str, Any]: + """ + Retorna información del modelo. + + Returns: + Diccionario con información del modelo + """ + return { + "type": "Chronos2", + "model_id": self.model_id, + "device": self.device_map, + "provider": "Amazon", + "version": "2.0" + } + + def __repr__(self) -> str: + return f"ChronosModel(model_id='{self.model_id}', device='{self.device_map}')" diff --git a/app/infrastructure/ml/model_factory.py b/app/infrastructure/ml/model_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..dae6046267aa2e9cc3371d4cfa13f3afd125d1d4 --- /dev/null +++ b/app/infrastructure/ml/model_factory.py @@ -0,0 +1,172 @@ +""" +Factory para crear modelos de forecasting. + +Este módulo implementa el patrón Factory aplicando OCP +(Open/Closed Principle) - abierto para extensión, cerrado para modificación. +""" + +from typing import Dict, Type, List + +from app.domain.interfaces.forecast_model import IForecastModel +from app.infrastructure.ml.chronos_model import ChronosModel +from app.utils.logger import setup_logger + +logger = setup_logger(__name__) + + +class ModelFactory: + """ + Factory para crear modelos de forecasting. + + Permite agregar nuevos modelos sin modificar código existente, + aplicando el principio OCP (Open/Closed Principle). + + Ejemplo de uso: + >>> model = ModelFactory.create("chronos2", model_id="amazon/chronos-2") + >>> # Futuro: model = ModelFactory.create("prophet", ...) + """ + + # Registro de modelos disponibles + _models: Dict[str, Type[IForecastModel]] = { + "chronos2": ChronosModel, + # Futuro: Agregar sin modificar código existente + # "prophet": ProphetModel, + # "arima": ARIMAModel, + # "custom": CustomModel, + } + + @classmethod + def create( + cls, + model_type: str, + **kwargs + ) -> IForecastModel: + """ + Crea una instancia de modelo de forecasting. + + Args: + model_type: Tipo de modelo ("chronos2", "prophet", etc.) + **kwargs: Parámetros específicos del modelo + + Returns: + Instancia de IForecastModel + + Raises: + ValueError: Si el tipo de modelo no existe + + Example: + >>> model = ModelFactory.create( + ... "chronos2", + ... model_id="amazon/chronos-2", + ... device_map="cpu" + ... ) + """ + if model_type not in cls._models: + available = ", ".join(cls._models.keys()) + raise ValueError( + f"Unknown model type: '{model_type}'. " + f"Available: {available}" + ) + + model_class = cls._models[model_type] + logger.info(f"Creating model: {model_type}") + + try: + instance = model_class(**kwargs) + logger.info(f"Model created: {instance}") + return instance + except Exception as e: + logger.error(f"Failed to create model '{model_type}': {e}") + raise + + @classmethod + def register_model( + cls, + name: str, + model_class: Type[IForecastModel] + ) -> None: + """ + Registra un nuevo tipo de modelo (OCP - extensión). + + Permite agregar nuevos modelos dinámicamente sin modificar + el código de la factory. 
+ + Args: + name: Nombre del modelo + model_class: Clase que implementa IForecastModel + + Raises: + TypeError: Si model_class no implementa IForecastModel + ValueError: Si el nombre ya está registrado + + Example: + >>> class MyCustomModel(IForecastModel): + ... pass + >>> ModelFactory.register_model("custom", MyCustomModel) + """ + # Validar que implementa la interfaz + if not issubclass(model_class, IForecastModel): + raise TypeError( + f"{model_class.__name__} debe implementar IForecastModel" + ) + + # Validar que no esté duplicado + if name in cls._models: + raise ValueError( + f"Model '{name}' ya está registrado. " + f"Use un nombre diferente o llame a unregister_model primero." + ) + + cls._models[name] = model_class + logger.info(f"Registered new model: {name} -> {model_class.__name__}") + + @classmethod + def unregister_model(cls, name: str) -> None: + """ + Elimina un modelo del registro. + + Args: + name: Nombre del modelo a eliminar + + Raises: + ValueError: Si el modelo no existe + """ + if name not in cls._models: + raise ValueError(f"Model '{name}' no está registrado") + + del cls._models[name] + logger.info(f"Unregistered model: {name}") + + @classmethod + def list_available_models(cls) -> List[str]: + """ + Lista todos los modelos disponibles. + + Returns: + Lista de nombres de modelos + """ + return list(cls._models.keys()) + + @classmethod + def get_model_info(cls, model_type: str) -> Dict[str, str]: + """ + Obtiene información sobre un tipo de modelo. + + Args: + model_type: Nombre del tipo de modelo + + Returns: + Diccionario con información del modelo + + Raises: + ValueError: Si el modelo no existe + """ + if model_type not in cls._models: + raise ValueError(f"Model '{model_type}' no está registrado") + + model_class = cls._models[model_type] + return { + "name": model_type, + "class": model_class.__name__, + "module": model_class.__module__ + } diff --git a/app/main_from_hf_space.py b/app/main_from_hf_space.py new file mode 100644 index 0000000000000000000000000000000000000000..3b618395237ea7a4e2273c43eea3fb356d6b043a --- /dev/null +++ b/app/main_from_hf_space.py @@ -0,0 +1,681 @@ +import os +from typing import List, Dict, Optional +import json + +import numpy as np +import pandas as pd +from fastapi import FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel, Field +from huggingface_hub import InferenceClient + + +# ========================= +# Configuración +# ========================= + +HF_TOKEN = os.getenv("HF_TOKEN") +MODEL_ID = os.getenv("CHRONOS_MODEL_ID", "amazon/chronos-t5-large") + +app = FastAPI( + title="Chronos-2 Forecasting API (HF Inference)", + description=( + "API de pronósticos usando Chronos-2 via Hugging Face Inference API. " + "Compatible con Excel Add-in." + ), + version="1.0.0", +) + +# Configurar CORS +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], # En producción, especificar dominios permitidos + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Cliente de HF Inference +if not HF_TOKEN: + print("⚠️ WARNING: HF_TOKEN no configurado. 
La API puede no funcionar correctamente.") + print(" Configura HF_TOKEN en las variables de entorno del Space.") + client = None +else: + client = InferenceClient(token=HF_TOKEN) + + +# ========================= +# Modelos Pydantic +# ========================= + +class UnivariateSeries(BaseModel): + values: List[float] + + +class ForecastUnivariateRequest(BaseModel): + series: UnivariateSeries + prediction_length: int = Field(7, description="Número de pasos a predecir") + quantile_levels: Optional[List[float]] = Field( + default=[0.1, 0.5, 0.9], + description="Cuantiles para intervalos de confianza" + ) + freq: str = Field("D", description="Frecuencia temporal (D, W, M, etc.)") + + +class ForecastUnivariateResponse(BaseModel): + timestamps: List[str] + median: List[float] + quantiles: Dict[str, List[float]] + + +class AnomalyDetectionRequest(BaseModel): + context: UnivariateSeries + recent_observed: List[float] + prediction_length: int = 7 + quantile_low: float = 0.05 + quantile_high: float = 0.95 + + +class AnomalyPoint(BaseModel): + index: int + value: float + predicted_median: float + lower: float + upper: float + is_anomaly: bool + + +class AnomalyDetectionResponse(BaseModel): + anomalies: List[AnomalyPoint] + + +class BacktestRequest(BaseModel): + series: UnivariateSeries + prediction_length: int = 7 + test_length: int = 28 + + +class BacktestMetrics(BaseModel): + mae: float + mape: float + rmse: float + + +class BacktestResponse(BaseModel): + metrics: BacktestMetrics + forecast_median: List[float] + forecast_timestamps: List[str] + actuals: List[float] + + +# Modelos para Multi-Series +class MultiSeriesItem(BaseModel): + series_id: str + values: List[float] + + +class ForecastMultiIdRequest(BaseModel): + series_list: List[MultiSeriesItem] + prediction_length: int = Field(7, description="Número de pasos a predecir") + quantile_levels: Optional[List[float]] = Field( + default=[0.1, 0.5, 0.9], + description="Cuantiles para intervalos de confianza" + ) + freq: str = Field("D", description="Frecuencia temporal (D, W, M, etc.)") + + +class ForecastMultiIdResponse(BaseModel): + forecasts: List[ForecastUnivariateResponse] + + +# Modelos para Covariates +class CovariateData(BaseModel): + values: List[float] + name: str = Field(..., description="Nombre de la covariable") + + +class ForecastWithCovariatesRequest(BaseModel): + target_series: UnivariateSeries + covariates_history: List[CovariateData] + covariates_future: List[CovariateData] + prediction_length: int = Field(7, description="Número de pasos a predecir") + quantile_levels: Optional[List[float]] = Field( + default=[0.1, 0.5, 0.9], + description="Cuantiles para intervalos de confianza" + ) + freq: str = Field("D", description="Frecuencia temporal") + + +# Modelos para Scenarios +class ScenarioData(BaseModel): + scenario_name: str + covariate_values: Dict[str, List[float]] + + +class GenerateScenariosRequest(BaseModel): + target_series: UnivariateSeries + scenarios: List[ScenarioData] + prediction_length: int = Field(7, description="Número de pasos a predecir") + freq: str = Field("D", description="Frecuencia temporal") + + +class ScenarioForecast(BaseModel): + scenario_name: str + timestamps: List[str] + median: List[float] + quantiles: Dict[str, List[float]] + + +class GenerateScenariosResponse(BaseModel): + scenarios: List[ScenarioForecast] + + +# Modelos para Multivariate +class MultivariateSeries(BaseModel): + series_name: str + values: List[float] + + +class ForecastMultivariateRequest(BaseModel): + series_list: 
List[MultivariateSeries] + prediction_length: int = Field(7, description="Número de pasos a predecir") + quantile_levels: Optional[List[float]] = Field( + default=[0.1, 0.5, 0.9], + description="Cuantiles para intervalos de confianza" + ) + freq: str = Field("D", description="Frecuencia temporal") + + +class MultivariateForecast(BaseModel): + series_name: str + timestamps: List[str] + median: List[float] + quantiles: Dict[str, List[float]] + + +class ForecastMultivariateResponse(BaseModel): + forecasts: List[MultivariateForecast] + + +# ========================= +# Función auxiliar para llamar a HF Inference +# ========================= + +def call_chronos_inference(series: List[float], prediction_length: int) -> Dict: + """ + Llama a la API de Hugging Face Inference para Chronos. + Retorna un diccionario con las predicciones. + """ + if client is None: + raise HTTPException( + status_code=503, + detail="HF_TOKEN no configurado. Contacta al administrador del servicio." + ) + + try: + # Intentar usando el endpoint específico de time series + import requests + + url = f"https://router.huggingface.co/hf-inference/models/{MODEL_ID}" + headers = {"Authorization": f"Bearer {HF_TOKEN}"} + + payload = { + "inputs": series, + "parameters": { + "prediction_length": prediction_length, + "num_samples": 100 # Para obtener cuantiles + } + } + + response = requests.post(url, headers=headers, json=payload, timeout=60) + + if response.status_code == 503: + raise HTTPException( + status_code=503, + detail="El modelo está cargando. Por favor, intenta de nuevo en 30-60 segundos." + ) + elif response.status_code != 200: + raise HTTPException( + status_code=response.status_code, + detail=f"Error de la API de HuggingFace: {response.text}" + ) + + result = response.json() + return result + + except requests.exceptions.Timeout: + raise HTTPException( + status_code=504, + detail="Timeout al comunicarse con HuggingFace API. El modelo puede estar cargando." + ) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Error inesperado: {str(e)}" + ) + + +def process_chronos_output(raw_output: Dict, prediction_length: int) -> Dict: + """ + Procesa la salida de Chronos para extraer mediana y cuantiles. 
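+
+    Accepts either a bare list of floats (treated as the median path, with
+    all quantiles degenerating to the median) or a dict with a "forecast"
+    key holding "median"/"mean" and optional "quantiles"; anything else
+    falls back to zeros. Always returns {"median": [...], "quantiles": {...}}.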
+ """ + # La API de Chronos puede devolver diferentes formatos + # Intentamos adaptarnos a ellos + + if isinstance(raw_output, list): + # Si es una lista de valores, asumimos que es la predicción media + median = raw_output[:prediction_length] + return { + "median": median, + "quantiles": { + "0.1": median, # Sin cuantiles, usar median + "0.5": median, + "0.9": median + } + } + + # Si tiene estructura más compleja, intentar extraer + if "forecast" in raw_output: + forecast = raw_output["forecast"] + if "median" in forecast: + median = forecast["median"][:prediction_length] + else: + median = forecast.get("mean", [0] * prediction_length)[:prediction_length] + + quantiles = forecast.get("quantiles", {}) + return { + "median": median, + "quantiles": quantiles + } + + # Formato por defecto + return { + "median": [0] * prediction_length, + "quantiles": { + "0.1": [0] * prediction_length, + "0.5": [0] * prediction_length, + "0.9": [0] * prediction_length + } + } + + +# ========================= +# Endpoints +# ========================= + +@app.get("/") +def root(): + """Información básica de la API""" + return { + "name": "Chronos-2 Forecasting API", + "version": "1.0.0", + "model": MODEL_ID, + "status": "running", + "docs": "/docs", + "health": "/health" + } + + +@app.get("/health") +def health(): + """Health check del servicio""" + return { + "status": "ok" if HF_TOKEN else "warning", + "model_id": MODEL_ID, + "hf_token_configured": HF_TOKEN is not None, + "message": "Ready" if HF_TOKEN else "HF_TOKEN not configured" + } + + +@app.post("/forecast_univariate", response_model=ForecastUnivariateResponse) +def forecast_univariate(req: ForecastUnivariateRequest): + """ + Pronóstico para una serie temporal univariada. + + Compatible con el Excel Add-in. + """ + values = req.series.values + n = len(values) + + if n == 0: + raise HTTPException(status_code=400, detail="La serie no puede estar vacía.") + + if n < 3: + raise HTTPException( + status_code=400, + detail="La serie debe tener al menos 3 puntos históricos." + ) + + # Llamar a la API de HuggingFace + raw_output = call_chronos_inference(values, req.prediction_length) + + # Procesar la salida + processed = process_chronos_output(raw_output, req.prediction_length) + + # Generar timestamps + timestamps = [f"t+{i+1}" for i in range(req.prediction_length)] + + return ForecastUnivariateResponse( + timestamps=timestamps, + median=processed["median"], + quantiles=processed["quantiles"] + ) + + +@app.post("/detect_anomalies", response_model=AnomalyDetectionResponse) +def detect_anomalies(req: AnomalyDetectionRequest): + """ + Detecta anomalías comparando valores observados con predicciones. + """ + n_hist = len(req.context.values) + + if n_hist == 0: + raise HTTPException(status_code=400, detail="El contexto no puede estar vacío.") + + if len(req.recent_observed) != req.prediction_length: + raise HTTPException( + status_code=400, + detail="recent_observed debe tener la misma longitud que prediction_length." 
+ + # Run the forecast + raw_output = call_chronos_inference(req.context.values, req.prediction_length) + processed = process_chronos_output(raw_output, req.prediction_length) + + # Compare against the observed values + anomalies: List[AnomalyPoint] = [] + + median = processed["median"] + # Try to read the quantiles, or fall back to approximations + q_low = processed["quantiles"].get(str(req.quantile_low), median) + q_high = processed["quantiles"].get(str(req.quantile_high), median) + + for i, obs in enumerate(req.recent_observed): + if i < len(median): + lower = q_low[i] if i < len(q_low) else median[i] * 0.8 + upper = q_high[i] if i < len(q_high) else median[i] * 1.2 + predicted = median[i] + is_anom = (obs < lower) or (obs > upper) + + anomalies.append( + AnomalyPoint( + index=i, + value=obs, + predicted_median=predicted, + lower=lower, + upper=upper, + is_anomaly=is_anom, + ) + ) + + return AnomalyDetectionResponse(anomalies=anomalies) + + +@app.post("/backtest_simple", response_model=BacktestResponse) +def backtest_simple(req: BacktestRequest): + """ + Simple backtesting: splits the series into train/test and computes metrics. + """ + values = np.array(req.series.values, dtype=float) + n = len(values) + + if n <= req.test_length: + raise HTTPException( + status_code=400, + detail="The series must be longer than test_length." + ) + + # Split into train/test + train = values[: n - req.test_length].tolist() + test = values[n - req.test_length :].tolist() + + # Run the forecast + raw_output = call_chronos_inference(train, req.test_length) + processed = process_chronos_output(raw_output, req.test_length) + + forecast = np.array(processed["median"], dtype=float) + test_arr = np.array(test, dtype=float) + + # Compute metrics + mae = float(np.mean(np.abs(test_arr - forecast))) + rmse = float(np.sqrt(np.mean((test_arr - forecast) ** 2))) + + eps = 1e-8 + mape = float(np.mean(np.abs((test_arr - forecast) / (test_arr + eps)))) * 100.0 + + timestamps = [f"test_t{i+1}" for i in range(req.test_length)] + + metrics = BacktestMetrics(mae=mae, mape=mape, rmse=rmse) + + return BacktestResponse( + metrics=metrics, + forecast_median=forecast.tolist(), + forecast_timestamps=timestamps, + actuals=test, + ) + + +# ========================= +# Simplified endpoints for testing +# ========================= + +@app.post("/simple_forecast") +def simple_forecast(series: List[float], prediction_length: int = 7): + """ + Simplified endpoint for quick testing. + """ + if not series: + raise HTTPException(status_code=400, detail="Empty series") + + raw_output = call_chronos_inference(series, prediction_length) + processed = process_chronos_output(raw_output, prediction_length) + + return { + "input_series": series, + "prediction_length": prediction_length, + "forecast": processed["median"], + "model": MODEL_ID + } + + +# ========================= +# NEWLY IMPLEMENTED ENDPOINTS +# ========================= + +@app.post("/forecast_multi_id", response_model=ForecastMultiIdResponse) +def forecast_multi_id(req: ForecastMultiIdRequest): + """ + Forecast for multiple independent time series. + Each series is processed separately and returns its own forecast. + + Useful for forecasting multiple products, locations, etc.
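+ + Illustrative request body (field names from ForecastMultiIdRequest; values made up): + {"series_list": [{"series_id": "sku_1", "values": [10, 12, 11, 13]}, + {"series_id": "sku_2", "values": [5, 6, 7, 6]}], + "prediction_length": 2}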
+ """ + if not req.series_list: + raise HTTPException(status_code=400, detail="La lista de series no puede estar vacía.") + + forecasts = [] + + for series_item in req.series_list: + values = series_item.values + + if len(values) < 3: + raise HTTPException( + status_code=400, + detail=f"La serie '{series_item.series_id}' debe tener al menos 3 puntos." + ) + + # Hacer predicción para esta serie + raw_output = call_chronos_inference(values, req.prediction_length) + processed = process_chronos_output(raw_output, req.prediction_length) + + # Generar timestamps + timestamps = [f"t+{i+1}" for i in range(req.prediction_length)] + + # Agregar a la lista de resultados + forecasts.append( + ForecastUnivariateResponse( + timestamps=timestamps, + median=processed["median"], + quantiles=processed["quantiles"] + ) + ) + + return ForecastMultiIdResponse(forecasts=forecasts) + + +@app.post("/forecast_with_covariates") +def forecast_with_covariates(req: ForecastWithCovariatesRequest): + """ + Pronóstico con variables covariables (exógenas). + + NOTA: Chronos-2 es un modelo univariado puro. Esta implementación + es una aproximación que usa las covariables para ajustar el contexto, + pero no es un modelo multivariado verdadero. + + Para pronósticos reales con covariables, considera usar modelos como + TimesFM, Temporal Fusion Transformer, o Prophet. + """ + target_values = req.target_series.values + + if len(target_values) < 3: + raise HTTPException( + status_code=400, + detail="La serie objetivo debe tener al menos 3 puntos." + ) + + # Verificar que las covariables tengan la longitud correcta + for cov in req.covariates_history: + if len(cov.values) != len(target_values): + raise HTTPException( + status_code=400, + detail=f"La covariable '{cov.name}' debe tener la misma longitud que la serie objetivo." + ) + + for cov in req.covariates_future: + if len(cov.values) != req.prediction_length: + raise HTTPException( + status_code=400, + detail=f"La covariable futura '{cov.name}' debe tener longitud = prediction_length." + ) + + # APROXIMACIÓN: Usar solo la serie objetivo + # En un modelo verdadero con covariables, estas se integrarían en el modelo + raw_output = call_chronos_inference(target_values, req.prediction_length) + processed = process_chronos_output(raw_output, req.prediction_length) + + # Generar timestamps + timestamps = [f"t+{i+1}" for i in range(req.prediction_length)] + + # Nota: Las covariables se devuelven para referencia pero no afectan el forecast + return { + "timestamps": timestamps, + "median": processed["median"], + "quantiles": processed["quantiles"], + "note": "Chronos-2 no usa covariables nativamente. Este forecast se basa solo en la serie objetivo.", + "covariates_used": [cov.name for cov in req.covariates_history], + "covariates_future": [cov.name for cov in req.covariates_future] + } + + +@app.post("/generate_scenarios", response_model=GenerateScenariosResponse) +def generate_scenarios(req: GenerateScenariosRequest): + """ + Genera pronósticos para múltiples escenarios "what-if". + + Cada escenario representa una configuración diferente de covariables futuras. + Útil para análisis de sensibilidad y planificación. + + NOTA: Como Chronos-2 no usa covariables, todos los escenarios + producirán el mismo forecast base. Esta funcionalidad es más útil + con modelos que soporten covariables. + """ + target_values = req.target_series.values + + if len(target_values) < 3: + raise HTTPException( + status_code=400, + detail="La serie objetivo debe tener al menos 3 puntos." 
+ ) + + if not req.scenarios: + raise HTTPException( + status_code=400, + detail="You must provide at least one scenario." + ) + + # Run a base forecast + raw_output = call_chronos_inference(target_values, req.prediction_length) + processed = process_chronos_output(raw_output, req.prediction_length) + + # Generate timestamps + timestamps = [f"t+{i+1}" for i in range(req.prediction_length)] + + scenarios_output = [] + + for scenario in req.scenarios: + # A real covariate-aware model would use scenario.covariate_values + # here to produce a different forecast per scenario. + + # For now, every scenario uses the same base forecast + scenarios_output.append( + ScenarioForecast( + scenario_name=scenario.scenario_name, + timestamps=timestamps, + median=processed["median"], + quantiles=processed["quantiles"] + ) + ) + + return GenerateScenariosResponse(scenarios=scenarios_output) + + +@app.post("/forecast_multivariate", response_model=ForecastMultivariateResponse) +def forecast_multivariate(req: ForecastMultivariateRequest): + """ + Multivariate forecast: predicts multiple related series. + + NOTE: Chronos-2 is used here in univariate mode. This implementation + processes each series independently. For true multivariate forecasts + (capturing correlations between series), use models such as + Temporal Fusion Transformer, DeepAR, or Vector Autoregression (VAR). + """ + if not req.series_list: + raise HTTPException( + status_code=400, + detail="The series list cannot be empty." + ) + + forecasts = [] + + for series_item in req.series_list: + values = series_item.values + + if len(values) < 3: + raise HTTPException( + status_code=400, + detail=f"Series '{series_item.series_name}' must have at least 3 points." + ) + + # Process each series independently + raw_output = call_chronos_inference(values, req.prediction_length) + processed = process_chronos_output(raw_output, req.prediction_length) + + # Generate timestamps + timestamps = [f"t+{i+1}" for i in range(req.prediction_length)] + + forecasts.append( + MultivariateForecast( + series_name=series_item.series_name, + timestamps=timestamps, + median=processed["median"], + quantiles=processed["quantiles"] + ) + ) + + return ForecastMultivariateResponse(forecasts=forecasts) + + +if __name__ == "__main__": + import uvicorn + port = int(os.getenv("PORT", 7860)) + uvicorn.run(app, host="0.0.0.0", port=port) diff --git a/app/main_hf.py b/app/main_hf.py new file mode 100644 index 0000000000000000000000000000000000000000..acddce7f8a35af18c0a3c3e246d0dc5aa8e315d5 --- /dev/null +++ b/app/main_hf.py @@ -0,0 +1,681 @@ +import os +from typing import List, Dict, Optional +import json + +import numpy as np +import pandas as pd +from fastapi import FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel, Field +from huggingface_hub import InferenceClient + + +# ========================= +# Configuration +# ========================= + +HF_TOKEN = os.getenv("HF_TOKEN") +MODEL_ID = os.getenv("CHRONOS_MODEL_ID", "amazon/chronos-2") + +app = FastAPI( + title="Chronos-2 Forecasting API (HF Inference)", + description=( + "Forecasting API using Chronos-2 via the Hugging Face Inference API. " + "Compatible with the Excel Add-in."
+ ), + version="1.0.0", +) + +# Configure CORS +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], # In production, restrict to the allowed domains + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# HF Inference client +if not HF_TOKEN: + print("⚠️ WARNING: HF_TOKEN not configured. The API may not work correctly.") + print(" Set HF_TOKEN in the Space environment variables.") + client = None +else: + client = InferenceClient(token=HF_TOKEN) + + +# ========================= +# Pydantic models +# ========================= + +class UnivariateSeries(BaseModel): + values: List[float] + + +class ForecastUnivariateRequest(BaseModel): + series: UnivariateSeries + prediction_length: int = Field(7, description="Number of steps to forecast") + quantile_levels: Optional[List[float]] = Field( + default=[0.1, 0.5, 0.9], + description="Quantiles for confidence intervals" + ) + freq: str = Field("D", description="Time frequency (D, W, M, etc.)") + + +class ForecastUnivariateResponse(BaseModel): + timestamps: List[str] + median: List[float] + quantiles: Dict[str, List[float]] + + +class AnomalyDetectionRequest(BaseModel): + context: UnivariateSeries + recent_observed: List[float] + prediction_length: int = 7 + quantile_low: float = 0.05 + quantile_high: float = 0.95 + + +class AnomalyPoint(BaseModel): + index: int + value: float + predicted_median: float + lower: float + upper: float + is_anomaly: bool + + +class AnomalyDetectionResponse(BaseModel): + anomalies: List[AnomalyPoint] + + +class BacktestRequest(BaseModel): + series: UnivariateSeries + prediction_length: int = 7 + test_length: int = 28 + + +class BacktestMetrics(BaseModel): + mae: float + mape: float + rmse: float + + +class BacktestResponse(BaseModel): + metrics: BacktestMetrics + forecast_median: List[float] + forecast_timestamps: List[str] + actuals: List[float] + + +# Multi-series models +class MultiSeriesItem(BaseModel): + series_id: str + values: List[float] + + +class ForecastMultiIdRequest(BaseModel): + series_list: List[MultiSeriesItem] + prediction_length: int = Field(7, description="Number of steps to forecast") + quantile_levels: Optional[List[float]] = Field( + default=[0.1, 0.5, 0.9], + description="Quantiles for confidence intervals" + ) + freq: str = Field("D", description="Time frequency (D, W, M, etc.)") + + +class ForecastMultiIdResponse(BaseModel): + forecasts: List[ForecastUnivariateResponse] + + +# Covariate models +class CovariateData(BaseModel): + values: List[float] + name: str = Field(..., description="Covariate name") + + +class ForecastWithCovariatesRequest(BaseModel): + target_series: UnivariateSeries + covariates_history: List[CovariateData] + covariates_future: List[CovariateData] + prediction_length: int = Field(7, description="Number of steps to forecast") + quantile_levels: Optional[List[float]] = Field( + default=[0.1, 0.5, 0.9], + description="Quantiles for confidence intervals" + ) + freq: str = Field("D", description="Time frequency") + + +# Scenario models +class ScenarioData(BaseModel): + scenario_name: str + covariate_values: Dict[str, List[float]] + + +class GenerateScenariosRequest(BaseModel): + target_series: UnivariateSeries + scenarios: List[ScenarioData] + prediction_length: int = Field(7, description="Number of steps to forecast") + freq: str = Field("D", description="Time frequency") + + +class ScenarioForecast(BaseModel): + scenario_name: str + timestamps: List[str] + median:
List[float] + quantiles: Dict[str, List[float]] + + +class GenerateScenariosResponse(BaseModel): + scenarios: List[ScenarioForecast] + + +# Multivariate models +class MultivariateSeries(BaseModel): + series_name: str + values: List[float] + + +class ForecastMultivariateRequest(BaseModel): + series_list: List[MultivariateSeries] + prediction_length: int = Field(7, description="Number of steps to forecast") + quantile_levels: Optional[List[float]] = Field( + default=[0.1, 0.5, 0.9], + description="Quantiles for confidence intervals" + ) + freq: str = Field("D", description="Time frequency") + + +class MultivariateForecast(BaseModel): + series_name: str + timestamps: List[str] + median: List[float] + quantiles: Dict[str, List[float]] + + +class ForecastMultivariateResponse(BaseModel): + forecasts: List[MultivariateForecast] + + +# ========================= +# Helper to call HF Inference +# ========================= + +def call_chronos_inference(series: List[float], prediction_length: int) -> Dict: + """ + Calls the Hugging Face Inference API for Chronos. + Returns a dictionary with the predictions. + """ + if client is None: + raise HTTPException( + status_code=503, + detail="HF_TOKEN not configured. Contact the service administrator." + ) + + try: + # Try the time-series-specific endpoint + import requests + + url = f"https://api-inference.huggingface.co/models/{MODEL_ID}" + headers = {"Authorization": f"Bearer {HF_TOKEN}"} + + payload = { + "inputs": series, + "parameters": { + "prediction_length": prediction_length, + "num_samples": 100 # To derive quantiles + } + } + + response = requests.post(url, headers=headers, json=payload, timeout=60) + + if response.status_code == 503: + raise HTTPException( + status_code=503, + detail="The model is loading. Please try again in 30-60 seconds." + ) + elif response.status_code != 200: + raise HTTPException( + status_code=response.status_code, + detail=f"HuggingFace API error: {response.text}" + ) + + result = response.json() + return result + + except HTTPException: + # Re-raise our own HTTP errors untouched; the generic handler below + # would otherwise swallow them and return a misleading 500. + raise + except requests.exceptions.Timeout: + raise HTTPException( + status_code=504, + detail="Timed out calling the HuggingFace API. The model may be loading." + ) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Unexpected error: {str(e)}" + ) + + +def process_chronos_output(raw_output: Dict, prediction_length: int) -> Dict: + """ + Processes the Chronos output to extract the median and quantiles.
+ """ + # La API de Chronos puede devolver diferentes formatos + # Intentamos adaptarnos a ellos + + if isinstance(raw_output, list): + # Si es una lista de valores, asumimos que es la predicción media + median = raw_output[:prediction_length] + return { + "median": median, + "quantiles": { + "0.1": median, # Sin cuantiles, usar median + "0.5": median, + "0.9": median + } + } + + # Si tiene estructura más compleja, intentar extraer + if "forecast" in raw_output: + forecast = raw_output["forecast"] + if "median" in forecast: + median = forecast["median"][:prediction_length] + else: + median = forecast.get("mean", [0] * prediction_length)[:prediction_length] + + quantiles = forecast.get("quantiles", {}) + return { + "median": median, + "quantiles": quantiles + } + + # Formato por defecto + return { + "median": [0] * prediction_length, + "quantiles": { + "0.1": [0] * prediction_length, + "0.5": [0] * prediction_length, + "0.9": [0] * prediction_length + } + } + + +# ========================= +# Endpoints +# ========================= + +@app.get("/") +def root(): + """Información básica de la API""" + return { + "name": "Chronos-2 Forecasting API", + "version": "1.0.0", + "model": MODEL_ID, + "status": "running", + "docs": "/docs", + "health": "/health" + } + + +@app.get("/health") +def health(): + """Health check del servicio""" + return { + "status": "ok" if HF_TOKEN else "warning", + "model_id": MODEL_ID, + "hf_token_configured": HF_TOKEN is not None, + "message": "Ready" if HF_TOKEN else "HF_TOKEN not configured" + } + + +@app.post("/forecast_univariate", response_model=ForecastUnivariateResponse) +def forecast_univariate(req: ForecastUnivariateRequest): + """ + Pronóstico para una serie temporal univariada. + + Compatible con el Excel Add-in. + """ + values = req.series.values + n = len(values) + + if n == 0: + raise HTTPException(status_code=400, detail="La serie no puede estar vacía.") + + if n < 3: + raise HTTPException( + status_code=400, + detail="La serie debe tener al menos 3 puntos históricos." + ) + + # Llamar a la API de HuggingFace + raw_output = call_chronos_inference(values, req.prediction_length) + + # Procesar la salida + processed = process_chronos_output(raw_output, req.prediction_length) + + # Generar timestamps + timestamps = [f"t+{i+1}" for i in range(req.prediction_length)] + + return ForecastUnivariateResponse( + timestamps=timestamps, + median=processed["median"], + quantiles=processed["quantiles"] + ) + + +@app.post("/detect_anomalies", response_model=AnomalyDetectionResponse) +def detect_anomalies(req: AnomalyDetectionRequest): + """ + Detecta anomalías comparando valores observados con predicciones. + """ + n_hist = len(req.context.values) + + if n_hist == 0: + raise HTTPException(status_code=400, detail="El contexto no puede estar vacío.") + + if len(req.recent_observed) != req.prediction_length: + raise HTTPException( + status_code=400, + detail="recent_observed debe tener la misma longitud que prediction_length." 
+ ) + + # Run the forecast + raw_output = call_chronos_inference(req.context.values, req.prediction_length) + processed = process_chronos_output(raw_output, req.prediction_length) + + # Compare against the observed values + anomalies: List[AnomalyPoint] = [] + + median = processed["median"] + # Try to read the quantiles, or fall back to approximations + q_low = processed["quantiles"].get(str(req.quantile_low), median) + q_high = processed["quantiles"].get(str(req.quantile_high), median) + + for i, obs in enumerate(req.recent_observed): + if i < len(median): + lower = q_low[i] if i < len(q_low) else median[i] * 0.8 + upper = q_high[i] if i < len(q_high) else median[i] * 1.2 + predicted = median[i] + is_anom = (obs < lower) or (obs > upper) + + anomalies.append( + AnomalyPoint( + index=i, + value=obs, + predicted_median=predicted, + lower=lower, + upper=upper, + is_anomaly=is_anom, + ) + ) + + return AnomalyDetectionResponse(anomalies=anomalies) + + +@app.post("/backtest_simple", response_model=BacktestResponse) +def backtest_simple(req: BacktestRequest): + """ + Simple backtesting: splits the series into train/test and computes metrics. + """ + values = np.array(req.series.values, dtype=float) + n = len(values) + + if n <= req.test_length: + raise HTTPException( + status_code=400, + detail="The series must be longer than test_length." + ) + + # Split into train/test + train = values[: n - req.test_length].tolist() + test = values[n - req.test_length :].tolist() + + # Run the forecast + raw_output = call_chronos_inference(train, req.test_length) + processed = process_chronos_output(raw_output, req.test_length) + + forecast = np.array(processed["median"], dtype=float) + test_arr = np.array(test, dtype=float) + + # Compute metrics + mae = float(np.mean(np.abs(test_arr - forecast))) + rmse = float(np.sqrt(np.mean((test_arr - forecast) ** 2))) + + eps = 1e-8 + mape = float(np.mean(np.abs((test_arr - forecast) / (test_arr + eps)))) * 100.0 + + timestamps = [f"test_t{i+1}" for i in range(req.test_length)] + + metrics = BacktestMetrics(mae=mae, mape=mape, rmse=rmse) + + return BacktestResponse( + metrics=metrics, + forecast_median=forecast.tolist(), + forecast_timestamps=timestamps, + actuals=test, + ) + + +# ========================= +# Simplified endpoints for testing +# ========================= + +@app.post("/simple_forecast") +def simple_forecast(series: List[float], prediction_length: int = 7): + """ + Simplified endpoint for quick testing. + """ + if not series: + raise HTTPException(status_code=400, detail="Empty series") + + raw_output = call_chronos_inference(series, prediction_length) + processed = process_chronos_output(raw_output, prediction_length) + + return { + "input_series": series, + "prediction_length": prediction_length, + "forecast": processed["median"], + "model": MODEL_ID + } + + +# ========================= +# NEWLY IMPLEMENTED ENDPOINTS +# ========================= + +@app.post("/forecast_multi_id", response_model=ForecastMultiIdResponse) +def forecast_multi_id(req: ForecastMultiIdRequest): + """ + Forecast for multiple independent time series. + Each series is processed separately and returns its own forecast. + + Useful for forecasting multiple products, locations, etc.
+ """ + if not req.series_list: + raise HTTPException(status_code=400, detail="La lista de series no puede estar vacía.") + + forecasts = [] + + for series_item in req.series_list: + values = series_item.values + + if len(values) < 3: + raise HTTPException( + status_code=400, + detail=f"La serie '{series_item.series_id}' debe tener al menos 3 puntos." + ) + + # Hacer predicción para esta serie + raw_output = call_chronos_inference(values, req.prediction_length) + processed = process_chronos_output(raw_output, req.prediction_length) + + # Generar timestamps + timestamps = [f"t+{i+1}" for i in range(req.prediction_length)] + + # Agregar a la lista de resultados + forecasts.append( + ForecastUnivariateResponse( + timestamps=timestamps, + median=processed["median"], + quantiles=processed["quantiles"] + ) + ) + + return ForecastMultiIdResponse(forecasts=forecasts) + + +@app.post("/forecast_with_covariates") +def forecast_with_covariates(req: ForecastWithCovariatesRequest): + """ + Pronóstico con variables covariables (exógenas). + + NOTA: Chronos-2 es un modelo univariado puro. Esta implementación + es una aproximación que usa las covariables para ajustar el contexto, + pero no es un modelo multivariado verdadero. + + Para pronósticos reales con covariables, considera usar modelos como + TimesFM, Temporal Fusion Transformer, o Prophet. + """ + target_values = req.target_series.values + + if len(target_values) < 3: + raise HTTPException( + status_code=400, + detail="La serie objetivo debe tener al menos 3 puntos." + ) + + # Verificar que las covariables tengan la longitud correcta + for cov in req.covariates_history: + if len(cov.values) != len(target_values): + raise HTTPException( + status_code=400, + detail=f"La covariable '{cov.name}' debe tener la misma longitud que la serie objetivo." + ) + + for cov in req.covariates_future: + if len(cov.values) != req.prediction_length: + raise HTTPException( + status_code=400, + detail=f"La covariable futura '{cov.name}' debe tener longitud = prediction_length." + ) + + # APROXIMACIÓN: Usar solo la serie objetivo + # En un modelo verdadero con covariables, estas se integrarían en el modelo + raw_output = call_chronos_inference(target_values, req.prediction_length) + processed = process_chronos_output(raw_output, req.prediction_length) + + # Generar timestamps + timestamps = [f"t+{i+1}" for i in range(req.prediction_length)] + + # Nota: Las covariables se devuelven para referencia pero no afectan el forecast + return { + "timestamps": timestamps, + "median": processed["median"], + "quantiles": processed["quantiles"], + "note": "Chronos-2 no usa covariables nativamente. Este forecast se basa solo en la serie objetivo.", + "covariates_used": [cov.name for cov in req.covariates_history], + "covariates_future": [cov.name for cov in req.covariates_future] + } + + +@app.post("/generate_scenarios", response_model=GenerateScenariosResponse) +def generate_scenarios(req: GenerateScenariosRequest): + """ + Genera pronósticos para múltiples escenarios "what-if". + + Cada escenario representa una configuración diferente de covariables futuras. + Útil para análisis de sensibilidad y planificación. + + NOTA: Como Chronos-2 no usa covariables, todos los escenarios + producirán el mismo forecast base. Esta funcionalidad es más útil + con modelos que soporten covariables. + """ + target_values = req.target_series.values + + if len(target_values) < 3: + raise HTTPException( + status_code=400, + detail="La serie objetivo debe tener al menos 3 puntos." 
+ ) + + if not req.scenarios: + raise HTTPException( + status_code=400, + detail="You must provide at least one scenario." + ) + + # Run a base forecast + raw_output = call_chronos_inference(target_values, req.prediction_length) + processed = process_chronos_output(raw_output, req.prediction_length) + + # Generate timestamps + timestamps = [f"t+{i+1}" for i in range(req.prediction_length)] + + scenarios_output = [] + + for scenario in req.scenarios: + # A real covariate-aware model would use scenario.covariate_values + # here to produce a different forecast per scenario. + + # For now, every scenario uses the same base forecast + scenarios_output.append( + ScenarioForecast( + scenario_name=scenario.scenario_name, + timestamps=timestamps, + median=processed["median"], + quantiles=processed["quantiles"] + ) + ) + + return GenerateScenariosResponse(scenarios=scenarios_output) + + +@app.post("/forecast_multivariate", response_model=ForecastMultivariateResponse) +def forecast_multivariate(req: ForecastMultivariateRequest): + """ + Multivariate forecast: predicts multiple related series. + + NOTE: Chronos-2 is used here in univariate mode. This implementation + processes each series independently. For true multivariate forecasts + (capturing correlations between series), use models such as + Temporal Fusion Transformer, DeepAR, or Vector Autoregression (VAR). + """ + if not req.series_list: + raise HTTPException( + status_code=400, + detail="The series list cannot be empty." + ) + + forecasts = [] + + for series_item in req.series_list: + values = series_item.values + + if len(values) < 3: + raise HTTPException( + status_code=400, + detail=f"Series '{series_item.series_name}' must have at least 3 points." + ) + + # Process each series independently + raw_output = call_chronos_inference(values, req.prediction_length) + processed = process_chronos_output(raw_output, req.prediction_length) + + # Generate timestamps + timestamps = [f"t+{i+1}" for i in range(req.prediction_length)] + + forecasts.append( + MultivariateForecast( + series_name=series_item.series_name, + timestamps=timestamps, + median=processed["median"], + quantiles=processed["quantiles"] + ) + ) + + return ForecastMultivariateResponse(forecasts=forecasts) + + +if __name__ == "__main__": + import uvicorn + port = int(os.getenv("PORT", 7860)) + uvicorn.run(app, host="0.0.0.0", port=port) diff --git a/app/main_v2.1.1_backup.py b/app/main_v2.1.1_backup.py new file mode 100644 index 0000000000000000000000000000000000000000..97e59e21d6c7f2d90eb1e8360340a1ec55c7256a --- /dev/null +++ b/app/main_v2.1.1_backup.py @@ -0,0 +1,717 @@ +import os +from typing import List, Dict, Optional + +import numpy as np +import pandas as pd +from fastapi import FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from fastapi.staticfiles import StaticFiles +from fastapi.responses import FileResponse +from pydantic import BaseModel, Field + +from chronos import Chronos2Pipeline + + +# ========================= +# Model configuration +# ========================= + +MODEL_ID = os.getenv("CHRONOS_MODEL_ID", "amazon/chronos-2") +DEVICE_MAP = os.getenv("DEVICE_MAP", "cpu") # "cpu" or "cuda" + +app = FastAPI( + title="Chronos-2 Universal Forecasting API + Excel Add-in", + description=( + "Server for Chronos-2 forecasts: univariate, " + "multivariate, covariates, scenarios, anomalies, and backtesting. " + "Includes the Excel Add-in v2.1.0 with static files."
+ ), + version="2.1.0", +) + +# Configure CORS for the Excel Add-in +app.add_middleware( + CORSMiddleware, + allow_origins=[ + "https://localhost:3001", + "https://localhost:3000", + "https://ttzzs-chronos2-excel-forecasting-api.hf.space", + "*" # Allow all origins for Office Add-ins (the wildcard makes the explicit entries above redundant) + ], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Load the model once at process startup +pipeline = Chronos2Pipeline.from_pretrained(MODEL_ID, device_map=DEVICE_MAP) + +# ========================= +# Static files for the Excel Add-in +# ========================= + +# Mount the static directories if they exist +if os.path.exists("static"): + app.mount("/assets", StaticFiles(directory="static/assets"), name="assets") + app.mount("/taskpane", StaticFiles(directory="static/taskpane"), name="taskpane") + app.mount("/commands", StaticFiles(directory="static/commands"), name="commands") + + # Endpoint for manifest.xml + @app.get("/manifest.xml", response_class=FileResponse) + async def get_manifest(): + """Returns the Excel Add-in manifest.xml""" + return FileResponse("static/manifest.xml", media_type="application/xml") + + @app.get("/", tags=["Info"]) + async def root_with_addon(): + """API + Add-in information""" + return { + "name": "Chronos-2 Forecasting API", + "version": "2.1.0", + "model": MODEL_ID, + "endpoints": { + "api": [ + "/health", + "/forecast_univariate", + "/forecast_multi_id", + "/forecast_with_covariates", + "/forecast_multivariate", + "/forecast_scenarios", + "/detect_anomalies", + "/backtest_simple" + ], + "add_in": [ + "/manifest.xml", + "/taskpane/taskpane.html", + "/assets/icon-*.png" + ] + }, + "docs": "/docs", + "excel_add_in": { + "manifest_url": "https://ttzzs-chronos2-excel-forecasting-api.hf.space/manifest.xml", + "version": "2.1.0", + "features": [ + "Univariate Forecast", + "Multi-Series Forecast", + "Forecast with Covariates", + "Scenario Analysis", + "Multivariate Forecast", + "Anomaly Detection", + "Backtest" + ] + } + } +else: + @app.get("/", tags=["Info"]) + async def root_api_only(): + """API information (without the Add-in)""" + return { + "name": "Chronos-2 Forecasting API", + "version": "2.1.0", + "model": MODEL_ID, + "docs": "/docs" + } + + +# ========================= +# Shared Pydantic models +# ========================= + +class BaseForecastConfig(BaseModel): + prediction_length: int = Field( + 7, description="Forecast horizon (number of future steps)" + ) + quantile_levels: List[float] = Field( + default_factory=lambda: [0.1, 0.5, 0.9], + description="Quantiles for the probabilistic forecast", + ) + start_timestamp: Optional[str] = Field( + default=None, + description=( + "Start date/time of the history (ISO format). " + "If not given, integer indices are used." + ), + ) + freq: str = Field( + "D", + description="Time frequency (e.g. 'D' daily, 'H' hourly, 'W' weekly...).", + ) + + +class UnivariateSeries(BaseModel): + values: List[float] + + +class MultiSeriesItem(BaseModel): + series_id: str + values: List[float] + + +class CovariatePoint(BaseModel): + """ + Time point used both for context (history) and for future covariates.
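+ + Illustrative point (made-up values; field names from this model): + {"timestamp": "2024-01-01", "id": "series_0", "target": 100.0, + "covariates": {"promo": 1.0, "price": 9.99}}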
+ """ + timestamp: Optional[str] = None # opcional si se usan índices enteros + id: Optional[str] = None # id de serie, por defecto 'series_0' + target: Optional[float] = None # valor de la variable objetivo (histórico) + covariates: Dict[str, float] = Field( + default_factory=dict, + description="Nombre -> valor de cada covariable dinámica.", + ) + + +# ========================= +# 1) Healthcheck +# ========================= + +@app.get("/health") +def health(): + """ + Devuelve información básica del estado del servidor y el modelo cargado. + """ + return { + "status": "ok", + "model_id": MODEL_ID, + "device_map": DEVICE_MAP, + } + + +# ========================= +# 2) Pronóstico univariante +# ========================= + +class ForecastUnivariateRequest(BaseForecastConfig): + series: UnivariateSeries + + +class ForecastUnivariateResponse(BaseModel): + timestamps: List[str] + median: List[float] + quantiles: Dict[str, List[float]] # "0.1" -> [..], "0.9" -> [..] + + +@app.post("/forecast_univariate", response_model=ForecastUnivariateResponse) +def forecast_univariate(req: ForecastUnivariateRequest): + """ + Pronóstico para una sola serie temporal (univariante, sin covariables). + Pensado para uso directo desde Excel u otras herramientas sencillas. + """ + values = req.series.values + n = len(values) + if n == 0: + raise HTTPException(status_code=400, detail="La serie no puede estar vacía.") + + # Construimos contexto como DataFrame largo (id, timestamp, target) + if req.start_timestamp: + timestamps = pd.date_range( + start=pd.to_datetime(req.start_timestamp), + periods=n, + freq=req.freq, + ) + else: + timestamps = pd.RangeIndex(start=0, stop=n, step=1) + + context_df = pd.DataFrame( + { + "id": ["series_0"] * n, + "timestamp": timestamps, + "target": values, + } + ) + + pred_df = pipeline.predict_df( + context_df, + prediction_length=req.prediction_length, + quantile_levels=req.quantile_levels, + id_column="id", + timestamp_column="timestamp", + target="target", + ) + + pred_df = pred_df.sort_values("timestamp") + timestamps_out = pred_df["timestamp"].astype(str).tolist() + median = pred_df["predictions"].astype(float).tolist() + + quantiles_dict: Dict[str, List[float]] = {} + for q in req.quantile_levels: + key = f"{q:.3g}" + if key in pred_df.columns: + quantiles_dict[key] = pred_df[key].astype(float).tolist() + + return ForecastUnivariateResponse( + timestamps=timestamps_out, + median=median, + quantiles=quantiles_dict, + ) + + +# ========================= +# 3) Multi-serie (multi-id) +# ========================= + +class ForecastMultiSeriesRequest(BaseForecastConfig): + series_list: List[MultiSeriesItem] + + +class SeriesForecast(BaseModel): + series_id: str + timestamps: List[str] + median: List[float] + quantiles: Dict[str, List[float]] + + +class ForecastMultiSeriesResponse(BaseModel): + forecasts: List[SeriesForecast] + + +@app.post("/forecast_multi_id", response_model=ForecastMultiSeriesResponse) +def forecast_multi_id(req: ForecastMultiSeriesRequest): + """ + Pronóstico para múltiples series (por ejemplo, varios SKU o tiendas). 
+ """ + if not req.series_list: + raise HTTPException(status_code=400, detail="Debes enviar al menos una serie.") + + frames = [] + for item in req.series_list: + n = len(item.values) + if n == 0: + continue + if req.start_timestamp: + timestamps = pd.date_range( + start=pd.to_datetime(req.start_timestamp), + periods=n, + freq=req.freq, + ) + else: + timestamps = pd.RangeIndex(start=0, stop=n, step=1) + + frames.append( + pd.DataFrame( + { + "id": [item.series_id] * n, + "timestamp": timestamps, + "target": item.values, + } + ) + ) + + if not frames: + raise HTTPException(status_code=400, detail="Todas las series están vacías.") + + context_df = pd.concat(frames, ignore_index=True) + + pred_df = pipeline.predict_df( + context_df, + prediction_length=req.prediction_length, + quantile_levels=req.quantile_levels, + id_column="id", + timestamp_column="timestamp", + target="target", + ) + + forecasts: List[SeriesForecast] = [] + for series_id, group in pred_df.groupby("id"): + group = group.sort_values("timestamp") + timestamps_out = group["timestamp"].astype(str).tolist() + median = group["predictions"].astype(float).tolist() + quantiles_dict: Dict[str, List[float]] = {} + for q in req.quantile_levels: + key = f"{q:.3g}" + if key in group.columns: + quantiles_dict[key] = group[key].astype(float).tolist() + + forecasts.append( + SeriesForecast( + series_id=series_id, + timestamps=timestamps_out, + median=median, + quantiles=quantiles_dict, + ) + ) + + return ForecastMultiSeriesResponse(forecasts=forecasts) + + +# ========================= +# 4) Pronóstico con covariables +# ========================= + +class ForecastWithCovariatesRequest(BaseForecastConfig): + context: List[CovariatePoint] + future: Optional[List[CovariatePoint]] = None + + +class ForecastWithCovariatesResponse(BaseModel): + # filas con todas las columnas de pred_df serializadas como string + pred_df: List[Dict[str, str]] + + +@app.post("/forecast_with_covariates", response_model=ForecastWithCovariatesResponse) +def forecast_with_covariates(req: ForecastWithCovariatesRequest): + """ + Pronóstico con información de covariables (promos, precio, clima...) tanto + en el histórico (context) como en futuros posibles (future). 
+ """ + if not req.context: + raise HTTPException(status_code=400, detail="El contexto no puede estar vacío.") + + ctx_rows = [] + for p in req.context: + if p.target is None: + continue + row = { + "id": p.id or "series_0", + "timestamp": p.timestamp, + "target": p.target, + } + for k, v in p.covariates.items(): + row[k] = v + ctx_rows.append(row) + + context_df = pd.DataFrame(ctx_rows) + if "timestamp" not in context_df or context_df["timestamp"].isna().any(): + context_df["timestamp"] = pd.RangeIndex(start=0, stop=len(context_df), step=1) + + future_df = None + if req.future: + fut_rows = [] + for p in req.future: + row = { + "id": p.id or "series_0", + "timestamp": p.timestamp, + } + for k, v in p.covariates.items(): + row[k] = v + fut_rows.append(row) + future_df = pd.DataFrame(fut_rows) + if "timestamp" not in future_df or future_df["timestamp"].isna().any(): + future_df["timestamp"] = pd.RangeIndex( + start=context_df["timestamp"].max() + 1, + stop=context_df["timestamp"].max() + 1 + len(future_df), + step=1, + ) + + pred_df = pipeline.predict_df( + context_df, + future_df=future_df, + prediction_length=req.prediction_length, + quantile_levels=req.quantile_levels, + id_column="id", + timestamp_column="timestamp", + target="target", + ) + + pred_df = pred_df.sort_values(["id", "timestamp"]) + out_records: List[Dict[str, str]] = [] + for _, row in pred_df.iterrows(): + record = {k: str(v) for k, v in row.items()} + out_records.append(record) + + return ForecastWithCovariatesResponse(pred_df=out_records) + + +# ========================= +# 5) Multivariante (varios targets) +# ========================= + +class MultivariateContextPoint(BaseModel): + timestamp: Optional[str] = None + id: Optional[str] = None + targets: Dict[str, float] # p.ej. {"demand": 100, "returns": 5} + covariates: Dict[str, float] = Field(default_factory=dict) + + +class ForecastMultivariateRequest(BaseForecastConfig): + context: List[MultivariateContextPoint] + target_columns: List[str] # nombres de columnas objetivo + + +class ForecastMultivariateResponse(BaseModel): + pred_df: List[Dict[str, str]] + + +@app.post("/forecast_multivariate", response_model=ForecastMultivariateResponse) +def forecast_multivariate(req: ForecastMultivariateRequest): + """ + Pronóstico multivariante: múltiples columnas objetivo (p.ej. demanda y devoluciones). 
+ """ + if not req.context: + raise HTTPException(status_code=400, detail="El contexto no puede estar vacío.") + if not req.target_columns: + raise HTTPException(status_code=400, detail="Debes indicar columnas objetivo.") + + rows = [] + for p in req.context: + base = { + "id": p.id or "series_0", + "timestamp": p.timestamp, + } + for t_name, t_val in p.targets.items(): + base[t_name] = t_val + for k, v in p.covariates.items(): + base[k] = v + rows.append(base) + + context_df = pd.DataFrame(rows) + if "timestamp" not in context_df or context_df["timestamp"].isna().any(): + context_df["timestamp"] = pd.RangeIndex(start=0, stop=len(context_df), step=1) + + pred_df = pipeline.predict_df( + context_df, + prediction_length=req.prediction_length, + quantile_levels=req.quantile_levels, + id_column="id", + timestamp_column="timestamp", + target=req.target_columns, + ) + + pred_df = pred_df.sort_values(["id", "timestamp"]) + out_records = [{k: str(v) for k, v in row.items()} for _, row in pred_df.iterrows()] + return ForecastMultivariateResponse(pred_df=out_records) + + +# ========================= +# 6) Escenarios (what-if) +# ========================= + +class ScenarioDefinition(BaseModel): + name: str + future_covariates: List[CovariatePoint] + + +class ScenarioForecast(BaseModel): + name: str + pred_df: List[Dict[str, str]] + + +class ForecastScenariosRequest(BaseForecastConfig): + context: List[CovariatePoint] + scenarios: List[ScenarioDefinition] + + +class ForecastScenariosResponse(BaseModel): + scenarios: List[ScenarioForecast] + + +@app.post("/forecast_scenarios", response_model=ForecastScenariosResponse) +def forecast_scenarios(req: ForecastScenariosRequest): + """ + Evaluación de múltiples escenarios (what-if) cambiando las covariables futuras + (por ejemplo, promo ON/OFF, diferentes precios, etc.). 
+ """ + if not req.context: + raise HTTPException(status_code=400, detail="El contexto no puede estar vacío.") + if not req.scenarios: + raise HTTPException(status_code=400, detail="Debes definir al menos un escenario.") + + ctx_rows = [] + for p in req.context: + if p.target is None: + continue + row = { + "id": p.id or "series_0", + "timestamp": p.timestamp, + "target": p.target, + } + for k, v in p.covariates.items(): + row[k] = v + ctx_rows.append(row) + + context_df = pd.DataFrame(ctx_rows) + if "timestamp" not in context_df or context_df["timestamp"].isna().any(): + context_df["timestamp"] = pd.RangeIndex(start=0, stop=len(context_df), step=1) + + results: List[ScenarioForecast] = [] + + for scen in req.scenarios: + fut_rows = [] + for p in scen.future_covariates: + row = { + "id": p.id or "series_0", + "timestamp": p.timestamp, + } + for k, v in p.covariates.items(): + row[k] = v + fut_rows.append(row) + future_df = pd.DataFrame(fut_rows) + if "timestamp" not in future_df or future_df["timestamp"].isna().any(): + future_df["timestamp"] = pd.RangeIndex( + start=context_df["timestamp"].max() + 1, + stop=context_df["timestamp"].max() + 1 + len(future_df), + step=1, + ) + + pred_df = pipeline.predict_df( + context_df, + future_df=future_df, + prediction_length=req.prediction_length, + quantile_levels=req.quantile_levels, + id_column="id", + timestamp_column="timestamp", + target="target", + ) + pred_df = pred_df.sort_values(["id", "timestamp"]) + out_records = [{k: str(v) for k, v in row.items()} for _, row in pred_df.iterrows()] + + results.append(ScenarioForecast(name=scen.name, pred_df=out_records)) + + return ForecastScenariosResponse(scenarios=results) + + +# ========================= +# 7) Detección de anomalías +# ========================= + +class AnomalyDetectionRequest(BaseModel): + context: UnivariateSeries + recent_observed: List[float] + prediction_length: int = 7 + quantile_low: float = 0.05 + quantile_high: float = 0.95 + + +class AnomalyPoint(BaseModel): + index: int + value: float + predicted_median: float + lower: float + upper: float + is_anomaly: bool + + +class AnomalyDetectionResponse(BaseModel): + anomalies: List[AnomalyPoint] + + +@app.post("/detect_anomalies", response_model=AnomalyDetectionResponse) +def detect_anomalies(req: AnomalyDetectionRequest): + """ + Marca como anomalías los puntos observados recientes que caen fuera del + intervalo [quantile_low, quantile_high] del pronóstico. 
+ """ + n_hist = len(req.context.values) + if n_hist == 0: + raise HTTPException(status_code=400, detail="La serie histórica no puede estar vacía.") + if len(req.recent_observed) != req.prediction_length: + raise HTTPException( + status_code=400, + detail="recent_observed debe tener la misma longitud que prediction_length.", + ) + + context_df = pd.DataFrame( + { + "id": ["series_0"] * n_hist, + "timestamp": pd.RangeIndex(start=0, stop=n_hist, step=1), + "target": req.context.values, + } + ) + + quantiles = sorted({req.quantile_low, 0.5, req.quantile_high}) + pred_df = pipeline.predict_df( + context_df, + prediction_length=req.prediction_length, + quantile_levels=quantiles, + id_column="id", + timestamp_column="timestamp", + target="target", + ).sort_values("timestamp") + + q_low_col = f"{req.quantile_low:.3g}" + q_high_col = f"{req.quantile_high:.3g}" + + anomalies: List[AnomalyPoint] = [] + for i, (obs, (_, row)) in enumerate(zip(req.recent_observed, pred_df.iterrows())): + lower = float(row[q_low_col]) + upper = float(row[q_high_col]) + median = float(row["predictions"]) + is_anom = (obs < lower) or (obs > upper) + anomalies.append( + AnomalyPoint( + index=i, + value=obs, + predicted_median=median, + lower=lower, + upper=upper, + is_anomaly=is_anom, + ) + ) + + return AnomalyDetectionResponse(anomalies=anomalies) + + +# ========================= +# 8) Backtest simple +# ========================= + +class BacktestRequest(BaseModel): + series: UnivariateSeries + prediction_length: int = 7 + test_length: int = 28 + + +class BacktestMetrics(BaseModel): + mae: float + mape: float + wql: float # Weighted Quantile Loss aproximada para el cuantil 0.5 + + +class BacktestResponse(BaseModel): + metrics: BacktestMetrics + forecast_median: List[float] + forecast_timestamps: List[str] + actuals: List[float] + + +@app.post("/backtest_simple", response_model=BacktestResponse) +def backtest_simple(req: BacktestRequest): + """ + Backtest sencillo: separamos un tramo final de la serie como test, pronosticamos + ese tramo y calculamos métricas MAE / MAPE / WQL. 
+ """ + values = np.array(req.series.values, dtype=float) + n = len(values) + if n <= req.test_length: + raise HTTPException( + status_code=400, + detail="La serie debe ser más larga que test_length.", + ) + + train = values[: n - req.test_length] + test = values[n - req.test_length :] + + context_df = pd.DataFrame( + { + "id": ["series_0"] * len(train), + "timestamp": pd.RangeIndex(start=0, stop=len(train), step=1), + "target": train.tolist(), + } + ) + + pred_df = pipeline.predict_df( + context_df, + prediction_length=req.test_length, + quantile_levels=[0.5], + id_column="id", + timestamp_column="timestamp", + target="target", + ).sort_values("timestamp") + + forecast = pred_df["predictions"].to_numpy(dtype=float) + timestamps = pred_df["timestamp"].astype(str).tolist() + + mae = float(np.mean(np.abs(test - forecast))) + eps = 1e-8 + mape = float(np.mean(np.abs((test - forecast) / (test + eps)))) * 100.0 + tau = 0.5 + diff = test - forecast + wql = float(np.mean(np.maximum(tau * diff, (tau - 1) * diff))) + + metrics = BacktestMetrics(mae=mae, mape=mape, wql=wql) + + return BacktestResponse( + metrics=metrics, + forecast_median=forecast.tolist(), + forecast_timestamps=timestamps, + actuals=test.tolist(), + ) diff --git a/app/main_v3.py b/app/main_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..7bfde1009a9237bd910faf8bdc61bbd49e124ee1 --- /dev/null +++ b/app/main_v3.py @@ -0,0 +1,186 @@ +""" +Chronos-2 Forecasting API - Clean Architecture Version 3.0 + +Este es el punto de entrada de la aplicación, refactorizado siguiendo +Clean Architecture y principios SOLID. + +Características: +- Arquitectura en capas (Presentation, Application, Domain, Infrastructure) +- Dependency Injection completa +- Separación de responsabilidades +- Código mantenible y testeable +""" + +from fastapi import FastAPI +from fastapi.staticfiles import StaticFiles +from fastapi.responses import FileResponse +from fastapi.middleware.cors import CORSMiddleware +import os + +from app.infrastructure.config.settings import get_settings +from app.utils.logger import setup_logger + +# Import routers +from app.api.routes import ( + health_router, + forecast_router, + anomaly_router, + backtest_router +) + +logger = setup_logger(__name__) +settings = get_settings() + +# ============================================================================ +# Create FastAPI App +# ============================================================================ + +app = FastAPI( + title=settings.api_title, + version=settings.api_version, + description=settings.api_description, + docs_url="/docs", + redoc_url="/redoc", + openapi_url="/openapi.json" +) + +# ============================================================================ +# Middleware +# ============================================================================ + +# CORS Middleware +app.add_middleware( + CORSMiddleware, + allow_origins=settings.cors_origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# ============================================================================ +# API Routes +# ============================================================================ + +# Health check endpoint (temporal, será movido a routes/health.py) +@app.get("/health", tags=["Health"]) +async def health_check(): + """Check if the API is running and model is loaded.""" + from app.api.dependencies import get_forecast_model + + try: + model = get_forecast_model() + model_info = model.get_model_info() + + return { + "status": "ok", + 
"version": settings.api_version, + "model": model_info + } + except Exception as e: + logger.error(f"Health check failed: {e}") + return { + "status": "error", + "version": settings.api_version, + "error": str(e) + } + + +# Include routers +app.include_router(health_router) +app.include_router(forecast_router) +app.include_router(anomaly_router) +app.include_router(backtest_router) + +# ============================================================================ +# Static Files (Excel Add-in) +# ============================================================================ + +if os.path.exists(settings.static_dir): + logger.info(f"Mounting static files from: {settings.static_dir}") + + # Mount subdirectories + for subdir in ["assets", "taskpane", "commands"]: + path = os.path.join(settings.static_dir, subdir) + if os.path.exists(path): + app.mount(f"/{subdir}", StaticFiles(directory=path), name=subdir) + logger.info(f"Mounted /{subdir}") + + # Manifest file + manifest_path = os.path.join(settings.static_dir, "manifest.xml") + if os.path.exists(manifest_path): + @app.get("/manifest.xml") + async def get_manifest(): + """Serve Excel Add-in manifest.""" + return FileResponse(manifest_path, media_type="application/xml") + logger.info("Manifest endpoint registered") +else: + logger.warning(f"Static directory not found: {settings.static_dir}") + +# ============================================================================ +# Startup/Shutdown Events +# ============================================================================ + +@app.on_event("startup") +async def startup_event(): + """Initialize resources on startup.""" + logger.info("=" * 60) + logger.info(f"🚀 {settings.api_title} v{settings.api_version}") + logger.info("=" * 60) + logger.info("Architecture: Clean Architecture (4 layers)") + logger.info("Principles: SOLID") + logger.info(f"Model: {settings.model_id}") + logger.info(f"Device: {settings.device_map}") + logger.info("=" * 60) + + # Pre-load model + try: + from app.api.dependencies import get_forecast_model + logger.info("Pre-loading forecast model...") + model = get_forecast_model() + logger.info(f"✅ Model loaded: {model.get_model_info()}") + except Exception as e: + logger.error(f"❌ Failed to load model: {e}") + logger.error("API will start but forecasting will fail until model loads") + + +@app.on_event("shutdown") +async def shutdown_event(): + """Cleanup resources on shutdown.""" + logger.info("=" * 60) + logger.info("Shutting down Chronos-2 API...") + logger.info("=" * 60) + + +# ============================================================================ +# Root Endpoint +# ============================================================================ + +@app.get("/", tags=["Info"]) +async def root(): + """API information and documentation links.""" + return { + "name": settings.api_title, + "version": settings.api_version, + "description": settings.api_description, + "docs": "/docs", + "health": "/health", + "architecture": "Clean Architecture with SOLID principles", + "layers": { + "presentation": "FastAPI (app/api/)", + "application": "Use Cases (app/application/)", + "domain": "Business Logic (app/domain/)", + "infrastructure": "External Services (app/infrastructure/)" + } + } + + +if __name__ == "__main__": + import uvicorn + + uvicorn.run( + "app.main_v3:app", + host="0.0.0.0", + port=settings.api_port, + reload=True, + log_level=settings.log_level.lower() + ) diff --git a/app/main_working_version.py b/app/main_working_version.py new file mode 100644 index 
0000000000000000000000000000000000000000..a58afddca8d61755fc7270461d256f60d2c52dbc --- /dev/null +++ b/app/main_working_version.py @@ -0,0 +1,643 @@ +import os +from typing import List, Dict, Optional + +import numpy as np +import pandas as pd +from fastapi import FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel, Field + +from chronos import Chronos2Pipeline + + +# ========================= +# Model configuration +# ========================= + +MODEL_ID = os.getenv("CHRONOS_MODEL_ID", "amazon/chronos-2") +DEVICE_MAP = os.getenv("DEVICE_MAP", "cpu") # "cpu" or "cuda" + +app = FastAPI( + title="Chronos-2 Universal Forecasting API", + description=( + "Local (Docker) server for Chronos-2 forecasts: univariate, " + "multivariate, covariates, scenarios, anomalies, and backtesting." + ), + version="1.0.0", +) + +# Configure CORS for the Excel Add-in +app.add_middleware( + CORSMiddleware, + allow_origins=["https://localhost:3001", "https://localhost:3000"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Load the model once at process startup +pipeline = Chronos2Pipeline.from_pretrained(MODEL_ID, device_map=DEVICE_MAP) + + +# ========================= +# Shared Pydantic models +# ========================= + +class BaseForecastConfig(BaseModel): + prediction_length: int = Field( + 7, description="Forecast horizon (number of future steps)" + ) + quantile_levels: List[float] = Field( + default_factory=lambda: [0.1, 0.5, 0.9], + description="Quantiles for the probabilistic forecast", + ) + start_timestamp: Optional[str] = Field( + default=None, + description=( + "Start date/time of the history (ISO format). " + "If not given, integer indices are used." + ), + ) + freq: str = Field( + "D", + description="Time frequency (e.g. 'D' daily, 'H' hourly, 'W' weekly...).", + ) + + +class UnivariateSeries(BaseModel): + values: List[float] + + +class MultiSeriesItem(BaseModel): + series_id: str + values: List[float] + + +class CovariatePoint(BaseModel): + """ + Time point used both for context (history) and for future covariates. + """ + timestamp: Optional[str] = None # optional when integer indices are used + id: Optional[str] = None # series id, defaults to 'series_0' + target: Optional[float] = None # target variable value (history) + covariates: Dict[str, float] = Field( + default_factory=dict, + description="Name -> value for each dynamic covariate.", + ) + + +# ========================= +# 1) Healthcheck +# ========================= + +@app.get("/health") +def health(): + """ + Returns basic information about the server status and the loaded model. + """ + return { + "status": "ok", + "model_id": MODEL_ID, + "device_map": DEVICE_MAP, + } + + +# ========================= +# 2) Univariate forecast +# ========================= + +class ForecastUnivariateRequest(BaseForecastConfig): + series: UnivariateSeries + + +class ForecastUnivariateResponse(BaseModel): + timestamps: List[str] + median: List[float] + quantiles: Dict[str, List[float]] # "0.1" -> [..], "0.9" -> [..] + + +@app.post("/forecast_univariate", response_model=ForecastUnivariateResponse) +def forecast_univariate(req: ForecastUnivariateRequest): + """ + Forecast for a single time series (univariate, no covariates). + Intended for direct use from Excel or other simple tools.
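+ + The context is sent to the pipeline as a long-format DataFrame, e.g. + (made-up values): + id timestamp target + series_0 0 100.0 + series_0 1 102.0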
+    """
+    values = req.series.values
+    n = len(values)
+    if n == 0:
+        raise HTTPException(status_code=400, detail="The series cannot be empty.")
+
+    # Build the context as a long-format DataFrame (id, timestamp, target)
+    if req.start_timestamp:
+        timestamps = pd.date_range(
+            start=pd.to_datetime(req.start_timestamp),
+            periods=n,
+            freq=req.freq,
+        )
+    else:
+        timestamps = pd.RangeIndex(start=0, stop=n, step=1)
+
+    context_df = pd.DataFrame(
+        {
+            "id": ["series_0"] * n,
+            "timestamp": timestamps,
+            "target": values,
+        }
+    )
+
+    pred_df = pipeline.predict_df(
+        context_df,
+        prediction_length=req.prediction_length,
+        quantile_levels=req.quantile_levels,
+        id_column="id",
+        timestamp_column="timestamp",
+        target="target",
+    )
+
+    pred_df = pred_df.sort_values("timestamp")
+    timestamps_out = pred_df["timestamp"].astype(str).tolist()
+    median = pred_df["predictions"].astype(float).tolist()
+
+    quantiles_dict: Dict[str, List[float]] = {}
+    for q in req.quantile_levels:
+        key = f"{q:.3g}"  # quantile column name, e.g. 0.1 -> "0.1", 0.05 -> "0.05"
+        if key in pred_df.columns:
+            quantiles_dict[key] = pred_df[key].astype(float).tolist()
+
+    return ForecastUnivariateResponse(
+        timestamps=timestamps_out,
+        median=median,
+        quantiles=quantiles_dict,
+    )
+
+
+# =========================
+# 3) Multi-series (multi-id)
+# =========================
+
+class ForecastMultiSeriesRequest(BaseForecastConfig):
+    series_list: List[MultiSeriesItem]
+
+
+class SeriesForecast(BaseModel):
+    series_id: str
+    timestamps: List[str]
+    median: List[float]
+    quantiles: Dict[str, List[float]]
+
+
+class ForecastMultiSeriesResponse(BaseModel):
+    forecasts: List[SeriesForecast]
+
+
+@app.post("/forecast_multi_id", response_model=ForecastMultiSeriesResponse)
+def forecast_multi_id(req: ForecastMultiSeriesRequest):
+    """
+    Forecast for multiple series (for example, several SKUs or stores).
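+
+    Illustrative request body (series ids and values below are made-up
+    examples, not from the project):
+
+        {
+          "series_list": [
+            {"series_id": "store_madrid", "values": [120, 118, 121, 125]},
+            {"series_id": "store_bilbao", "values": [80, 82, 79, 85]}
+          ],
+          "prediction_length": 3,
+          "freq": "D"
+        }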
+    """
+    if not req.series_list:
+        raise HTTPException(status_code=400, detail="You must send at least one series.")
+
+    frames = []
+    for item in req.series_list:
+        n = len(item.values)
+        if n == 0:
+            continue
+        if req.start_timestamp:
+            timestamps = pd.date_range(
+                start=pd.to_datetime(req.start_timestamp),
+                periods=n,
+                freq=req.freq,
+            )
+        else:
+            timestamps = pd.RangeIndex(start=0, stop=n, step=1)
+
+        frames.append(
+            pd.DataFrame(
+                {
+                    "id": [item.series_id] * n,
+                    "timestamp": timestamps,
+                    "target": item.values,
+                }
+            )
+        )
+
+    if not frames:
+        raise HTTPException(status_code=400, detail="All series are empty.")
+
+    context_df = pd.concat(frames, ignore_index=True)
+
+    pred_df = pipeline.predict_df(
+        context_df,
+        prediction_length=req.prediction_length,
+        quantile_levels=req.quantile_levels,
+        id_column="id",
+        timestamp_column="timestamp",
+        target="target",
+    )
+
+    forecasts: List[SeriesForecast] = []
+    for series_id, group in pred_df.groupby("id"):
+        group = group.sort_values("timestamp")
+        timestamps_out = group["timestamp"].astype(str).tolist()
+        median = group["predictions"].astype(float).tolist()
+        quantiles_dict: Dict[str, List[float]] = {}
+        for q in req.quantile_levels:
+            key = f"{q:.3g}"
+            if key in group.columns:
+                quantiles_dict[key] = group[key].astype(float).tolist()
+
+        forecasts.append(
+            SeriesForecast(
+                series_id=series_id,
+                timestamps=timestamps_out,
+                median=median,
+                quantiles=quantiles_dict,
+            )
+        )
+
+    return ForecastMultiSeriesResponse(forecasts=forecasts)
+
+
+# =========================
+# 4) Forecast with covariates
+# =========================
+
+class ForecastWithCovariatesRequest(BaseForecastConfig):
+    context: List[CovariatePoint]
+    future: Optional[List[CovariatePoint]] = None
+
+
+class ForecastWithCovariatesResponse(BaseModel):
+    # rows with all pred_df columns serialized as strings
+    pred_df: List[Dict[str, str]]
+
+
+@app.post("/forecast_with_covariates", response_model=ForecastWithCovariatesResponse)
+def forecast_with_covariates(req: ForecastWithCovariatesRequest):
+    """
+    Forecast with covariate information (promotions, price, weather...) both
+    in the history (context) and for possible futures (future).
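+
+    Illustrative request body (the "promo" covariate and all values are
+    made-up examples):
+
+        {
+          "context": [
+            {"timestamp": "2025-01-01", "target": 100, "covariates": {"promo": 0}},
+            {"timestamp": "2025-01-02", "target": 140, "covariates": {"promo": 1}}
+          ],
+          "future": [
+            {"timestamp": "2025-01-03", "covariates": {"promo": 1}}
+          ],
+          "prediction_length": 1,
+          "freq": "D"
+        }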
+    """
+    if not req.context:
+        raise HTTPException(status_code=400, detail="The context cannot be empty.")
+
+    ctx_rows = []
+    for p in req.context:
+        if p.target is None:
+            continue
+        row = {
+            "id": p.id or "series_0",
+            "timestamp": p.timestamp,
+            "target": p.target,
+        }
+        for k, v in p.covariates.items():
+            row[k] = v
+        ctx_rows.append(row)
+
+    context_df = pd.DataFrame(ctx_rows)
+    if "timestamp" not in context_df or context_df["timestamp"].isna().any():
+        context_df["timestamp"] = pd.RangeIndex(start=0, stop=len(context_df), step=1)
+
+    future_df = None
+    if req.future:
+        fut_rows = []
+        for p in req.future:
+            row = {
+                "id": p.id or "series_0",
+                "timestamp": p.timestamp,
+            }
+            for k, v in p.covariates.items():
+                row[k] = v
+            fut_rows.append(row)
+        future_df = pd.DataFrame(fut_rows)
+        if "timestamp" not in future_df or future_df["timestamp"].isna().any():
+            future_df["timestamp"] = pd.RangeIndex(
+                start=context_df["timestamp"].max() + 1,
+                stop=context_df["timestamp"].max() + 1 + len(future_df),
+                step=1,
+            )
+
+    pred_df = pipeline.predict_df(
+        context_df,
+        future_df=future_df,
+        prediction_length=req.prediction_length,
+        quantile_levels=req.quantile_levels,
+        id_column="id",
+        timestamp_column="timestamp",
+        target="target",
+    )
+
+    pred_df = pred_df.sort_values(["id", "timestamp"])
+    out_records: List[Dict[str, str]] = []
+    for _, row in pred_df.iterrows():
+        record = {k: str(v) for k, v in row.items()}
+        out_records.append(record)
+
+    return ForecastWithCovariatesResponse(pred_df=out_records)
+
+
+# =========================
+# 5) Multivariate (multiple targets)
+# =========================
+
+class MultivariateContextPoint(BaseModel):
+    timestamp: Optional[str] = None
+    id: Optional[str] = None
+    targets: Dict[str, float]  # e.g. {"demand": 100, "returns": 5}
+    covariates: Dict[str, float] = Field(default_factory=dict)
+
+
+class ForecastMultivariateRequest(BaseForecastConfig):
+    context: List[MultivariateContextPoint]
+    target_columns: List[str]  # names of the target columns
+
+
+class ForecastMultivariateResponse(BaseModel):
+    pred_df: List[Dict[str, str]]
+
+
+@app.post("/forecast_multivariate", response_model=ForecastMultivariateResponse)
+def forecast_multivariate(req: ForecastMultivariateRequest):
+    """
+    Multivariate forecast: multiple target columns (e.g. demand and returns).
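+
+    Illustrative request body (target names and values are made up):
+
+        {
+          "context": [
+            {"timestamp": "2025-01-01", "targets": {"demand": 100, "returns": 5}},
+            {"timestamp": "2025-01-02", "targets": {"demand": 104, "returns": 6}}
+          ],
+          "target_columns": ["demand", "returns"],
+          "prediction_length": 2,
+          "freq": "D"
+        }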
+    """
+    if not req.context:
+        raise HTTPException(status_code=400, detail="The context cannot be empty.")
+    if not req.target_columns:
+        raise HTTPException(status_code=400, detail="You must specify target columns.")
+
+    rows = []
+    for p in req.context:
+        base = {
+            "id": p.id or "series_0",
+            "timestamp": p.timestamp,
+        }
+        for t_name, t_val in p.targets.items():
+            base[t_name] = t_val
+        for k, v in p.covariates.items():
+            base[k] = v
+        rows.append(base)
+
+    context_df = pd.DataFrame(rows)
+    if "timestamp" not in context_df or context_df["timestamp"].isna().any():
+        context_df["timestamp"] = pd.RangeIndex(start=0, stop=len(context_df), step=1)
+
+    pred_df = pipeline.predict_df(
+        context_df,
+        prediction_length=req.prediction_length,
+        quantile_levels=req.quantile_levels,
+        id_column="id",
+        timestamp_column="timestamp",
+        target=req.target_columns,
+    )
+
+    pred_df = pred_df.sort_values(["id", "timestamp"])
+    out_records = [{k: str(v) for k, v in row.items()} for _, row in pred_df.iterrows()]
+    return ForecastMultivariateResponse(pred_df=out_records)
+
+
+# =========================
+# 6) Scenarios (what-if)
+# =========================
+
+class ScenarioDefinition(BaseModel):
+    name: str
+    future_covariates: List[CovariatePoint]
+
+
+class ScenarioForecast(BaseModel):
+    name: str
+    pred_df: List[Dict[str, str]]
+
+
+class ForecastScenariosRequest(BaseForecastConfig):
+    context: List[CovariatePoint]
+    scenarios: List[ScenarioDefinition]
+
+
+class ForecastScenariosResponse(BaseModel):
+    scenarios: List[ScenarioForecast]
+
+
+@app.post("/forecast_scenarios", response_model=ForecastScenariosResponse)
+def forecast_scenarios(req: ForecastScenariosRequest):
+    """
+    Evaluate multiple what-if scenarios by changing the future covariates
+    (for example, promo ON/OFF, different prices, etc.).
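+
+    Illustrative request body comparing two made-up pricing scenarios:
+
+        {
+          "context": [
+            {"timestamp": "2025-01-01", "target": 100, "covariates": {"price": 10.0}},
+            {"timestamp": "2025-01-02", "target": 96, "covariates": {"price": 10.5}}
+          ],
+          "scenarios": [
+            {"name": "keep_price",
+             "future_covariates": [{"timestamp": "2025-01-03", "covariates": {"price": 10.5}}]},
+            {"name": "discount",
+             "future_covariates": [{"timestamp": "2025-01-03", "covariates": {"price": 9.0}}]}
+          ],
+          "prediction_length": 1,
+          "freq": "D"
+        }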
+    """
+    if not req.context:
+        raise HTTPException(status_code=400, detail="The context cannot be empty.")
+    if not req.scenarios:
+        raise HTTPException(status_code=400, detail="You must define at least one scenario.")
+
+    ctx_rows = []
+    for p in req.context:
+        if p.target is None:
+            continue
+        row = {
+            "id": p.id or "series_0",
+            "timestamp": p.timestamp,
+            "target": p.target,
+        }
+        for k, v in p.covariates.items():
+            row[k] = v
+        ctx_rows.append(row)
+
+    context_df = pd.DataFrame(ctx_rows)
+    if "timestamp" not in context_df or context_df["timestamp"].isna().any():
+        context_df["timestamp"] = pd.RangeIndex(start=0, stop=len(context_df), step=1)
+
+    results: List[ScenarioForecast] = []
+
+    for scen in req.scenarios:
+        fut_rows = []
+        for p in scen.future_covariates:
+            row = {
+                "id": p.id or "series_0",
+                "timestamp": p.timestamp,
+            }
+            for k, v in p.covariates.items():
+                row[k] = v
+            fut_rows.append(row)
+        future_df = pd.DataFrame(fut_rows)
+        if "timestamp" not in future_df or future_df["timestamp"].isna().any():
+            future_df["timestamp"] = pd.RangeIndex(
+                start=context_df["timestamp"].max() + 1,
+                stop=context_df["timestamp"].max() + 1 + len(future_df),
+                step=1,
+            )
+
+        pred_df = pipeline.predict_df(
+            context_df,
+            future_df=future_df,
+            prediction_length=req.prediction_length,
+            quantile_levels=req.quantile_levels,
+            id_column="id",
+            timestamp_column="timestamp",
+            target="target",
+        )
+        pred_df = pred_df.sort_values(["id", "timestamp"])
+        out_records = [{k: str(v) for k, v in row.items()} for _, row in pred_df.iterrows()]
+
+        results.append(ScenarioForecast(name=scen.name, pred_df=out_records))
+
+    return ForecastScenariosResponse(scenarios=results)
+
+
+# =========================
+# 7) Anomaly detection
+# =========================
+
+class AnomalyDetectionRequest(BaseModel):
+    context: UnivariateSeries
+    recent_observed: List[float]
+    prediction_length: int = 7
+    quantile_low: float = 0.05
+    quantile_high: float = 0.95
+
+
+class AnomalyPoint(BaseModel):
+    index: int
+    value: float
+    predicted_median: float
+    lower: float
+    upper: float
+    is_anomaly: bool
+
+
+class AnomalyDetectionResponse(BaseModel):
+    anomalies: List[AnomalyPoint]
+
+
+@app.post("/detect_anomalies", response_model=AnomalyDetectionResponse)
+def detect_anomalies(req: AnomalyDetectionRequest):
+    """
+    Flags recent observed points as anomalies when they fall outside the
+    [quantile_low, quantile_high] interval of the forecast.
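+
+    Illustrative response (numbers are made up); one AnomalyPoint is
+    returned per recent observation:
+
+        {
+          "anomalies": [
+            {"index": 0, "value": 200.0, "predicted_median": 108.2,
+             "lower": 101.5, "upper": 115.3, "is_anomaly": true}
+          ]
+        }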
+    """
+    n_hist = len(req.context.values)
+    if n_hist == 0:
+        raise HTTPException(status_code=400, detail="The historical series cannot be empty.")
+    if len(req.recent_observed) != req.prediction_length:
+        raise HTTPException(
+            status_code=400,
+            detail="recent_observed must have the same length as prediction_length.",
+        )
+
+    context_df = pd.DataFrame(
+        {
+            "id": ["series_0"] * n_hist,
+            "timestamp": pd.RangeIndex(start=0, stop=n_hist, step=1),
+            "target": req.context.values,
+        }
+    )
+
+    quantiles = sorted({req.quantile_low, 0.5, req.quantile_high})
+    pred_df = pipeline.predict_df(
+        context_df,
+        prediction_length=req.prediction_length,
+        quantile_levels=quantiles,
+        id_column="id",
+        timestamp_column="timestamp",
+        target="target",
+    ).sort_values("timestamp")
+
+    q_low_col = f"{req.quantile_low:.3g}"
+    q_high_col = f"{req.quantile_high:.3g}"
+
+    anomalies: List[AnomalyPoint] = []
+    for i, (obs, (_, row)) in enumerate(zip(req.recent_observed, pred_df.iterrows())):
+        lower = float(row[q_low_col])
+        upper = float(row[q_high_col])
+        median = float(row["predictions"])
+        is_anom = (obs < lower) or (obs > upper)
+        anomalies.append(
+            AnomalyPoint(
+                index=i,
+                value=obs,
+                predicted_median=median,
+                lower=lower,
+                upper=upper,
+                is_anomaly=is_anom,
+            )
+        )
+
+    return AnomalyDetectionResponse(anomalies=anomalies)
+
+
+# =========================
+# 8) Simple backtest
+# =========================
+
+class BacktestRequest(BaseModel):
+    series: UnivariateSeries
+    prediction_length: int = 7
+    test_length: int = 28
+
+
+class BacktestMetrics(BaseModel):
+    mae: float
+    mape: float
+    wql: float  # approximate Weighted Quantile Loss for the 0.5 quantile
+
+
+class BacktestResponse(BaseModel):
+    metrics: BacktestMetrics
+    forecast_median: List[float]
+    forecast_timestamps: List[str]
+    actuals: List[float]
+
+
+@app.post("/backtest_simple", response_model=BacktestResponse)
+def backtest_simple(req: BacktestRequest):
+    """
+    Simple backtest: hold out the final stretch of the series as a test set,
+    forecast that stretch, and compute MAE / MAPE / WQL metrics.
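+
+    WQL here is the pinball loss at tau = 0.5, i.e.
+    mean(max(tau * e, (tau - 1) * e)) with e = actual - forecast, which for
+    the median reduces to half the MAE.
+
+    Illustrative response (numbers are made up but internally consistent):
+
+        {
+          "metrics": {"mae": 2.1, "mape": 1.7, "wql": 1.05},
+          "forecast_median": [117.0, 120.8],
+          "forecast_timestamps": ["8", "9"],
+          "actuals": [119.0, 123.0]
+        }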
+    """
+    values = np.array(req.series.values, dtype=float)
+    n = len(values)
+    if n <= req.test_length:
+        raise HTTPException(
+            status_code=400,
+            detail="The series must be longer than test_length.",
+        )
+
+    train = values[: n - req.test_length]
+    test = values[n - req.test_length :]
+
+    context_df = pd.DataFrame(
+        {
+            "id": ["series_0"] * len(train),
+            "timestamp": pd.RangeIndex(start=0, stop=len(train), step=1),
+            "target": train.tolist(),
+        }
+    )
+
+    # Forecast the entire held-out stretch (test_length steps) in one shot
+    pred_df = pipeline.predict_df(
+        context_df,
+        prediction_length=req.test_length,
+        quantile_levels=[0.5],
+        id_column="id",
+        timestamp_column="timestamp",
+        target="target",
+    ).sort_values("timestamp")
+
+    forecast = pred_df["predictions"].to_numpy(dtype=float)
+    timestamps = pred_df["timestamp"].astype(str).tolist()
+
+    mae = float(np.mean(np.abs(test - forecast)))
+    eps = 1e-8
+    # Use |actual| in the denominator so negative values cannot flip the ratio
+    mape = float(np.mean(np.abs(test - forecast) / (np.abs(test) + eps))) * 100.0
+    tau = 0.5
+    diff = test - forecast
+    wql = float(np.mean(np.maximum(tau * diff, (tau - 1) * diff)))
+
+    metrics = BacktestMetrics(mae=mae, mape=mape, wql=wql)
+
+    return BacktestResponse(
+        metrics=metrics,
+        forecast_median=forecast.tolist(),
+        forecast_timestamps=timestamps,
+        actuals=test.tolist(),
+    )
diff --git a/app/schemas/__init__.py b/app/schemas/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/app/schemas/requests/__init__.py b/app/schemas/requests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/app/schemas/responses/__init__.py b/app/schemas/responses/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/app/utils/__init__.py b/app/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/app/utils/dataframe_builder.py b/app/utils/dataframe_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..fbefa2a5bd9543d99050430b9a349f8675e8b432
--- /dev/null
+++ b/app/utils/dataframe_builder.py
@@ -0,0 +1,234 @@
+"""
+DataFrame builder for time series.
+
+This module implements the IDataTransformer interface,
+applying the ISP (Interface Segregation Principle).
+"""
+
+from typing import List, Optional, Dict, Any
+import pandas as pd
+
+from app.domain.interfaces.data_transformer import IDataTransformer
+from app.utils.timestamp_generator import TimestampGenerator
+from app.utils.logger import setup_logger
+
+logger = setup_logger(__name__)
+
+
+class DataFrameBuilder(IDataTransformer):
+    """
+    DataFrame builder for forecasting models.
+
+    Implements IDataTransformer to build DataFrames in the
+    format expected by Chronos2Pipeline.
+
+    Output format:
+    - id: Series identifier
+    - timestamp: Timestamp or index
+    - target: Series value
+    """
+
+    def __init__(self):
+        """Initializes the builder with a timestamp generator."""
+        self.timestamp_gen = TimestampGenerator()
+        logger.debug("DataFrameBuilder initialized")
+
+    def build_context_df(
+        self,
+        values: List[float],
+        timestamps: Optional[List[str]] = None,
+        series_id: str = "series_0",
+        freq: str = "D"
+    ) -> pd.DataFrame:
+        """
+        Builds a context DataFrame for forecasting.
+
+        Args:
+            values: List of series values
+            timestamps: List of timestamps (optional; generated if not provided)
+            series_id: Series identifier
+            freq: Series frequency
+
+        Returns:
+            DataFrame with columns [id, timestamp, target]
+
+        Raises:
+            ValueError: If timestamps and values have different lengths
+
+        Example:
+            >>> builder = DataFrameBuilder()
+            >>> df = builder.build_context_df([100, 102, 105], series_id="sales")
+            >>> df.columns.tolist()
+            ['id', 'timestamp', 'target']
+        """
+        n = len(values)
+
+        if n == 0:
+            raise ValueError("values cannot be empty")
+
+        # Generate timestamps if not provided
+        if timestamps is None:
+            logger.debug(f"Generating {n} integer timestamps")
+            timestamps = self.timestamp_gen.generate_integer_index(n)
+        elif len(timestamps) != n:
+            raise ValueError(
+                f"timestamps ({len(timestamps)}) and values ({n}) "
+                f"must have the same length"
+            )
+
+        # Build the DataFrame
+        df = pd.DataFrame({
+            "id": [series_id] * n,
+            "timestamp": timestamps,
+            "target": values
+        })
+
+        logger.debug(
+            f"Built context DataFrame: {len(df)} rows, "
+            f"series_id='{series_id}'"
+        )
+
+        return df
+
+    def parse_prediction_result(
+        self,
+        pred_df: pd.DataFrame,
+        quantile_levels: List[float]
+    ) -> Dict[str, Any]:
+        """
+        Parses a prediction result into a standard format.
+
+        Args:
+            pred_df: Prediction DataFrame with columns:
+                [id, timestamp, predictions, q1, q2, ...]
+            quantile_levels: List of requested quantiles
+
+        Returns:
+            Dictionary with:
+            - timestamps: List[str]
+            - median: List[float]
+            - quantiles: Dict[str, List[float]]
+
+        Example:
+            >>> result = builder.parse_prediction_result(pred_df, [0.1, 0.5, 0.9])
+            >>> result.keys()
+            dict_keys(['timestamps', 'median', 'quantiles'])
+        """
+        # Validate that the DataFrame has the required columns
+        if "timestamp" not in pred_df.columns:
+            raise ValueError("pred_df must have a 'timestamp' column")
+        if "predictions" not in pred_df.columns:
+            raise ValueError("pred_df must have a 'predictions' column")
+
+        # Extract timestamps and median
+        timestamps = pred_df["timestamp"].astype(str).tolist()
+        median = pred_df["predictions"].astype(float).tolist()
+
+        # Extract quantiles
+        quantiles = {}
+        for q in quantile_levels:
+            # Format the key as a string (e.g. 0.1 -> "0.1")
+            key = f"{q:.3g}"
+
+            if key in pred_df.columns:
+                quantiles[key] = pred_df[key].astype(float).tolist()
+            else:
+                logger.warning(
+                    f"Quantile {key} not found in pred_df. "
+                    f"Available columns: {pred_df.columns.tolist()}"
+                )
+
+        result = {
+            "timestamps": timestamps,
+            "median": median,
+            "quantiles": quantiles
+        }
+
+        logger.debug(
+            f"Parsed prediction: {len(timestamps)} timestamps, "
+            f"{len(quantiles)} quantiles"
+        )
+
+        return result
+
+    def build_multi_series_df(
+        self,
+        series_dict: Dict[str, List[float]],
+        timestamps_dict: Optional[Dict[str, List[str]]] = None,
+        freq: str = "D"
+    ) -> pd.DataFrame:
+        """
+        Builds a DataFrame with multiple series.
+
+        Args:
+            series_dict: Dictionary {series_id: [values]}
+            timestamps_dict: Dictionary {series_id: [timestamps]} (optional)
+            freq: Frequency of the series
+
+        Returns:
+            Combined DataFrame with all series
+
+        Example:
+            >>> builder = DataFrameBuilder()
+            >>> series = {"sales": [100, 102], "revenue": [200, 205]}
+            >>> df = builder.build_multi_series_df(series)
+            >>> df["id"].unique().tolist()
+            ['sales', 'revenue']
+        """
+        dfs = []
+
+        for series_id, values in series_dict.items():
+            # Get the timestamps for this series
+            timestamps = None
+            if timestamps_dict and series_id in timestamps_dict:
+                timestamps = timestamps_dict[series_id]
+
+            # Build the individual DataFrame
+            df = self.build_context_df(
+                values=values,
+                timestamps=timestamps,
+                series_id=series_id,
+                freq=freq
+            )
+            dfs.append(df)
+
+        # Combine all DataFrames
+        result = pd.concat(dfs, ignore_index=True)
+
+        logger.debug(
+            f"Built multi-series DataFrame: {len(series_dict)} series, "
+            f"{len(result)} total rows"
+        )
+
+        return result
+
+    def validate_context_df(self, df: pd.DataFrame) -> bool:
+        """
+        Validates that a DataFrame has the correct format.
+
+        Args:
+            df: DataFrame to validate
+
+        Returns:
+            True if valid
+
+        Raises:
+            ValueError: If the format is incorrect
+        """
+        required_cols = {"id", "timestamp", "target"}
+
+        if not required_cols.issubset(df.columns):
+            raise ValueError(
+                f"DataFrame must have columns: {required_cols}. "
+                f"Found: {set(df.columns)}"
+            )
+
+        if len(df) == 0:
+            raise ValueError("DataFrame cannot be empty")
+
+        # Validate that target is numeric
+        if not pd.api.types.is_numeric_dtype(df["target"]):
+            raise ValueError("Column 'target' must be numeric")
+
+        logger.debug("DataFrame validation passed")
+        return True
diff --git a/app/utils/logger.py b/app/utils/logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3acf2bd25b20c1ebca3d65193b5b427f55bcbf6
--- /dev/null
+++ b/app/utils/logger.py
@@ -0,0 +1,63 @@
+"""
+Centralized logging system.
+
+This module provides a function for creating consistent loggers
+across the whole application, in line with the SRP.
+"""
+
+import logging
+import sys
+from typing import Optional
+from app.infrastructure.config.settings import settings
+
+
+def setup_logger(
+    name: str,
+    level: Optional[str] = None,
+    format_string: Optional[str] = None
+) -> logging.Logger:
+    """
+    Configures and returns a logger with a consistent format.
+
+    Args:
+        name: Logger name (typically the module's __name__)
+        level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
+               If None, the level from settings is used
+        format_string: Custom log format
+                       If None, the format from settings is used
+
+    Returns:
+        logging.Logger: Configured logger
+
+    Example:
+        >>> from app.utils.logger import setup_logger
+        >>> logger = setup_logger(__name__)
+        >>> logger.info("Application started")
+    """
+    logger = logging.getLogger(name)
+
+    # Configure the level
+    log_level = level or settings.log_level
+    logger.setLevel(getattr(logging, log_level.upper()))
+
+    # Avoid duplicating handlers
+    if not logger.handlers:
+        # Handler for stdout
+        handler = logging.StreamHandler(sys.stdout)
+        handler.setLevel(getattr(logging, log_level.upper()))
+
+        # Format
+        log_format = format_string or settings.log_format
+        formatter = logging.Formatter(log_format)
+        handler.setFormatter(formatter)
+
+        logger.addHandler(handler)
+
+    # Do not propagate to the root logger, to avoid duplicates
+    logger.propagate = False
+
+    return logger
+
+
+# Default logger for this module
+logger = setup_logger(__name__)
diff --git a/app/utils/timestamp_generator.py b/app/utils/timestamp_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..382977818219029181db7600346cf043794e93c0
--- /dev/null
+++ b/app/utils/timestamp_generator.py
@@ -0,0 +1,177 @@
+"""
+Timestamp generator for time series.
+
+This module provides utilities for generating timestamps,
+applying the SRP (Single Responsibility Principle).
+"""
+
+from typing import List, Union
+from datetime import datetime
+import pandas as pd
+
+from app.utils.logger import setup_logger
+
+logger = setup_logger(__name__)
+
+
+class TimestampGenerator:
+    """
+    Timestamp generator for time series.
+
+    Provides methods to generate different kinds of timestamps:
+    - Date ranges (date_range)
+    - Integer indices (integer_index)
+    - Continuations of existing series (continue_from)
+    """
+
+    @staticmethod
+    def generate_date_range(
+        start: Union[str, datetime],
+        periods: int,
+        freq: str = "D"
+    ) -> List[str]:
+        """
+        Generates a date range.
+
+        Args:
+            start: Start date (ISO string or datetime)
+            periods: Number of periods
+            freq: Frequency (D=daily, W=weekly, M=monthly, etc.)
+
+        Returns:
+            List of timestamps as ISO strings
+
+        Example:
+            >>> gen = TimestampGenerator()
+            >>> gen.generate_date_range("2025-01-01", 5, "D")
+            ['2025-01-01', '2025-01-02', '2025-01-03', '2025-01-04', '2025-01-05']
+        """
+        try:
+            dates = pd.date_range(
+                start=pd.to_datetime(start),
+                periods=periods,
+                freq=freq
+            )
+            result = dates.astype(str).tolist()
+            logger.debug(f"Generated {len(result)} timestamps with freq={freq}")
+            return result
+        except Exception as e:
+            logger.error(f"Error generating date range: {e}")
+            raise ValueError(f"Error generating dates: {e}") from e
+
+    @staticmethod
+    def generate_integer_index(
+        periods: int,
+        start: int = 0
+    ) -> List[int]:
+        """
+        Generates a sequential integer index.
+
+        Args:
+            periods: Number of periods
+            start: Initial index value
+
+        Returns:
+            List of integers
+
+        Example:
+            >>> gen = TimestampGenerator()
+            >>> gen.generate_integer_index(5, start=10)
+            [10, 11, 12, 13, 14]
+        """
+        if periods < 1:
+            raise ValueError("periods must be >= 1")
+
+        result = list(range(start, start + periods))
+        logger.debug(f"Generated integer index: {start} to {start + periods - 1}")
+        return result
+
+    @staticmethod
+    def continue_from(
+        last_timestamp: Union[str, int],
+        periods: int,
+        freq: str = "D"
+    ) -> List[str]:
+        """
+        Continues a time series from its last timestamp.
+
+        Args:
+            last_timestamp: Last timestamp of the existing series
+            periods: Number of future periods
+            freq: Frequency (dates only)
+
+        Returns:
+            List of future timestamps
+
+        Example:
+            >>> gen = TimestampGenerator()
+            >>> gen.continue_from("2025-01-05", 3, "D")
+            ['2025-01-06', '2025-01-07', '2025-01-08']
+        """
+        try:
+            # Try to parse as a date
+            if isinstance(last_timestamp, str):
+                try:
+                    last_date = pd.to_datetime(last_timestamp)
+                    # Use a frequency offset instead of Timedelta so that
+                    # non-fixed frequencies such as "M" also work
+                    next_date = last_date + pd.tseries.frequencies.to_offset(freq)
+                    return TimestampGenerator.generate_date_range(
+                        next_date, periods, freq
+                    )
+                except (ValueError, TypeError):
+                    # Otherwise, try as an integer
+                    last_int = int(last_timestamp)
+                    return TimestampGenerator.generate_integer_index(
+                        periods, start=last_int + 1
+                    )
+            else:
+                # Integer
+                return TimestampGenerator.generate_integer_index(
+                    periods, start=last_timestamp + 1
+                )
+        except Exception as e:
+            logger.error(f"Error continuing timestamps: {e}")
+            raise ValueError(f"Error continuing timestamps: {e}") from e
+
+    @staticmethod
+    def infer_frequency(timestamps: List[str]) -> str:
+        """
+        Infers the frequency of a list of timestamps.
+
+        Args:
+            timestamps: List of timestamps (ISO strings)
+
+        Returns:
+            Frequency code (D, W, M, etc.)
+
+        Raises:
+            ValueError: If the frequency cannot be inferred
+        """
+        if len(timestamps) < 2:
+            raise ValueError("At least 2 timestamps are needed to infer the frequency")
+
+        try:
+            dates = pd.to_datetime(timestamps)
+            freq = pd.infer_freq(dates)
+
+            if freq is None:
+                # Fallback: compute the average difference (wrap in a Series,
+                # since DatetimeIndex.diff is not available on older pandas)
+                diffs = pd.Series(dates).diff().dropna()
+                avg_diff = diffs.mean()
+
+                if avg_diff.days == 1:
+                    freq = "D"
+                elif avg_diff.days == 7:
+                    freq = "W"
+                elif 28 <= avg_diff.days <= 31:
+                    freq = "M"
+                else:
+                    freq = "D"  # Default
+
+                logger.warning(f"Frequency inferred approximately: {freq}")
+
+            logger.debug(f"Inferred frequency: {freq}")
+            return freq
+
+        except Exception as e:
+            logger.error(f"Error inferring frequency: {e}")
+            return "D"  # Safe default
diff --git a/docs/API.md b/docs/API.md
new file mode 100644
index 0000000000000000000000000000000000000000..8789bd675d7aeb3533db60f817cd76e66339f2e7
--- /dev/null
+++ b/docs/API.md
@@ -0,0 +1,954 @@
+# 📡 Chronos2 Server - API Documentation
+
+**Version**: 3.0.0
+**Base URL**: `https://your-server.hf.space` or `http://localhost:8000`
+**Date**: 2025-11-09
+
+---
+
+## 📋 Table of Contents
+
+1. [Overview](#overview)
+2. [Authentication](#authentication)
+3. [Endpoints](#endpoints)
+4. [Data Models](#data-models)
+5. [Examples](#examples)
+6. [Error Handling](#error-handling)
+7. [Rate Limiting](#rate-limiting)
+8. [Client Libraries](#client-libraries)
+
+---
+
+## 🎯 Overview
+
+The Chronos2 API provides time series forecasting capabilities using Amazon's Chronos-2 model. The API supports:
+
+- ✅ **Univariate forecasting**: Single time series prediction
+- ✅ **Multi-series forecasting**: Multiple series in parallel
+- ✅ **Anomaly detection**: Identify outliers in data
+- ✅ **Backtesting**: Evaluate forecast accuracy
+
+### Base URLs
+
+**Production**: `https://your-app.hf.space`
+**Local Development**: `http://localhost:8000`
+
+### API Documentation
+
+- **Swagger UI**: `/docs`
+- **ReDoc**: `/redoc`
+- **OpenAPI Schema**: `/openapi.json`
+
+---
+
+## 🔐 Authentication
+
+**Current**: No authentication required (public API)
+
+**Future**: API key authentication
+```bash
+curl -H "X-API-Key: your-api-key" https://api.example.com/forecast/univariate
+```
+
+---
+
+## 📊 Endpoints
+
+### Health Check
+
+#### `GET /health`
+
+Check if the API is running.
+
+**Response**:
+```json
+{
+  "status": "healthy",
+  "timestamp": "2025-11-09T12:00:00Z"
+}
+```
+
+**Example**:
+```bash
+curl http://localhost:8000/health
+```
+
+---
+
+#### `GET /health/info`
+
+Get system information.
+
+**Response**:
+```json
+{
+  "version": "3.0.0",
+  "model_id": "amazon/chronos-2",
+  "device": "cpu",
+  "python_version": "3.10.0"
+}
+```
+
+**Example**:
+```bash
+curl http://localhost:8000/health/info
+```
+
+---
+
+### Forecasting
+
+#### `POST /forecast/univariate`
+
+Generate a forecast for a single time series.
+
+**Request Body**:
+```json
+{
+  "values": [100.0, 102.0, 105.0, 103.0, 108.0, 112.0, 115.0],
+  "prediction_length": 3,
+  "quantile_levels": [0.1, 0.5, 0.9],
+  "freq": "D"
+}
+```
+
+**Parameters**:
+- `values` (required): Array of numeric values (min 3 points)
+- `prediction_length` (required): Number of periods to forecast (≥ 1)
+- `quantile_levels` (optional): Quantiles for prediction intervals (default: [0.1, 0.5, 0.9])
+- `freq` (optional): Frequency ("D", "H", "M", default: "D")
+- `timestamps` (optional): Custom timestamps
+- `series_id` (optional): Series identifier (default: "series_0")
+
+**Response** (with 7 input points indexed 0-6, the integer forecast timestamps continue at 7):
+```json
+{
+  "timestamps": ["7", "8", "9"],
+  "median": [118.5, 121.2, 124.0],
+  "quantiles": {
+    "0.1": [113.2, 115.8, 118.4],
+    "0.5": [118.5, 121.2, 124.0],
+    "0.9": [123.8, 126.6, 129.6]
+  }
+}
+```
+
+**Fields**:
+- `timestamps`: Future time points
+- `median`: Point forecasts (50th percentile)
+- `quantiles`: Prediction intervals at specified quantile levels
+
+**Example**:
+```bash
+curl -X POST http://localhost:8000/forecast/univariate \
+  -H "Content-Type: application/json" \
+  -d '{
+    "values": [100, 102, 105, 103, 108, 112, 115],
+    "prediction_length": 3,
+    "quantile_levels": [0.1, 0.5, 0.9],
+    "freq": "D"
+  }'
+```
+
+**Python Example**:
+```python
+import requests
+
+response = requests.post(
+    "http://localhost:8000/forecast/univariate",
+    json={
+        "values": [100, 102, 105, 103, 108, 112, 115],
+        "prediction_length": 3,
+        "quantile_levels": [0.1, 0.5, 0.9]
+    }
+)
+
+data = response.json()
+print(f"Median forecast: {data['median']}")
+print(f"90% upper bound: {data['quantiles']['0.9']}")
+```
+
+**JavaScript Example**:
+```javascript
+const response = await fetch('http://localhost:8000/forecast/univariate', {
+  method: 'POST',
+  headers: { 'Content-Type': 'application/json' },
+  body: JSON.stringify({
+    values: [100, 102, 105, 103, 108, 112, 115],
+    prediction_length: 3,
+    quantile_levels: [0.1, 0.5, 0.9]
+  })
+});
+
+const data = await response.json();
+console.log('Median:', data.median);
+console.log('Quantiles:', data.quantiles);
+```
+
+---
+
+#### `POST /forecast/multi-series`
+
+Generate forecasts for multiple time series.
+ +**Request Body**: +```json +{ + "series_list": [ + {"values": [100, 102, 105, 108, 112]}, + {"values": [200, 195, 190, 185, 180]}, + {"values": [50, 52, 54, 56, 58]} + ], + "prediction_length": 3, + "quantile_levels": [0.1, 0.5, 0.9], + "freq": "D" +} +``` + +**Parameters**: +- `series_list` (required): Array of series objects with `values` +- Other parameters same as univariate + +**Response**: +```json +{ + "results": [ + { + "timestamps": ["5", "6", "7"], + "median": [115.0, 118.0, 121.0], + "quantiles": { + "0.1": [110.0, 113.0, 116.0], + "0.9": [120.0, 123.0, 126.0] + } + }, + { + "timestamps": ["5", "6", "7"], + "median": [175.0, 170.0, 165.0], + "quantiles": { + "0.1": [170.0, 165.0, 160.0], + "0.9": [180.0, 175.0, 170.0] + } + }, + { + "timestamps": ["5", "6", "7"], + "median": [60.0, 62.0, 64.0], + "quantiles": { + "0.1": [58.0, 60.0, 62.0], + "0.9": [62.0, 64.0, 66.0] + } + } + ] +} +``` + +**Example**: +```bash +curl -X POST http://localhost:8000/forecast/multi-series \ + -H "Content-Type: application/json" \ + -d '{ + "series_list": [ + {"values": [100, 102, 105, 108, 112]}, + {"values": [200, 195, 190, 185, 180]} + ], + "prediction_length": 3 + }' +``` + +--- + +### Anomaly Detection + +#### `POST /anomaly/detect` + +Detect anomalies in recent observations. + +**Request Body**: +```json +{ + "context_values": [100, 102, 105, 103, 108, 112, 115], + "recent_observed": [120, 200, 124], + "prediction_length": 3, + "quantile_low": 0.05, + "quantile_high": 0.95, + "freq": "D" +} +``` + +**Parameters**: +- `context_values` (required): Historical values for context +- `recent_observed` (required): Recent observations to check +- `prediction_length` (required): Must equal length of `recent_observed` +- `quantile_low` (optional): Lower bound quantile (default: 0.05) +- `quantile_high` (optional): Upper bound quantile (default: 0.95) +- `freq` (optional): Frequency (default: "D") + +**Response**: +```json +{ + "anomalies": [ + { + "index": 0, + "value": 120.0, + "expected": 118.5, + "lower_bound": 113.2, + "upper_bound": 123.8, + "is_anomaly": false, + "z_score": 0.3 + }, + { + "index": 1, + "value": 200.0, + "expected": 121.2, + "lower_bound": 115.8, + "upper_bound": 126.6, + "is_anomaly": true, + "z_score": 14.5 + }, + { + "index": 2, + "value": 124.0, + "expected": 124.0, + "lower_bound": 118.4, + "upper_bound": 129.6, + "is_anomaly": false, + "z_score": 0.0 + } + ], + "total_points": 3, + "num_anomalies": 1, + "anomaly_rate": 0.333 +} +``` + +**Fields**: +- `anomalies`: Array of anomaly points + - `index`: Position in `recent_observed` + - `value`: Actual observed value + - `expected`: Forecasted median + - `lower_bound`: Lower prediction bound + - `upper_bound`: Upper prediction bound + - `is_anomaly`: True if outside bounds + - `z_score`: Standardized deviation +- `total_points`: Total observations checked +- `num_anomalies`: Count of anomalies detected +- `anomaly_rate`: Proportion of anomalies + +**Example**: +```bash +curl -X POST http://localhost:8000/anomaly/detect \ + -H "Content-Type: application/json" \ + -d '{ + "context_values": [100, 102, 105, 103, 108, 112, 115], + "recent_observed": [120, 200, 124], + "prediction_length": 3, + "quantile_low": 0.05, + "quantile_high": 0.95 + }' +``` + +**Python Example**: +```python +import requests + +response = requests.post( + "http://localhost:8000/anomaly/detect", + json={ + "context_values": [100, 102, 105, 103, 108, 112, 115], + "recent_observed": [120, 200, 124], + "prediction_length": 3 + } +) + +data = response.json() 
+print(f"Total anomalies: {data['num_anomalies']}")
+print(f"Anomaly rate: {data['anomaly_rate']:.1%}")
+
+for anomaly in data['anomalies']:
+    if anomaly['is_anomaly']:
+        print(f"Anomaly at index {anomaly['index']}: {anomaly['value']}")
+```
+
+---
+
+### Backtesting
+
+#### `POST /backtest/simple`
+
+Evaluate forecast accuracy on historical data.
+
+**Request Body**:
+```json
+{
+  "context_values": [100, 102, 105, 103, 108],
+  "actual_values": [112, 115, 118],
+  "prediction_length": 3,
+  "quantile_levels": [0.1, 0.5, 0.9],
+  "freq": "D"
+}
+```
+
+**Parameters**:
+- `context_values` (required): Training data
+- `actual_values` (required): Test data (ground truth)
+- `prediction_length` (required): Must equal length of `actual_values`
+- `quantile_levels` (optional): Quantiles (default: [0.1, 0.5, 0.9])
+- `freq` (optional): Frequency (default: "D")
+
+**Response**:
+```json
+{
+  "forecast": [110.5, 113.2, 116.0],
+  "actuals": [112.0, 115.0, 118.0],
+  "mae": 1.77,
+  "mape": 1.53,
+  "rmse": 1.78,
+  "errors": [1.5, 1.8, 2.0]
+}
+```
+
+**Fields**:
+- `forecast`: Predicted values (median)
+- `actuals`: Actual observed values
+- `mae`: Mean Absolute Error
+- `mape`: Mean Absolute Percentage Error (%)
+- `rmse`: Root Mean Square Error
+- `errors`: Residuals (actual - forecast)
+
+**Metrics Explanation**:
+- **MAE**: Average absolute difference (lower is better)
+- **MAPE**: Average percentage error (lower is better)
+- **RMSE**: Root mean squared error (penalizes large errors)
+
+**Example**:
+```bash
+curl -X POST http://localhost:8000/backtest/simple \
+  -H "Content-Type: application/json" \
+  -d '{
+    "context_values": [100, 102, 105, 103, 108],
+    "actual_values": [112, 115, 118],
+    "prediction_length": 3
+  }'
+```
+
+**Python Example**:
+```python
+import requests
+
+response = requests.post(
+    "http://localhost:8000/backtest/simple",
+    json={
+        "context_values": [100, 102, 105, 103, 108],
+        "actual_values": [112, 115, 118],
+        "prediction_length": 3
+    }
+)
+
+data = response.json()
+print(f"MAE: {data['mae']:.2f}")
+print(f"MAPE: {data['mape']:.2f}%")
+print(f"RMSE: {data['rmse']:.2f}")
+
+# Plot results
+import matplotlib.pyplot as plt
+plt.plot(data['actuals'], label='Actual')
+plt.plot(data['forecast'], label='Forecast')
+plt.legend()
+plt.show()
+```
+
+---
+
+## 📦 Data Models
+
+### ForecastUnivariateRequest
+
+```typescript
+{
+  values: number[];              // Min 3 items
+  prediction_length: number;     // >= 1
+  quantile_levels?: number[];    // [0, 1], default: [0.1, 0.5, 0.9]
+  freq?: string;                 // Default: "D"
+  timestamps?: string[];         // Optional
+  series_id?: string;            // Default: "series_0"
+}
+```
+
+### ForecastUnivariateResponse
+
+```typescript
+{
+  timestamps: string[];
+  median: number[];
+  quantiles: {
+    [key: string]: number[];     // e.g., "0.1": [...]
+ }; +} +``` + +### AnomalyDetectionRequest + +```typescript +{ + context_values: number[]; + recent_observed: number[]; + prediction_length: number; // Must equal len(recent_observed) + quantile_low?: number; // Default: 0.05 + quantile_high?: number; // Default: 0.95 + freq?: string; // Default: "D" +} +``` + +### AnomalyPoint + +```typescript +{ + index: number; + value: number; + expected: number; + lower_bound: number; + upper_bound: number; + is_anomaly: boolean; + z_score: number; +} +``` + +### BacktestRequest + +```typescript +{ + context_values: number[]; + actual_values: number[]; + prediction_length: number; // Must equal len(actual_values) + quantile_levels?: number[]; + freq?: string; +} +``` + +### BacktestResponse + +```typescript +{ + forecast: number[]; + actuals: number[]; + mae: number; + mape: number; + rmse: number; + errors: number[]; +} +``` + +--- + +## 💡 Examples + +### Complete Workflow: Forecast → Detect Anomalies → Backtest + +```python +import requests +import pandas as pd + +BASE_URL = "http://localhost:8000" + +# 1. Load your data +data = pd.read_csv("timeseries.csv") +values = data['value'].tolist() + +# Split into train/test +train = values[:100] +test = values[100:110] + +# 2. Generate forecast +forecast_response = requests.post( + f"{BASE_URL}/forecast/univariate", + json={ + "values": train, + "prediction_length": len(test), + "quantile_levels": [0.05, 0.5, 0.95] + } +) +forecast = forecast_response.json() + +print("Forecast median:", forecast['median']) + +# 3. Detect anomalies in test data +anomaly_response = requests.post( + f"{BASE_URL}/anomaly/detect", + json={ + "context_values": train, + "recent_observed": test, + "prediction_length": len(test), + "quantile_low": 0.05, + "quantile_high": 0.95 + } +) +anomalies = anomaly_response.json() + +print(f"Detected {anomalies['num_anomalies']} anomalies") + +# 4. Evaluate forecast accuracy +backtest_response = requests.post( + f"{BASE_URL}/backtest/simple", + json={ + "context_values": train, + "actual_values": test, + "prediction_length": len(test) + } +) +metrics = backtest_response.json() + +print(f"MAE: {metrics['mae']:.2f}") +print(f"MAPE: {metrics['mape']:.2f}%") +``` + +--- + +### Multi-Series Parallel Forecasting + +```python +import requests +import pandas as pd + +# Load multiple series +products = ['A', 'B', 'C'] +series_list = [] + +for product in products: + data = pd.read_csv(f"product_{product}.csv") + series_list.append({ + "values": data['sales'].tolist() + }) + +# Forecast all series in parallel +response = requests.post( + "http://localhost:8000/forecast/multi-series", + json={ + "series_list": series_list, + "prediction_length": 7 + } +) + +results = response.json()['results'] + +for i, product in enumerate(products): + print(f"Product {product} forecast: {results[i]['median']}") +``` + +--- + +### Real-Time Anomaly Monitoring + +```python +import requests +import time + +BASE_URL = "http://localhost:8000" +historical_data = [] + +while True: + # Simulate receiving new data point + new_value = get_latest_sensor_reading() + historical_data.append(new_value) + + # Keep last 100 points as context + context = historical_data[-100:] + + # Check if latest point is anomaly + response = requests.post( + f"{BASE_URL}/anomaly/detect", + json={ + "context_values": context[:-1], + "recent_observed": [new_value], + "prediction_length": 1 + } + ) + + result = response.json() + if result['anomalies'][0]['is_anomaly']: + print(f"🚨 ALERT: Anomaly detected! 
Value: {new_value}") + send_alert(new_value) + + time.sleep(60) # Check every minute +``` + +--- + +## ⚠️ Error Handling + +### Error Response Format + +```json +{ + "detail": "Error message describing what went wrong" +} +``` + +### HTTP Status Codes + +| Code | Meaning | Example | +|------|---------|---------| +| 200 | Success | Forecast generated successfully | +| 400 | Bad Request | Invalid input data | +| 422 | Validation Error | Missing required fields | +| 500 | Internal Server Error | Model inference failed | + +### Common Errors + +#### 422 Validation Error + +**Cause**: Invalid request data + +**Example**: +```json +{ + "detail": [ + { + "loc": ["body", "values"], + "msg": "field required", + "type": "value_error.missing" + } + ] +} +``` + +**Solution**: Check request body structure + +--- + +#### 400 Bad Request + +**Cause**: Business logic validation failed + +**Example**: +```json +{ + "detail": "values cannot be empty" +} +``` + +**Solution**: Provide at least 3 data points + +--- + +#### 500 Internal Server Error + +**Cause**: Model inference or processing error + +**Example**: +```json +{ + "detail": "Internal server error" +} +``` + +**Solution**: Check logs, retry request + +--- + +## 🚀 Rate Limiting + +**Current**: No rate limiting + +**Future**: +- 100 requests/minute per IP +- 1000 requests/hour per API key + +--- + +## 📚 Client Libraries + +### Python + +```bash +pip install requests pandas +``` + +```python +import requests + +class Chronos2Client: + def __init__(self, base_url="http://localhost:8000"): + self.base_url = base_url + + def forecast(self, values, prediction_length, **kwargs): + response = requests.post( + f"{self.base_url}/forecast/univariate", + json={ + "values": values, + "prediction_length": prediction_length, + **kwargs + } + ) + response.raise_for_status() + return response.json() + + def detect_anomalies(self, context, recent, **kwargs): + response = requests.post( + f"{self.base_url}/anomaly/detect", + json={ + "context_values": context, + "recent_observed": recent, + "prediction_length": len(recent), + **kwargs + } + ) + response.raise_for_status() + return response.json() + +# Usage +client = Chronos2Client() +result = client.forecast([100, 102, 105], 3) +print(result['median']) +``` + +--- + +### JavaScript/TypeScript + +```bash +npm install axios +``` + +```typescript +import axios from 'axios'; + +class Chronos2Client { + private baseURL: string; + + constructor(baseURL: string = 'http://localhost:8000') { + this.baseURL = baseURL; + } + + async forecast( + values: number[], + predictionLength: number, + options: any = {} + ) { + const response = await axios.post( + `${this.baseURL}/forecast/univariate`, + { + values, + prediction_length: predictionLength, + ...options + } + ); + return response.data; + } + + async detectAnomalies( + context: number[], + recent: number[], + options: any = {} + ) { + const response = await axios.post( + `${this.baseURL}/anomaly/detect`, + { + context_values: context, + recent_observed: recent, + prediction_length: recent.length, + ...options + } + ); + return response.data; + } +} + +// Usage +const client = new Chronos2Client(); +const result = await client.forecast([100, 102, 105], 3); +console.log(result.median); +``` + +--- + +### cURL Examples + +**Forecast**: +```bash +curl -X POST http://localhost:8000/forecast/univariate \ + -H "Content-Type: application/json" \ + -d '{"values":[100,102,105],"prediction_length":3}' +``` + +**Anomaly Detection**: +```bash +curl -X POST 
http://localhost:8000/anomaly/detect \ + -H "Content-Type: application/json" \ + -d '{ + "context_values":[100,102,105,103,108], + "recent_observed":[200], + "prediction_length":1 + }' +``` + +**Backtest**: +```bash +curl -X POST http://localhost:8000/backtest/simple \ + -H "Content-Type: application/json" \ + -d '{ + "context_values":[100,102,105], + "actual_values":[108,112], + "prediction_length":2 + }' +``` + +--- + +## 🔍 Advanced Usage + +### Custom Quantile Levels + +```python +# Fine-grained prediction intervals +response = requests.post( + "http://localhost:8000/forecast/univariate", + json={ + "values": [100, 102, 105, 103, 108], + "prediction_length": 5, + "quantile_levels": [0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95] + } +) + +data = response.json() +# Now you have more granular intervals +print("5-95% interval:", data['quantiles']['0.05'], "-", data['quantiles']['0.95']) +print("25-75% interval:", data['quantiles']['0.25'], "-", data['quantiles']['0.75']) +``` + +### Custom Timestamps + +```python +import pandas as pd + +# Use actual dates +dates = pd.date_range('2025-01-01', periods=10, freq='D') +timestamps = dates.astype(str).tolist() + +response = requests.post( + "http://localhost:8000/forecast/univariate", + json={ + "values": [100, 102, 105, 103, 108, 112, 115, 118, 120, 122], + "timestamps": timestamps, + "prediction_length": 7, + "freq": "D" + } +) + +# Response will have future dates +forecast = response.json() +print("Future dates:", forecast['timestamps']) +# ['2025-01-11', '2025-01-12', ...] +``` + +--- + +## 📞 Support + +**Documentation**: `/docs`, `/redoc` +**Issues**: GitHub Issues +**Email**: support@example.com + +--- + +**Last Updated**: 2025-11-09 +**Version**: 3.0.0 +**API Status**: Production Ready diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000000000000000000000000000000000000..2d2306c229c825ced2db81faa680a4038067af36 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,1044 @@ +# 🏛️ Chronos2 Server - Architecture Documentation + +**Version**: 3.0.0 +**Date**: 2025-11-09 +**Author**: Claude AI +**Status**: Production Ready + +--- + +## 📋 Table of Contents + +1. [Overview](#overview) +2. [Architecture Principles](#architecture-principles) +3. [System Architecture](#system-architecture) +4. [Layer Details](#layer-details) +5. [Design Patterns](#design-patterns) +6. [Data Flow](#data-flow) +7. [Component Diagrams](#component-diagrams) +8. [SOLID Principles](#solid-principles) +9. [Testing Strategy](#testing-strategy) +10. [Deployment Architecture](#deployment-architecture) + +--- + +## 🎯 Overview + +Chronos2 Server is a **time series forecasting API** powered by Amazon's Chronos-2 model. The system follows **Clean Architecture** principles with strict layer separation and **SOLID** design principles. 
+ +### Key Features + +- ✅ **Probabilistic forecasting** with quantile predictions +- ✅ **Anomaly detection** using forecast bounds +- ✅ **Backtesting** for model evaluation +- ✅ **Multi-series forecasting** support +- ✅ **Excel integration** via Office Add-in +- ✅ **REST API** with OpenAPI documentation + +### Technology Stack + +**Backend:** +- FastAPI (Python 3.10+) +- Chronos-2 (Amazon ML model) +- Pandas (Data manipulation) +- Pydantic (Data validation) + +**Frontend:** +- Office.js (Excel Add-in) +- Vanilla JavaScript (ES6+) +- HTML5/CSS3 + +**Testing:** +- pytest (Unit & Integration tests) +- pytest-cov (Coverage reports) +- FastAPI TestClient (API testing) + +**Deployment:** +- Docker (Containerization) +- HuggingFace Spaces (Hosting) + +--- + +## 🏗️ Architecture Principles + +### Clean Architecture + +The system follows **Clean Architecture** (Uncle Bob) with 4 distinct layers: + +``` +┌─────────────────────────────────────────────────────────┐ +│ Presentation Layer │ +│ (API Routes, Controllers, Excel UI) │ +└──────────────────┬──────────────────────────────────────┘ + │ Depends on ↓ +┌──────────────────▼──────────────────────────────────────┐ +│ Application Layer │ +│ (Use Cases, DTOs, Mappers) │ +└──────────────────┬──────────────────────────────────────┘ + │ Depends on ↓ +┌──────────────────▼──────────────────────────────────────┐ +│ Domain Layer │ +│ (Business Logic, Models, Services, Interfaces) │ +└──────────────────┬──────────────────────────────────────┘ + │ Depends on ↓ +┌──────────────────▼──────────────────────────────────────┐ +│ Infrastructure Layer │ +│ (External Services, ML Models, Config, DB) │ +└─────────────────────────────────────────────────────────┘ +``` + +**Dependency Rule**: Dependencies point **inward** only. Inner layers know nothing about outer layers. + +### Design Goals + +1. **Maintainability**: Easy to understand and modify +2. **Testability**: Components can be tested in isolation +3. **Scalability**: Easy to add new features +4. **Flexibility**: Easy to swap implementations +5. 
**Reliability**: Robust error handling + +--- + +## 🎨 System Architecture + +### High-Level Architecture + +``` +┌──────────────────────────────────────────────────────────────┐ +│ CLIENT LAYER │ +│ ┌────────────────────┐ ┌────────────────────┐ │ +│ │ Excel Add-in │ │ REST Clients │ │ +│ │ (Office.js) │ │ (curl, Postman) │ │ +│ └────────┬───────────┘ └────────┬───────────┘ │ +└───────────┼──────────────────────────────┼───────────────────┘ + │ │ + └──────────────┬───────────────┘ + │ HTTP/HTTPS + ┌──────────────▼──────────────┐ + │ FastAPI Server │ + │ (API Gateway/Router) │ + └──────────────┬──────────────┘ + │ + ┌──────────────────┴──────────────────┐ + │ │ +┌───────▼────────┐ ┌────────▼────────┐ +│ API Routes │ │ Static Files │ +│ /forecast │ │ (Excel UI) │ +│ /anomaly │ └─────────────────┘ +│ /backtest │ +│ /health │ +└───────┬────────┘ + │ + │ Dependency Injection + │ +┌───────▼────────────────────────────────────────┐ +│ APPLICATION LAYER │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ Use Cases │ │ Mappers │ │ +│ │ (Business │ │ (DTO ↔ │ │ +│ │ Workflows) │ │ Domain) │ │ +│ └──────┬───────┘ └──────────────┘ │ +└─────────┼──────────────────────────────────────┘ + │ +┌─────────▼──────────────────────────────────────┐ +│ DOMAIN LAYER │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ Services │ │ Models │ │ +│ │ - Forecast │ │ - TimeSeries│ │ +│ │ - Anomaly │ │ - Config │ │ +│ │ - Backtest │ │ - Result │ │ +│ └──────┬───────┘ └──────────────┘ │ +│ │ │ +│ ┌──────▼───────────────────┐ │ +│ │ Interfaces │ │ +│ │ - IForecastModel │ │ +│ │ - IDataTransformer │ │ +│ └──────────────────────────┘ │ +└────────────────────────────────────────────────┘ + │ +┌─────────▼──────────────────────────────────────┐ +│ INFRASTRUCTURE LAYER │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ ML Models │ │ Config │ │ +│ │ - Chronos2 │ │ - Settings │ │ +│ │ - Factory │ │ - Logger │ │ +│ └──────────────┘ └──────────────┘ │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ Transformers│ │ Generators │ │ +│ │ - DataFrame │ │ - Timestamp │ │ +│ │ - Builder │ │ │ │ +│ └──────────────┘ └──────────────┘ │ +└────────────────────────────────────────────────┘ +``` + +--- + +## 📦 Layer Details + +### 1. Presentation Layer (API) + +**Location**: `app/api/` + +**Responsibilities**: +- HTTP request/response handling +- Input validation (Pydantic) +- Error handling and formatting +- API documentation (OpenAPI) +- CORS middleware + +**Components**: +``` +app/api/ +├── dependencies.py # Dependency injection setup +├── routes/ +│ ├── health.py # Health check endpoints +│ ├── forecast.py # Forecasting endpoints +│ ├── anomaly.py # Anomaly detection endpoints +│ └── backtest.py # Backtesting endpoints +└── middleware/ + └── cors.py # CORS configuration +``` + +**Example Route**: +```python +@router.post("/forecast/univariate") +async def forecast_univariate( + request: ForecastUnivariateRequest, + use_case: ForecastUseCase = Depends(get_forecast_use_case) +) -> ForecastUnivariateResponse: + """Univariate forecasting endpoint""" + # 1. Validate request (Pydantic) + # 2. Execute use case + # 3. Return response + result = use_case.execute(request) + return result +``` + +**Key Principles**: +- ✅ **SRP**: Routes only handle HTTP concerns +- ✅ **DIP**: Depends on use cases (abstractions) +- ✅ No business logic in routes + +--- + +### 2. 
Application Layer + +**Location**: `app/application/` + +**Responsibilities**: +- Orchestrate business workflows (Use Cases) +- Transform between API and Domain models (Mappers) +- Coordinate multiple domain services +- Transaction boundaries + +**Components**: +``` +app/application/ +├── dtos/ # Data Transfer Objects +│ ├── forecast_dtos.py # Forecast DTOs +│ ├── anomaly_dtos.py # Anomaly DTOs +│ └── backtest_dtos.py # Backtest DTOs +├── use_cases/ # Business workflows +│ ├── forecast_use_case.py +│ ├── anomaly_use_case.py +│ └── backtest_use_case.py +└── mappers/ # DTO ↔ Domain mapping + ├── forecast_mapper.py + ├── anomaly_mapper.py + └── backtest_mapper.py +``` + +**Example Use Case**: +```python +class ForecastUnivariateUseCase: + """Orchestrates univariate forecasting workflow""" + + def __init__(self, forecast_service: ForecastService): + self.forecast_service = forecast_service + + def execute(self, input_dto: ForecastInputDTO) -> ForecastOutputDTO: + # 1. Validate DTO + input_dto.validate() + + # 2. Map DTO → Domain + series = mapper.to_time_series(input_dto) + config = mapper.to_forecast_config(input_dto) + + # 3. Execute domain logic + result = self.forecast_service.forecast_univariate(series, config) + + # 4. Map Domain → DTO + output_dto = mapper.to_output_dto(result) + + return output_dto +``` + +**Key Principles**: +- ✅ **SRP**: Use cases orchestrate, don't implement logic +- ✅ **OCP**: New use cases without modifying existing +- ✅ **DIP**: Depends on domain interfaces + +--- + +### 3. Domain Layer (Core) + +**Location**: `app/domain/` + +**Responsibilities**: +- Define business rules +- Implement core algorithms +- Define domain models (entities, value objects) +- Define interfaces (ports) + +**Components**: +``` +app/domain/ +├── models/ # Domain models +│ ├── time_series.py # TimeSeries entity +│ ├── forecast_config.py # ForecastConfig value object +│ ├── forecast_result.py # ForecastResult entity +│ └── anomaly.py # Anomaly models +├── services/ # Business logic +│ ├── forecast_service.py +│ ├── anomaly_service.py +│ └── backtest_service.py +└── interfaces/ # Abstractions (ports) + ├── forecast_model.py # IForecastModel + └── data_transformer.py # IDataTransformer +``` + +**Example Domain Service**: +```python +class ForecastService: + """Domain service for forecasting logic""" + + def __init__( + self, + model: IForecastModel, # Abstraction (DIP) + transformer: IDataTransformer + ): + self.model = model + self.transformer = transformer + + def forecast_univariate( + self, + series: TimeSeries, # Domain model + config: ForecastConfig + ) -> ForecastResult: + # 1. Validate domain rules + if not series.validate(): + raise ValueError("Invalid time series") + + # 2. Transform to ML format + context_df = self.transformer.build_context_df( + series.values, series.timestamps + ) + + # 3. Call ML model + pred_df = self.model.predict( + context_df, + config.prediction_length, + config.quantile_levels + ) + + # 4. Transform back to domain + result = self.transformer.parse_prediction_result(pred_df) + + # 5. Return domain model + return ForecastResult(**result) +``` + +**Key Principles**: +- ✅ **SRP**: Each service has one business responsibility +- ✅ **DIP**: Depends on interfaces, not implementations +- ✅ **ISP**: Small, focused interfaces +- ✅ No dependencies on outer layers + +--- + +### 4. 
Infrastructure Layer + +**Location**: `app/infrastructure/` + +**Responsibilities**: +- Implement domain interfaces (adapters) +- External service integration (ML models, databases) +- Configuration management +- Logging, monitoring + +**Components**: +``` +app/infrastructure/ +├── ml/ # ML model implementations +│ ├── chronos_model.py # Chronos2 adapter +│ └── model_factory.py # Factory pattern +├── config/ # Configuration +│ └── settings.py # Pydantic settings +└── persistence/ # Data persistence (future) + └── cache.py # Caching layer +``` + +**Example Infrastructure**: +```python +class ChronosModel(IForecastModel): + """Adapter for Chronos-2 model (DIP)""" + + def __init__(self, model_id: str, device_map: str): + self.pipeline = Chronos2Pipeline.from_pretrained( + model_id, device_map=device_map + ) + + def predict( + self, + context_df: pd.DataFrame, + prediction_length: int, + quantile_levels: List[float] + ) -> pd.DataFrame: + """Implements IForecastModel interface""" + return self.pipeline.predict_df( + context_df, + prediction_length=prediction_length, + quantile_levels=quantile_levels + ) +``` + +**Factory Pattern**: +```python +class ModelFactory: + """Factory for creating forecast models (OCP)""" + + _models = { + "chronos2": ChronosModel, + # Future: "prophet": ProphetModel, + # Future: "arima": ARIMAModel, + } + + @classmethod + def create(cls, model_type: str, **kwargs) -> IForecastModel: + """Create model instance""" + if model_type not in cls._models: + raise ValueError(f"Unknown model: {model_type}") + + model_class = cls._models[model_type] + return model_class(**kwargs) + + @classmethod + def register_model(cls, name: str, model_class: Type[IForecastModel]): + """Register new model (OCP - extension)""" + cls._models[name] = model_class +``` + +**Key Principles**: +- ✅ **DIP**: Implements domain interfaces +- ✅ **OCP**: Factory allows extension without modification +- ✅ **SRP**: Each adapter has one external responsibility + +--- + +## 🎨 Design Patterns + +### 1. Dependency Injection (DI) + +**Implementation**: FastAPI `Depends()` + +```python +# Define dependencies +def get_forecast_model() -> IForecastModel: + """Singleton model instance""" + return ModelFactory.create("chronos2", model_id=settings.model_id) + +def get_forecast_service( + model: IForecastModel = Depends(get_forecast_model), + transformer: IDataTransformer = Depends(get_data_transformer) +) -> ForecastService: + """Inject dependencies""" + return ForecastService(model=model, transformer=transformer) + +# Use in routes +@router.post("/forecast/univariate") +async def forecast( + use_case: ForecastUseCase = Depends(get_forecast_use_case) +): + return use_case.execute(...) +``` + +**Benefits**: +- ✅ Loose coupling +- ✅ Easy testing (mock dependencies) +- ✅ Configurable at runtime + +--- + +### 2. Factory Pattern + +**Implementation**: `ModelFactory` + +```python +# Create model +model = ModelFactory.create("chronos2", model_id="amazon/chronos-2") + +# Extend without modifying factory +ModelFactory.register_model("custom", CustomModel) +model = ModelFactory.create("custom", ...) +``` + +**Benefits**: +- ✅ OCP compliance (Open for extension) +- ✅ Centralized model creation +- ✅ Easy to add new models + +--- + +### 3. Repository Pattern (Implicit) + +**Implementation**: `DataFrameBuilder` + +```python +class DataFrameBuilder(IDataTransformer): + """Repository-like interface for data""" + + def build_context_df(self, values, timestamps): + """Build context from raw data""" + ... 
+ + def parse_prediction_result(self, pred_df): + """Parse model output""" + ... +``` + +**Benefits**: +- ✅ Data access abstraction +- ✅ Easy to swap data sources +- ✅ Testable with mocks + +--- + +### 4. Strategy Pattern (Implicit) + +**Implementation**: `IForecastModel` interface + +```python +# Different strategies +model1 = ChronosModel(...) +model2 = ProphetModel(...) # Future + +# Same interface +service = ForecastService(model=model1) # ✅ +service = ForecastService(model=model2) # ✅ +``` + +**Benefits**: +- ✅ LSP compliance (Liskov Substitution) +- ✅ Interchangeable implementations +- ✅ Easy A/B testing + +--- + +## 🔄 Data Flow + +### Forecast Request Flow + +``` +1. Client Request + ↓ + POST /forecast/univariate + Body: {"values": [100, 102, 105], "prediction_length": 3} + +2. API Layer (Route) + ↓ + - Validate request (Pydantic) + - Inject use case + ↓ + ForecastUnivariateUseCase + +3. Application Layer (Use Case) + ↓ + - Validate DTO + - Map DTO → Domain models + ↓ + ForecastService + +4. Domain Layer (Service) + ↓ + - Validate business rules + - Call transformer + ↓ + DataFrameBuilder.build_context_df() + +5. Infrastructure Layer (Transformer) + ↓ + - Build DataFrame + ↓ + Back to Domain (Service) + ↓ + - Call model + ↓ + IForecastModel.predict() + +6. Infrastructure Layer (Model) + ↓ + - Chronos2Pipeline.predict_df() + ↓ + Back to Domain (Service) + ↓ + - Parse result + ↓ + DataFrameBuilder.parse_prediction_result() + +7. Domain Layer (Service) + ↓ + - Create ForecastResult (domain model) + ↓ + Back to Application (Use Case) + +8. Application Layer (Use Case) + ↓ + - Map Domain → DTO + ↓ + Back to API (Route) + +9. API Layer (Route) + ↓ + - Serialize response (Pydantic) + ↓ + 200 OK + Body: {"timestamps": [...], "median": [...], "quantiles": {...}} + +10. Client Response +``` + +**Key Observations**: +- ✅ Clear layer boundaries +- ✅ Each layer has specific responsibility +- ✅ Dependencies point inward +- ✅ Domain models never leak to API + +--- + +## 📊 Component Diagrams + +### Forecasting Component Interaction + +``` +┌─────────────────────────────────────────────────────────┐ +│ CLIENT │ +└────────────────────┬────────────────────────────────────┘ + │ HTTP POST + ┌──────────▼──────────┐ + │ ForecastController │ (API Layer) + └──────────┬──────────┘ + │ execute() + ┌──────────▼──────────────┐ + │ ForecastUseCase │ (Application Layer) + └──────────┬──────────────┘ + │ forecast_univariate() + ┌──────────▼──────────────┐ + │ ForecastService │ (Domain Layer) + │ ┌──────────────────┐ │ + │ │ TimeSeries │ │ + │ │ ForecastConfig │ │ + │ │ ForecastResult │ │ + │ └──────────────────┘ │ + └──────┬──────────┬───────┘ + │ │ + build_df() │ │ predict() + │ │ + ┌──────────▼────┐ ┌─▼──────────────┐ + │ DataFrame │ │ ChronosModel │ (Infrastructure) + │ Builder │ │ (IForecastModel)│ + └────────────────┘ └────────────────┘ + │ + ┌─────▼──────┐ + │ Chronos2 │ + │ Pipeline │ + └────────────┘ +``` + +--- + +## 🎯 SOLID Principles + +### Single Responsibility Principle (SRP) ✅ + +**Each class has ONE reason to change** + +```python +# ✅ Good: Separate responsibilities +class ForecastService: + """Only forecasting logic""" + def forecast_univariate(self, series, config): + ... + +class DataFrameBuilder: + """Only data transformation""" + def build_context_df(self, values): + ... + +class ChronosModel: + """Only ML inference""" + def predict(self, context_df): + ... 
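+
+# Each class above has exactly one reason to change: business rules,
+# data shaping, or the ML runtime. That is SRP in practice.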
+``` + +**Violations to avoid**: +```python +# ❌ Bad: Multiple responsibilities +class ForecastServiceBad: + def forecast(self, values): + # Data transformation (should be separate) + df = self._build_dataframe(values) + # ML inference (should be separate) + pred = self._call_model(df) + # HTTP response (should be in API layer) + return {"status": 200, "data": pred} +``` + +--- + +### Open/Closed Principle (OCP) ✅ + +**Open for extension, closed for modification** + +```python +# ✅ Good: Extension without modification +class ModelFactory: + _models = {"chronos2": ChronosModel} + + @classmethod + def register_model(cls, name, model_class): + """Extend by registering new models""" + cls._models[name] = model_class + +# Add new model without modifying factory +ModelFactory.register_model("prophet", ProphetModel) +``` + +**Example extension**: +```python +# New model type (no changes to existing code) +class ProphetModel(IForecastModel): + def predict(self, context_df, prediction_length, quantile_levels): + # Prophet-specific implementation + ... + +# Register and use +ModelFactory.register_model("prophet", ProphetModel) +model = ModelFactory.create("prophet") +service = ForecastService(model=model) # Works! +``` + +--- + +### Liskov Substitution Principle (LSP) ✅ + +**Subtypes must be substitutable for their base types** + +```python +# ✅ Good: Any IForecastModel works +def forecast_with_any_model(model: IForecastModel): + result = model.predict(df, 7, [0.5]) + # Works with ChronosModel, ProphetModel, etc. + return result + +# All implementations honor the contract +model1 = ChronosModel(...) +model2 = ProphetModel(...) # Future + +forecast_with_any_model(model1) # ✅ Works +forecast_with_any_model(model2) # ✅ Works +``` + +--- + +### Interface Segregation Principle (ISP) ✅ + +**Clients shouldn't depend on methods they don't use** + +```python +# ✅ Good: Small, focused interfaces +class IForecastModel(ABC): + """Only forecasting methods""" + @abstractmethod + def predict(self, context_df, prediction_length, quantile_levels): + pass + + @abstractmethod + def get_model_info(self): + pass + +class IDataTransformer(ABC): + """Only transformation methods""" + @abstractmethod + def build_context_df(self, values): + pass + + @abstractmethod + def parse_prediction_result(self, pred_df): + pass +``` + +**Violations to avoid**: +```python +# ❌ Bad: Fat interface +class IForecastModelBad(ABC): + @abstractmethod + def predict(self, ...): pass + + @abstractmethod + def build_dataframe(self, ...): pass # Should be separate + + @abstractmethod + def validate_input(self, ...): pass # Should be separate + + @abstractmethod + def format_response(self, ...): pass # Should be separate +``` + +--- + +### Dependency Inversion Principle (DIP) ✅ + +**Depend on abstractions, not concretions** + +```python +# ✅ Good: Depends on abstraction +class ForecastService: + def __init__( + self, + model: IForecastModel, # Abstract interface + transformer: IDataTransformer # Abstract interface + ): + self.model = model + self.transformer = transformer +``` + +**Violations to avoid**: +```python +# ❌ Bad: Depends on concrete implementation +class ForecastServiceBad: + def __init__(self): + # Coupled to Chronos2Pipeline directly + self.model = Chronos2Pipeline.from_pretrained(...) 
+ # Coupled to DataFrameBuilder directly + self.transformer = DataFrameBuilder() +``` + +--- + +## 🧪 Testing Strategy + +### Test Pyramid + +``` + ╱╲ + ╱ ╲ + ╱ E2E ╲ 10 tests (Integration) + ╱────────╲ + ╱ ╲ + ╱ Integration╲ 25 tests (API, Services) + ╱──────────────╲ + ╱ ╲ + ╱ Unit Tests ╲ 45 tests (Fast, Isolated) +╱────────────────────╲ +``` + +### Unit Tests (45+) + +**Focus**: Individual components in isolation + +**Tools**: pytest, unittest.mock + +**Example**: +```python +def test_forecast_service(mock_model, mock_transformer): + """Test service logic with mocks""" + service = ForecastService(mock_model, mock_transformer) + + series = TimeSeries(values=[100, 102, 105]) + config = ForecastConfig(prediction_length=3) + + result = service.forecast_univariate(series, config) + + assert len(result.timestamps) == 3 + mock_model.predict.assert_called_once() +``` + +### Integration Tests (25+) + +**Focus**: Multiple components working together + +**Tools**: FastAPI TestClient + +**Example**: +```python +@patch('app.infrastructure.ml.chronos_model.Chronos2Pipeline') +def test_forecast_endpoint_e2e(mock_pipeline): + """Test complete API flow""" + mock_pipeline.predict_df.return_value = sample_df + + response = client.post("/forecast/univariate", json={ + "values": [100, 102, 105], + "prediction_length": 3 + }) + + assert response.status_code == 200 + data = response.json() + assert "timestamps" in data +``` + +### Test Coverage + +- **Domain Layer**: 80% +- **Application Layer**: 70% +- **Infrastructure Layer**: 85% +- **API Layer**: 90% +- **Overall**: ~80% + +--- + +## 🚀 Deployment Architecture + +### Container Architecture + +``` +┌─────────────────────────────────────────────────┐ +│ Docker Container │ +│ ┌───────────────────────────────────────────┐ │ +│ │ FastAPI Application │ │ +│ │ ┌─────────────┐ ┌──────────────┐ │ │ +│ │ │ API Server │ │ Static Files│ │ │ +│ │ │ (Port 8000)│ │ (Excel UI) │ │ │ +│ │ └─────────────┘ └──────────────┘ │ │ +│ └───────────────────────────────────────────┘ │ +│ ┌───────────────────────────────────────────┐ │ +│ │ Chronos-2 Model │ │ +│ │ (amazon/chronos-2) │ │ +│ └───────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────┘ + │ + ┌─────────▼─────────┐ + │ HuggingFace │ + │ Spaces │ + │ (Public URL) │ + └───────────────────┘ +``` + +### Environment Configuration + +**Production**: +- HuggingFace Spaces +- CPU inference (free tier) +- Public HTTPS endpoint + +**Development**: +- Local Docker +- Hot reload +- Debug mode + +**Testing**: +- CI/CD pipeline +- Automated tests +- Coverage reports + +--- + +## 📈 Performance Considerations + +### Model Loading + +```python +# Singleton pattern for model (loaded once) +_model_instance = None + +def get_forecast_model(): + global _model_instance + if _model_instance is None: + _model_instance = ModelFactory.create("chronos2") + return _model_instance +``` + +### Caching Strategy (Future) + +```python +# Redis cache for repeated forecasts +@cache(ttl=3600) +def forecast_univariate(values, prediction_length): + ... 
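+
+# A cache key could hash the inputs (a sketch; assumes the values
+# serialize to JSON):
+#   key = hashlib.sha256(
+#       json.dumps({"values": values, "h": prediction_length}).encode()
+#   ).hexdigest()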
+``` + +### Async Processing (Future) + +```python +# Background tasks for long forecasts +@router.post("/forecast/async") +async def forecast_async(background_tasks: BackgroundTasks): + background_tasks.add_task(long_forecast) + return {"task_id": "..."} +``` + +--- + +## 🔐 Security Considerations + +### Input Validation + +- ✅ Pydantic validation at API layer +- ✅ Domain validation in services +- ✅ Type hints throughout + +### Error Handling + +- ✅ Structured error responses +- ✅ No sensitive data in errors +- ✅ Logging for debugging + +### CORS Configuration + +```python +# Configurable CORS +app.add_middleware( + CORSMiddleware, + allow_origins=settings.cors_origins, + allow_methods=["GET", "POST"], + allow_headers=["*"] +) +``` + +--- + +## 📚 References + +### Architecture Patterns + +- **Clean Architecture**: Robert C. Martin +- **Domain-Driven Design**: Eric Evans +- **SOLID Principles**: Robert C. Martin + +### Frameworks & Libraries + +- **FastAPI**: https://fastapi.tiangolo.com/ +- **Chronos**: https://github.com/amazon-science/chronos-forecasting +- **Pydantic**: https://docs.pydantic.dev/ + +--- + +## 🎓 Learning Resources + +### For New Developers + +1. Read `DEVELOPMENT.md` for setup instructions +2. Review `API.md` for endpoint documentation +3. Study test examples in `tests/` +4. Start with simple features (add endpoint) + +### Architecture Books + +- "Clean Architecture" by Robert C. Martin +- "Domain-Driven Design" by Eric Evans +- "Patterns of Enterprise Application Architecture" by Martin Fowler + +--- + +**Last Updated**: 2025-11-09 +**Version**: 3.0.0 +**Maintainer**: Claude AI diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md new file mode 100644 index 0000000000000000000000000000000000000000..c731fde292ee439e12806d240dfcfd3425673059 --- /dev/null +++ b/docs/DEPLOYMENT.md @@ -0,0 +1,791 @@ +# 🚀 Chronos2 Server - Deployment Guide + +**Version**: 3.0.0 +**Date**: 2025-11-09 +**Target**: Production & Staging Environments + +--- + +## 📋 Table of Contents + +1. [Overview](#overview) +2. [Prerequisites](#prerequisites) +3. [Quick Deployment](#quick-deployment) +4. [Deployment Methods](#deployment-methods) +5. [Environment Configuration](#environment-configuration) +6. [Monitoring & Logging](#monitoring--logging) +7. [Scaling](#scaling) +8. [Troubleshooting](#troubleshooting) +9. 
[Rollback Procedures](#rollback-procedures) + +--- + +## 🎯 Overview + +This guide covers deploying Chronos2 Server to: +- **Local/Development**: For testing +- **Staging**: Pre-production environment +- **Production**: Live environment (HuggingFace Spaces or self-hosted) + +### Deployment Architecture + +``` +┌─────────────────────────────────────────────────────┐ +│ Load Balancer │ +│ (Optional - Nginx) │ +└──────────────────┬──────────────────────────────────┘ + │ + ┌─────────┴─────────┐ + │ │ + ┌────▼────┐ ┌────▼────┐ + │ Worker 1│ │ Worker 2│ + │ (Docker)│ │ (Docker)│ + └────┬────┘ └────┬────┘ + │ │ + └─────────┬─────────┘ + │ + ┌─────────▼──────────┐ + │ Shared Storage │ + │ (Model Cache) │ + └────────────────────┘ +``` + +--- + +## 🔧 Prerequisites + +### Required + +- **Docker**: 20.10+ or **Docker Compose**: 1.29+ +- **Python**: 3.10+ (for local development) +- **Git**: For version control +- **8GB RAM**: Minimum for model loading +- **10GB Disk**: For model cache + +### Optional + +- **HuggingFace Account**: For HF Spaces deployment +- **GitHub Account**: For CI/CD +- **Domain Name**: For custom URL +- **SSL Certificate**: For HTTPS (Let's Encrypt recommended) + +### Check Prerequisites + +```bash +# Check Docker +docker --version +docker-compose --version + +# Check Python +python3 --version + +# Check available resources +free -h # RAM +df -h # Disk space +``` + +--- + +## ⚡ Quick Deployment + +### Method 1: Automated Deployment Script + +**Staging**: +```bash +./deploy.sh staging +``` + +**Production**: +```bash +./deploy.sh production v3.0.0 +``` + +The script will: +1. ✅ Run pre-deployment checks +2. ✅ Build Docker image +3. ✅ Run tests (production only) +4. ✅ Deploy container +5. ✅ Verify deployment +6. ✅ Display access URLs + +--- + +### Method 2: Docker Compose + +**Development**: +```bash +docker-compose up -d +``` + +**Production**: +```bash +docker-compose -f docker-compose.prod.yml up -d +``` + +--- + +### Method 3: Manual Docker + +```bash +# Build image +docker build -f Dockerfile.production -t chronos2-server:latest . + +# Run container +docker run -d \ + --name chronos2-api \ + -p 8000:8000 \ + -e MODEL_ID=amazon/chronos-2 \ + -e DEVICE_MAP=cpu \ + --restart unless-stopped \ + chronos2-server:latest + +# Check status +docker ps +docker logs chronos2-api +``` + +--- + +## 🏗️ Deployment Methods + +### 1. HuggingFace Spaces (Recommended for Public APIs) + +**Advantages**: +- ✅ Free hosting +- ✅ Automatic HTTPS +- ✅ Built-in CDN +- ✅ Git-based deployment + +**Steps**: + +```bash +# 1. Install HuggingFace CLI +pip install huggingface_hub + +# 2. Login +huggingface-cli login + +# 3. Create Space +huggingface-cli repo create chronos2-server --type=space --space_sdk=docker + +# 4. Configure Space +# Create README.md in repo root: +cat > README.md << 'EOF' +--- +title: Chronos2 Forecasting API +emoji: 📊 +colorFrom: blue +colorTo: green +sdk: docker +pinned: false +--- + +# Chronos2 Server + +Time series forecasting API powered by Amazon Chronos-2. +EOF + +# 5. Push to HuggingFace +git remote add hf https://huggingface.co/spaces/YOUR_USERNAME/chronos2-server +git push hf main +``` + +**HuggingFace Space Configuration** (`Dockerfile`): +```dockerfile +FROM python:3.10-slim + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY app/ ./app/ +COPY static/ ./static/ + +ENV PORT=7860 +EXPOSE 7860 + +CMD ["uvicorn", "app.main_v3:app", "--host", "0.0.0.0", "--port", "7860"] +``` + +--- + +### 2. 
Self-Hosted (VPS/Cloud) + +**Supported Platforms**: +- AWS EC2 +- Google Cloud Compute +- DigitalOcean Droplets +- Linode +- Azure VMs + +**Example: AWS EC2** + +```bash +# 1. Launch EC2 instance (t3.medium or larger) +# - Ubuntu 22.04 LTS +# - 8GB RAM minimum +# - 20GB storage + +# 2. SSH into instance +ssh ubuntu@your-instance-ip + +# 3. Install Docker +curl -fsSL https://get.docker.com -o get-docker.sh +sh get-docker.sh +sudo usermod -aG docker ubuntu + +# 4. Clone repository +git clone https://github.com/yourusername/chronos2-server.git +cd chronos2-server + +# 5. Deploy +./deploy.sh production + +# 6. Configure firewall +sudo ufw allow 8000/tcp +sudo ufw enable +``` + +--- + +### 3. Kubernetes (Enterprise) + +**Deployment YAML** (`k8s/deployment.yaml`): + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chronos2-server + namespace: production +spec: + replicas: 2 + selector: + matchLabels: + app: chronos2-server + template: + metadata: + labels: + app: chronos2-server + spec: + containers: + - name: api + image: chronos2-server:3.0.0 + ports: + - containerPort: 8000 + env: + - name: MODEL_ID + value: "amazon/chronos-2" + - name: LOG_LEVEL + value: "INFO" + resources: + requests: + memory: "2Gi" + cpu: "1" + limits: + memory: "4Gi" + cpu: "2" + livenessProbe: + httpGet: + path: /health + port: 8000 + initialDelaySeconds: 60 + periodSeconds: 30 + readinessProbe: + httpGet: + path: /health + port: 8000 + initialDelaySeconds: 30 + periodSeconds: 10 +--- +apiVersion: v1 +kind: Service +metadata: + name: chronos2-service + namespace: production +spec: + selector: + app: chronos2-server + ports: + - protocol: TCP + port: 80 + targetPort: 8000 + type: LoadBalancer +``` + +**Deploy to Kubernetes**: +```bash +kubectl apply -f k8s/deployment.yaml +kubectl get pods -n production +kubectl get services -n production +``` + +--- + +## ⚙️ Environment Configuration + +### Environment Variables + +Create `.env` file: + +```ini +# API Configuration +API_TITLE=Chronos-2 Forecasting API +API_VERSION=3.0.0 +API_PORT=8000 + +# Model Configuration +MODEL_ID=amazon/chronos-2 +DEVICE_MAP=cpu # or "cuda" for GPU + +# CORS Configuration +CORS_ORIGINS=["https://yourdomain.com","https://app.yourdomain.com"] + +# Logging +LOG_LEVEL=INFO # DEBUG, INFO, WARNING, ERROR, CRITICAL + +# Performance +WORKERS=2 # Number of Uvicorn workers +MAX_REQUESTS=1000 # Restart worker after N requests +TIMEOUT=300 # Request timeout in seconds + +# Cache (if using Redis) +REDIS_URL=redis://localhost:6379 +CACHE_TTL=3600 # Cache time-to-live in seconds +``` + +### Production Best Practices + +```ini +# Production .env +LOG_LEVEL=WARNING +WORKERS=4 +MAX_REQUESTS=500 +TIMEOUT=120 +CORS_ORIGINS=["https://api.yourdomain.com"] +``` + +### Staging Best Practices + +```ini +# Staging .env +LOG_LEVEL=DEBUG +WORKERS=1 +MAX_REQUESTS=100 +TIMEOUT=300 +CORS_ORIGINS=["*"] +``` + +--- + +## 📊 Monitoring & Logging + +### Application Logs + +**View logs**: +```bash +# Docker logs +docker logs chronos2-api -f + +# Log file (if mounted volume) +tail -f logs/app.log +``` + +**Log Format** (JSON structured): +```json +{ + "timestamp": "2025-11-09T12:00:00Z", + "level": "INFO", + "message": "Forecast request completed", + "request_id": "abc123", + "duration_ms": 1250, + "endpoint": "/forecast/univariate" +} +``` + +### Health Monitoring + +**Health Check Endpoint**: +```bash +curl http://localhost:8000/health +``` + +**Response**: +```json +{ + "status": "healthy", + "timestamp": "2025-11-09T12:00:00Z", + "version": "3.0.0", + 
"model_loaded": true +} +``` + +### Performance Metrics + +**Add Prometheus Metrics** (Optional): + +```python +# app/utils/metrics.py +from prometheus_client import Counter, Histogram, Gauge + +# Counters +requests_total = Counter('http_requests_total', 'Total HTTP requests', ['method', 'endpoint', 'status']) +forecast_requests = Counter('forecast_requests_total', 'Total forecast requests') + +# Histograms +request_duration = Histogram('http_request_duration_seconds', 'HTTP request duration') +model_inference_duration = Histogram('model_inference_duration_seconds', 'Model inference duration') + +# Gauges +model_loaded = Gauge('model_loaded', 'Whether model is loaded') +``` + +**Prometheus Configuration** (`monitoring/prometheus.yml`): +```yaml +global: + scrape_interval: 15s + +scrape_configs: + - job_name: 'chronos2-api' + static_configs: + - targets: ['api:8000'] +``` + +--- + +## 📈 Scaling + +### Horizontal Scaling (Multiple Workers) + +**Docker Compose** (Scale to 3 replicas): +```bash +docker-compose -f docker-compose.prod.yml up -d --scale api=3 +``` + +**Load Balancer Configuration** (Nginx): +```nginx +upstream chronos2_backend { + least_conn; + server localhost:8001; + server localhost:8002; + server localhost:8003; +} + +server { + listen 80; + server_name api.yourdomain.com; + + location / { + proxy_pass http://chronos2_backend; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_connect_timeout 300s; + proxy_send_timeout 300s; + proxy_read_timeout 300s; + } +} +``` + +### Vertical Scaling (More Resources) + +**Increase Docker resources**: +```yaml +# docker-compose.prod.yml +deploy: + resources: + limits: + cpus: '4' + memory: 8G + reservations: + cpus: '2' + memory: 4G +``` + +### Caching Strategy + +**Redis Caching** (Future Enhancement): +```python +# app/infrastructure/cache/redis_cache.py +import redis +import json + +class ForecastCache: + def __init__(self, redis_url: str): + self.redis = redis.from_url(redis_url) + + def get_forecast(self, key: str): + """Get cached forecast""" + cached = self.redis.get(f"forecast:{key}") + return json.loads(cached) if cached else None + + def set_forecast(self, key: str, data: dict, ttl: int = 3600): + """Cache forecast for TTL seconds""" + self.redis.setex( + f"forecast:{key}", + ttl, + json.dumps(data) + ) +``` + +--- + +## 🔧 Troubleshooting + +### Common Issues + +#### Issue 1: Container Won't Start + +**Symptom**: Container exits immediately + +**Diagnosis**: +```bash +docker logs chronos2-api +``` + +**Solutions**: +- Check if port 8000 is available: `netstat -tuln | grep 8000` +- Verify environment variables: `docker inspect chronos2-api` +- Check disk space: `df -h` +- Verify model can be downloaded: Check internet connection + +--- + +#### Issue 2: Model Loading Failed + +**Symptom**: "Failed to load model" error + +**Solutions**: +```bash +# Check internet connectivity +curl -I https://huggingface.co + +# Clear model cache +rm -rf ~/.cache/huggingface + +# Increase memory +docker run -d --memory="8g" ... 
+
+# Pre-download model
+docker exec chronos2-api python -c "from chronos import Chronos2Pipeline; Chronos2Pipeline.from_pretrained('amazon/chronos-2')"
+```
+
+---
+
+#### Issue 3: High Memory Usage
+
+**Symptom**: OOM (Out of Memory) errors
+
+**Solutions**:
+- Reduce number of workers: `WORKERS=1`
+- Add swap space: `sudo dd if=/dev/zero of=/swapfile bs=1G count=4 && sudo chmod 600 /swapfile && sudo mkswap /swapfile && sudo swapon /swapfile`
+- Use smaller model variant (if available)
+- Implement request queuing
+
+---
+
+#### Issue 4: Slow Response Times
+
+**Symptom**: Requests timeout or take too long
+
+**Diagnosis**:
+```bash
+# Check request logs
+docker logs chronos2-api | grep "duration_ms"
+
+# Monitor resource usage
+docker stats chronos2-api
+```
+
+**Solutions**:
+- Enable caching (Redis)
+- Optimize prediction_length (smaller = faster)
+- Add more workers
+- Use GPU if available: `DEVICE_MAP=cuda`
+
+---
+
+### Health Check Failed
+
+```bash
+# Manual health check
+curl -v http://localhost:8000/health
+
+# Check if process is running
+docker exec chronos2-api ps aux
+
+# Check logs for errors
+docker logs chronos2-api --tail 100
+
+# Restart container
+docker restart chronos2-api
+```
+
+---
+
+## 🔄 Rollback Procedures
+
+### Rollback to Previous Version
+
+```bash
+# 1. Stop current version
+docker stop chronos2-api
+
+# 2. Start previous version
+docker run -d \
+  --name chronos2-api \
+  -p 8000:8000 \
+  chronos2-server:v2.1.1  # Previous version
+
+# 3. Verify
+curl http://localhost:8000/health
+```
+
+### Blue-Green Deployment
+
+```bash
+# 1. Deploy new version (Green) on different port
+docker run -d \
+  --name chronos2-api-green \
+  -p 8001:8000 \
+  chronos2-server:v3.0.0
+
+# 2. Test green deployment
+curl http://localhost:8001/health
+
+# 3. Switch traffic (update load balancer)
+# Update Nginx config to point to 8001
+
+# 4. Stop old version (Blue)
+docker stop chronos2-api-blue
+```
+
+### Canary Deployment
+
+```bash
+# 1. Route 10% traffic to new version
+# Configure load balancer:
+# - 90% to v2.1.1
+# - 10% to v3.0.0
+
+# 2. Monitor metrics
+
+# 3. Gradually increase to 50%, then 100%
+
+# 4. Decommission old version
+```
+
+---
+
+## 🔐 Security Best Practices
+
+### 1. Use Non-Root User
+```dockerfile
+# Already implemented in Dockerfile.production
+USER appuser
+```
+
+### 2. Environment Secrets
+```bash
+# Don't commit .env to git
+echo ".env" >> .gitignore
+
+# Use secrets management
+# - AWS Secrets Manager
+# - HashiCorp Vault
+# - Kubernetes Secrets
+```
+
+### 3. HTTPS/TLS
+```bash
+# Use Let's Encrypt for free SSL
+sudo apt-get install certbot python3-certbot-nginx
+sudo certbot --nginx -d api.yourdomain.com
+```
+
+### 4. Rate Limiting
+```python
+# Add to app/api/middleware/rate_limit.py
+from fastapi import Request
+from slowapi import Limiter
+from slowapi.util import get_remote_address
+
+limiter = Limiter(key_func=get_remote_address)
+
+@app.post("/forecast/univariate")
+@limiter.limit("100/minute")
+async def forecast(request: Request, ...):
+    # slowapi requires the Request object in the endpoint signature
+    ...
+```
+
+### 5. 
API Key Authentication +```python +# Add to app/api/dependencies.py +from fastapi import Security, HTTPException +from fastapi.security import APIKeyHeader + +api_key_header = APIKeyHeader(name="X-API-Key") + +async def verify_api_key(api_key: str = Security(api_key_header)): + if api_key != settings.api_key: + raise HTTPException(status_code=403, detail="Invalid API Key") + return api_key +``` + +--- + +## 📞 Support & Maintenance + +### Regular Maintenance Tasks + +**Weekly**: +- Check logs for errors +- Monitor resource usage +- Review security updates + +**Monthly**: +- Update dependencies: `pip install --upgrade -r requirements.txt` +- Clear old logs: `find logs/ -mtime +30 -delete` +- Review performance metrics + +**Quarterly**: +- Review and optimize database queries +- Update Docker base images +- Security audit + +### Backup Procedures + +```bash +# Backup logs +tar -czf logs-backup-$(date +%Y%m%d).tar.gz logs/ + +# Backup configuration +tar -czf config-backup-$(date +%Y%m%d).tar.gz .env docker-compose.prod.yml + +# Upload to S3 (example) +aws s3 cp logs-backup-*.tar.gz s3://your-backup-bucket/ +``` + +--- + +## 🎉 Deployment Checklist + +Before deploying to production: + +- [ ] All tests passing +- [ ] Documentation updated +- [ ] Environment variables configured +- [ ] SSL/TLS certificate installed +- [ ] Health check endpoint working +- [ ] Monitoring configured +- [ ] Backup procedures in place +- [ ] Rollback plan documented +- [ ] Load testing completed +- [ ] Security scan passed +- [ ] Team notified of deployment + +--- + +**Last Updated**: 2025-11-09 +**Version**: 3.0.0 +**Maintainer**: DevOps Team diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md new file mode 100644 index 0000000000000000000000000000000000000000..203a3c7b6245c48817ff9cbcb100ccf3b7b8126d --- /dev/null +++ b/docs/DEVELOPMENT.md @@ -0,0 +1,878 @@ +# 👨‍💻 Chronos2 Server - Development Guide + +**Version**: 3.0.0 +**Date**: 2025-11-09 +**For**: Developers contributing to or extending the project + +--- + +## 📋 Table of Contents + +1. [Getting Started](#getting-started) +2. [Development Setup](#development-setup) +3. [Project Structure](#project-structure) +4. [Development Workflow](#development-workflow) +5. [Adding Features](#adding-features) +6. [Testing](#testing) +7. [Code Style](#code-style) +8. [Debugging](#debugging) +9. [Contributing](#contributing) + +--- + +## 🚀 Getting Started + +### Prerequisites + +- **Python**: 3.10 or higher +- **Git**: For version control +- **Docker**: (Optional) For containerized development +- **IDE**: VS Code, PyCharm, or similar + +### Quick Setup + +```bash +# 1. Clone repository +git clone https://github.com/yourusername/chronos2-server.git +cd chronos2-server + +# 2. Create virtual environment +python3 -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate + +# 3. Install dependencies +pip install -r requirements.txt + +# 4. Run tests +pytest tests/ -v + +# 5. Start server +python -m uvicorn app.main_v3:app --reload --host 0.0.0.0 --port 8000 + +# 6. Open browser +# http://localhost:8000/docs +``` + +--- + +## 🛠️ Development Setup + +### 1. Environment Setup + +**Create `.env` file**: +```bash +cp .env.example .env +``` + +**Edit `.env`**: +```ini +# API Configuration +API_TITLE=Chronos-2 Forecasting API +API_VERSION=3.0.0 +API_PORT=8000 + +# Model Configuration +MODEL_ID=amazon/chronos-2 +DEVICE_MAP=cpu + +# CORS +CORS_ORIGINS=["http://localhost:3000","https://localhost:3001","*"] + +# Logging +LOG_LEVEL=INFO +``` + +--- + +### 2. 
Install Development Dependencies + +```bash +# Core dependencies +pip install -r requirements.txt + +# Development tools +pip install \ + pytest \ + pytest-cov \ + pytest-mock \ + black \ + flake8 \ + mypy \ + ipython + +# Optional: Pre-commit hooks +pip install pre-commit +pre-commit install +``` + +--- + +### 3. IDE Configuration + +#### VS Code + +**Create `.vscode/settings.json`**: +```json +{ + "python.defaultInterpreterPath": "${workspaceFolder}/venv/bin/python", + "python.linting.enabled": true, + "python.linting.flake8Enabled": true, + "python.formatting.provider": "black", + "python.testing.pytestEnabled": true, + "python.testing.pytestArgs": ["tests"], + "editor.formatOnSave": true, + "editor.rulers": [88] +} +``` + +**Create `.vscode/launch.json`**: +```json +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Python: FastAPI", + "type": "python", + "request": "launch", + "module": "uvicorn", + "args": [ + "app.main_v3:app", + "--reload", + "--host", + "0.0.0.0", + "--port", + "8000" + ], + "jinja": true, + "justMyCode": false + }, + { + "name": "Python: Current Test File", + "type": "python", + "request": "launch", + "module": "pytest", + "args": [ + "${file}", + "-v" + ] + } + ] +} +``` + +--- + +#### PyCharm + +1. **Open Project**: File → Open → Select `chronos2-server/` +2. **Configure Interpreter**: Settings → Project → Python Interpreter → Add → Virtualenv → Existing → `venv/bin/python` +3. **Enable pytest**: Settings → Tools → Python Integrated Tools → Testing → pytest +4. **Run Configuration**: + - Click "Add Configuration" + - Script path: Select `uvicorn` + - Parameters: `app.main_v3:app --reload` + +--- + +## 📂 Project Structure + +``` +chronos2-server/ +├── app/ # Application code +│ ├── api/ # Presentation layer +│ │ ├── dependencies.py # Dependency injection +│ │ ├── routes/ # API endpoints +│ │ │ ├── health.py +│ │ │ ├── forecast.py +│ │ │ ├── anomaly.py +│ │ │ └── backtest.py +│ │ └── middleware/ +│ │ +│ ├── application/ # Application layer +│ │ ├── dtos/ # Data Transfer Objects +│ │ ├── use_cases/ # Business workflows +│ │ └── mappers/ # DTO ↔ Domain mapping +│ │ +│ ├── domain/ # Domain layer (Core) +│ │ ├── interfaces/ # Abstract interfaces +│ │ ├── models/ # Domain models +│ │ └── services/ # Business logic +│ │ +│ ├── infrastructure/ # Infrastructure layer +│ │ ├── config/ # Configuration +│ │ └── ml/ # ML model implementations +│ │ +│ ├── utils/ # Shared utilities +│ └── main_v3.py # Application entry point +│ +├── tests/ # Test suite +│ ├── conftest.py # Shared fixtures +│ ├── unit/ # Unit tests +│ └── integration/ # Integration tests +│ +├── docs/ # Documentation +│ ├── ARCHITECTURE.md +│ ├── API.md +│ └── DEVELOPMENT.md (this file) +│ +├── static/ # Frontend (Excel Add-in) +│ └── taskpane/ +│ +├── requirements.txt # Dependencies +├── pytest.ini # Pytest configuration +├── .env.example # Environment template +└── README.md # Project overview +``` + +--- + +## 🔄 Development Workflow + +### 1. Feature Development Cycle + +``` +1. Create Feature Branch + ↓ +2. Write Tests (TDD) + ↓ +3. Implement Feature + ↓ +4. Run Tests + ↓ +5. Code Review + ↓ +6. Merge to Main +``` + +### 2. Git Workflow + +```bash +# 1. Create feature branch +git checkout -b feature/add-prophet-model + +# 2. Make changes +# ... edit files ... + +# 3. Run tests +pytest tests/ -v + +# 4. Commit +git add . +git commit -m "feat: Add Prophet model support + +- Implement ProphetModel class +- Register in ModelFactory +- Add tests +" + +# 5. 
Push
+git push origin feature/add-prophet-model
+
+# 6. Create Pull Request
+gh pr create --title "Add Prophet model support" --body "..."
+```
+
+### 3. Commit Message Convention
+
+**Format**: `<type>(<scope>): <subject>`
+
+**Types**:
+- `feat`: New feature
+- `fix`: Bug fix
+- `docs`: Documentation
+- `test`: Tests
+- `refactor`: Code refactoring
+- `style`: Formatting
+- `chore`: Maintenance
+
+**Examples**:
+```bash
+feat(api): Add streaming forecast endpoint
+fix(domain): Handle empty time series
+docs(api): Update forecast examples
+test(services): Add anomaly service tests
+refactor(infrastructure): Extract model loading
+```
+
+---
+
+## ➕ Adding Features
+
+### Example: Add New ML Model
+
+**Step 1: Define Interface Implementation**
+
+Create `app/infrastructure/ml/prophet_model.py`:
+```python
+from typing import List, Dict, Any
+import pandas as pd
+from prophet import Prophet
+from app.domain.interfaces.forecast_model import IForecastModel
+
+class ProphetModel(IForecastModel):
+    """Prophet model implementation"""
+
+    def __init__(self, **kwargs):
+        self.model = Prophet(**kwargs)
+        self._fitted = False
+
+    def predict(
+        self,
+        context_df: pd.DataFrame,
+        prediction_length: int,
+        quantile_levels: List[float],
+        **kwargs
+    ) -> pd.DataFrame:
+        """Implement predict method"""
+        # Convert to Prophet format
+        prophet_df = pd.DataFrame({
+            'ds': pd.to_datetime(context_df['timestamp']),
+            'y': context_df['target']
+        })
+
+        # Fit model
+        if not self._fitted:
+            self.model.fit(prophet_df)
+            self._fitted = True
+
+        # Make forecast
+        future = self.model.make_future_dataframe(periods=prediction_length)
+        forecast = self.model.predict(future)
+
+        # Convert to expected format
+        result_df = pd.DataFrame({
+            'id': context_df['id'].iloc[0],
+            'timestamp': forecast['ds'].tail(prediction_length),
+            'predictions': forecast['yhat'].tail(prediction_length)
+        })
+
+        # Add quantiles, approximated from Prophet's uncertainty interval:
+        # lower bound for q < 0.5, point forecast for the median, upper above
+        for q in quantile_levels:
+            col_name = f"{q:.3g}"
+            if q < 0.5:
+                source = forecast['yhat_lower']
+            elif q > 0.5:
+                source = forecast['yhat_upper']
+            else:
+                source = forecast['yhat']
+            result_df[col_name] = source.tail(prediction_length)
+
+        return result_df
+
+    def get_model_info(self) -> Dict[str, Any]:
+        return {
+            "type": "Prophet",
+            "provider": "Facebook",
+            "fitted": self._fitted
+        }
+```
+
+**Step 2: Register in Factory**
+
+Edit `app/infrastructure/ml/model_factory.py`:
+```python
+from app.infrastructure.ml.prophet_model import ProphetModel
+
+class ModelFactory:
+    _models = {
+        "chronos2": ChronosModel,
+        "prophet": ProphetModel,  # Add this line
+    }
+```
+
+**Step 3: Add Tests**
+
+Create `tests/unit/test_prophet_model.py`:
+```python
+import pytest
+import pandas as pd
+from app.infrastructure.ml.prophet_model import ProphetModel
+
+def test_prophet_model_initialization():
+    """Test Prophet model creation"""
+    model = ProphetModel()
+    assert isinstance(model, ProphetModel)
+    assert model._fitted is False
+
+def test_prophet_predict():
+    """Test Prophet prediction"""
+    model = ProphetModel()
+
+    context_df = pd.DataFrame({
+        'id': ['series_0'] * 10,
+        'timestamp': pd.date_range('2025-01-01', periods=10),
+        'target': [100, 102, 105, 103, 108, 112, 115, 118, 120, 122]
+    })
+
+    result = model.predict(
+        context_df=context_df,
+        prediction_length=3,
+        quantile_levels=[0.1, 0.5, 0.9]
+    )
+
+    assert len(result) == 3
+    assert 'predictions' in result.columns
+```
+
+**Step 4: Update Documentation**
+
+Edit `docs/API.md`:
+````markdown
+### Available Models
+
+- **chronos2**: Amazon Chronos-2 (default)
+- **prophet**: Facebook Prophet (new!)
+
+```python
+# Use Prophet model
+model = ModelFactory.create("prophet")
+service = ForecastService(model=model)
+```
+````
+
+**Step 5: Run Tests**
+```bash
+pytest tests/unit/test_prophet_model.py -v
+pytest tests/ -v  # All tests
+```
+
+---
+
+### Example: Add New API Endpoint
+
+**Step 1: Create Use Case**
+
+Create `app/application/use_cases/evaluate_use_case.py`:
+```python
+from app.domain.services.forecast_service import ForecastService
+from app.application.dtos.evaluate_dtos import EvaluateInputDTO, EvaluateOutputDTO
+
+class EvaluateUseCase:
+    """Evaluate model on multiple test sets"""
+
+    def __init__(self, forecast_service: ForecastService):
+        self.forecast_service = forecast_service
+
+    def execute(self, input_dto: EvaluateInputDTO) -> EvaluateOutputDTO:
+        # Implement evaluation logic
+        ...
+        return EvaluateOutputDTO(...)
+```
+
+**Step 2: Create Route**
+
+Create `app/api/routes/evaluate.py`:
+```python
+from fastapi import APIRouter, Depends
+from app.api.dependencies import get_evaluate_use_case
+from app.application.use_cases.evaluate_use_case import EvaluateUseCase
+from app.application.dtos.evaluate_dtos import EvaluateInputDTO, EvaluateOutputDTO
+
+router = APIRouter(prefix="/evaluate", tags=["Evaluate"])
+
+@router.post("/", response_model=EvaluateOutputDTO)
+async def evaluate_model(
+    request: EvaluateInputDTO,
+    use_case: EvaluateUseCase = Depends(get_evaluate_use_case)
+):
+    """Evaluate model performance"""
+    return use_case.execute(request)
+```
+
+**Step 3: Register Route**
+
+Edit `app/main_v3.py`:
+```python
+from app.api.routes import evaluate
+
+app.include_router(evaluate.router)
+```
+
+**Step 4: Add Tests**
+
+Create `tests/integration/test_evaluate_endpoint.py`:
+```python
+from fastapi.testclient import TestClient
+from app.main_v3 import app
+
+client = TestClient(app)
+
+def test_evaluate_endpoint():
+    response = client.post("/evaluate/", json={
+        "test_sets": [...]
+    })
+    assert response.status_code == 200
+```
+
+---
+
+## 🧪 Testing
+
+### Running Tests
+
+```bash
+# All tests
+pytest tests/ -v
+
+# Specific test file
+pytest tests/unit/test_forecast_service.py -v
+
+# Specific test
+pytest tests/unit/test_forecast_service.py::test_forecast_univariate_success -v
+
+# With coverage
+pytest tests/ --cov=app --cov-report=html
+
+# Only unit tests
+pytest tests/ -m unit
+
+# Only integration tests
+pytest tests/ -m integration
+
+# Watch mode (requires pytest-watch)
+ptw tests/
+```
+
+### Writing Tests
+
+**Unit Test Template**:
+```python
+import pytest
+from app.domain.services.forecast_service import ForecastService
+
+@pytest.mark.unit
+class TestForecastService:
+    """Test suite for ForecastService"""
+
+    def test_forecast_success(self, mock_model, mock_transformer):
+        """Test successful forecast"""
+        # Arrange
+        service = ForecastService(mock_model, mock_transformer)
+        series = TimeSeries(values=[100, 102, 105])
+        config = ForecastConfig(prediction_length=3)
+
+        # Act
+        result = service.forecast_univariate(series, config)
+
+        # Assert
+        assert len(result.timestamps) == 3
+        mock_model.predict.assert_called_once()
+
+    def test_forecast_with_invalid_data(self):
+        """Test error handling"""
+        # Arrange
+        service = ForecastService(...)
+
+        # Act & Assert
+        with pytest.raises(ValueError, match="Invalid"):
+            service.forecast_univariate(...)
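+
+# The mock_model / mock_transformer arguments above are assumed to be pytest
+# fixtures from tests/conftest.py; a minimal sketch (names hypothetical):
+#
+#   @pytest.fixture
+#   def mock_model():
+#       model = unittest.mock.MagicMock(spec=IForecastModel)
+#       model.predict.return_value = pd.DataFrame()  # canned prediction frame
+#       return model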
+``` + +**Integration Test Template**: +```python +from fastapi.testclient import TestClient +from app.main_v3 import app + +client = TestClient(app) + +@pytest.mark.integration +def test_forecast_endpoint(): + """Test forecast endpoint E2E""" + # Arrange + payload = { + "values": [100, 102, 105], + "prediction_length": 3 + } + + # Act + response = client.post("/forecast/univariate", json=payload) + + # Assert + assert response.status_code == 200 + data = response.json() + assert "median" in data + assert len(data["median"]) == 3 +``` + +--- + +## 🎨 Code Style + +### Formatting + +**Use Black** (line length 88): +```bash +black app/ tests/ +``` + +**Use isort** (import sorting): +```bash +isort app/ tests/ +``` + +### Linting + +**Flake8**: +```bash +flake8 app/ tests/ --max-line-length=88 +``` + +**MyPy** (type checking): +```bash +mypy app/ +``` + +### Style Guidelines + +**1. Type Hints**: +```python +# ✅ Good +def forecast(values: List[float], length: int) -> Dict[str, List[float]]: + ... + +# ❌ Bad +def forecast(values, length): + ... +``` + +**2. Docstrings**: +```python +def forecast_univariate(self, series: TimeSeries, config: ForecastConfig) -> ForecastResult: + """ + Generate forecast for univariate time series. + + Args: + series: Time series to forecast + config: Forecast configuration + + Returns: + Forecast result with predictions + + Raises: + ValueError: If series is invalid + """ + ... +``` + +**3. Naming Conventions**: +```python +# Classes: PascalCase +class ForecastService: + ... + +# Functions/methods: snake_case +def forecast_univariate(): + ... + +# Constants: UPPER_SNAKE_CASE +MAX_PREDICTION_LENGTH = 365 + +# Private methods: _leading_underscore +def _validate_input(): + ... +``` + +--- + +## 🐛 Debugging + +### Debug Server + +**Run with debugger**: +```bash +# With pdb +python -m pdb -m uvicorn app.main_v3:app --reload + +# With ipdb (better interface) +pip install ipdb +python -m ipdb -m uvicorn app.main_v3:app --reload +``` + +**Add breakpoint in code**: +```python +def forecast_univariate(self, series, config): + import ipdb; ipdb.set_trace() # Debugger stops here + ... +``` + +### Logging + +**Add logging**: +```python +from app.utils.logger import setup_logger + +logger = setup_logger(__name__) + +def forecast_univariate(self, series, config): + logger.info(f"Forecasting series with {len(series.values)} points") + logger.debug(f"Config: {config}") + + try: + result = self._do_forecast(series, config) + logger.info("Forecast successful") + return result + except Exception as e: + logger.error(f"Forecast failed: {e}", exc_info=True) + raise +``` + +**View logs**: +```bash +# Set log level in .env +LOG_LEVEL=DEBUG + +# Or environment variable +LOG_LEVEL=DEBUG python -m uvicorn app.main_v3:app --reload +``` + +### Testing in Isolation + +**Test single component**: +```python +# test_debug.py +from app.domain.services.forecast_service import ForecastService + +# Create mocks +model = MockModel() +transformer = MockTransformer() + +# Test service +service = ForecastService(model, transformer) +result = service.forecast_univariate(...) + +print(result) +``` + +--- + +## 🤝 Contributing + +### Pull Request Process + +1. **Fork & Clone**: +```bash +gh repo fork yourusername/chronos2-server --clone +cd chronos2-server +``` + +2. **Create Branch**: +```bash +git checkout -b feature/my-feature +``` + +3. **Make Changes**: +```bash +# Edit files +# Add tests +# Update docs +``` + +4. **Run Tests**: +```bash +pytest tests/ -v +black app/ tests/ +flake8 app/ tests/ +``` + +5. 
**Commit**:
+```bash
+git add .
+git commit -m "feat: Add my feature"
+```
+
+6. **Push**:
+```bash
+git push origin feature/my-feature
+```
+
+7. **Create PR**:
+```bash
+gh pr create --title "Add my feature" --body "Description..."
+```
+
+### Code Review Checklist
+
+- [ ] Tests added/updated
+- [ ] Documentation updated
+- [ ] Code formatted (black, isort)
+- [ ] Linting passes (flake8)
+- [ ] Type hints added (mypy)
+- [ ] Commit message follows convention
+- [ ] PR description clear
+
+---
+
+## 📚 Resources
+
+### Documentation
+
+- **Architecture**: `docs/ARCHITECTURE.md`
+- **API**: `docs/API.md`
+- **Interactive API**: http://localhost:8000/docs
+
+### External Resources
+
+- **FastAPI**: https://fastapi.tiangolo.com/
+- **Chronos**: https://github.com/amazon-science/chronos-forecasting
+- **Pytest**: https://docs.pytest.org/
+- **Clean Architecture**: https://blog.cleancoder.com/uncle-bob/2012/08/13/the-clean-architecture.html
+
+### Getting Help
+
+- **GitHub Issues**: Report bugs, request features
+- **Discussions**: Ask questions, share ideas
+- **Email**: support@example.com
+
+---
+
+## 🎓 Learning Path
+
+### For New Contributors
+
+**Week 1: Understanding**
+1. Read `README.md`
+2. Read `docs/ARCHITECTURE.md`
+3. Run the project locally
+4. Explore API at `/docs`
+
+**Week 2: Small Changes**
+1. Fix a typo in docs
+2. Add a test case
+3. Improve error message
+
+**Week 3: Features**
+1. Add new endpoint
+2. Implement new model
+3. Add new use case
+
+---
+
+## 🔧 Troubleshooting
+
+### Common Issues
+
+**Issue**: `ModuleNotFoundError: No module named 'app'`
+```bash
+# Solution: Run from project root
+cd /path/to/chronos2-server
+python -m uvicorn app.main_v3:app --reload
+```
+
+**Issue**: `Model loading fails`
+```bash
+# Solution: Check internet connection, HuggingFace access
+pip install --upgrade transformers
+```
+
+**Issue**: `Tests fail with import errors`
+```bash
+# Solution: Install test dependencies
+pip install pytest pytest-cov pytest-mock
+```
+
+**Issue**: `Port 8000 already in use`
+```bash
+# Solution: Use different port
+uvicorn app.main_v3:app --port 8001
+```
+
+---
+
+**Last Updated**: 2025-11-09
+**Version**: 3.0.0
+**Maintainer**: Claude AI
diff --git a/docs/QUICK_START_REFACTORING.md b/docs/QUICK_START_REFACTORING.md
new file mode 100644
index 0000000000000000000000000000000000000000..23a7d0e839d5187fcad8c46bff6b3b7e5425cb1b
--- /dev/null
+++ b/docs/QUICK_START_REFACTORING.md
@@ -0,0 +1,408 @@
+# 🚀 Quick Start - SOLID Refactoring
+
+**For**: The developer starting the refactoring
+**Estimated time**: 15 minutes of initial setup
+**Goal**: Understand the refactoring and get started
+
+---
+
+## 📖 Quick Context
+
+### Current State (v2.1.1)
+- 1 monolithic Python file (680 lines)
+- 1 monolithic JavaScript file (1,105 lines)
+- 0% test coverage
+- 25% SOLID compliance
+
+### Target State (v3.0.0)
+- 20+ modular Python files (<150 lines each)
+- 6+ modular JavaScript files
+- 70% test coverage
+- 85% SOLID compliance
+
+---
+
+## 🎯 Key Principles
+
+### SOLID in 1 Minute
+
+```
+S - Single Responsibility → 1 class = 1 job
+O - Open/Closed           → Extend without modifying
+L - Liskov Substitution   → Interchangeable implementations
+I - Interface Segregation → Small interfaces
+D - Dependency Inversion  → Depend on abstractions
+```
+
+### Layered Architecture
+
+```
+┌─────────────────┐
+│   API Layer     │ ← Endpoints (FastAPI)
+├─────────────────┤
+│  Domain Layer   │ ← Business logic
+├─────────────────┤
+│ Infrastructure  │ ← Implementations (Chronos, DB)
+└─────────────────┘
+```
+
+---
+
+## 🛠️ Initial Setup
+
+### 1. Prepare the Environment
+
+```bash
+# Go to the project
+cd /var/home/joss/Proyectos/chronos2-server
+
+# Create the refactoring branch
+git checkout -b refactor/solid-architecture
+
+# Back up the current code
+cp app/main.py app/main_v2.1.1_backup.py
+cp static/taskpane/taskpane.js static/taskpane/taskpane_v2.1.1_backup.js
+
+# Install development dependencies
+pip install pytest pytest-cov black flake8 mypy
+```
+
+### 2. Create the Folder Structure
+
+```bash
+# Backend
+mkdir -p app/{api/{routes,middleware},domain/{models,services,interfaces},infrastructure/{ml,config},schemas/{requests,responses},utils}
+
+# Frontend
+mkdir -p static/taskpane/js/{services,excel,ui,features,config}
+
+# Tests
+mkdir -p tests/{unit,integration,fixtures}
+
+# Docs
+mkdir -p docs
+```
+
+### 3. Verify the Structure
+
+```bash
+tree -L 3 app/
+```
+
+**Expected output**:
+```
+app/
+├── api/
+│   ├── routes/
+│   └── middleware/
+├── domain/
+│   ├── models/
+│   ├── services/
+│   └── interfaces/
+├── infrastructure/
+│   ├── ml/
+│   └── config/
+├── schemas/
+│   ├── requests/
+│   └── responses/
+└── utils/
+```
+
+---
+
+## 📝 First Task: Centralized Settings
+
+### Why start here?
+- Easy to implement
+- No dependencies
+- Foundation for everything else
+
+### Code to Create
+
+**app/infrastructure/config/settings.py**:
+```python
+from pydantic_settings import BaseSettings
+
+class Settings(BaseSettings):
+    # API
+    api_title: str = "Chronos-2 Forecasting API"
+    api_version: str = "3.0.0"
+
+    # Model
+    model_id: str = "amazon/chronos-2"
+    device_map: str = "cpu"
+
+    # CORS
+    cors_origins: list = ["*"]
+
+    class Config:
+        env_file = ".env"
+
+settings = Settings()
+```
+
+### Quick Test
+
+```python
+# In a Python REPL
+from app.infrastructure.config.settings import settings
+print(settings.api_title)  # "Chronos-2 Forecasting API"
+print(settings.model_id)   # "amazon/chronos-2"
+```
+
+---
+
+## 📝 Second Task: Centralized Logger
+
+**app/utils/logger.py**:
+```python
+import logging
+
+def setup_logger(name: str) -> logging.Logger:
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.INFO)
+
+    if not logger.handlers:
+        handler = logging.StreamHandler()
+        formatter = logging.Formatter(
+            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+        )
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+
+    return logger
+```
+
+### Usage
+
+```python
+from app.utils.logger import setup_logger
+
+logger = setup_logger(__name__)
+logger.info("Test message")
+logger.error("Error message")
+```
+
+---
+
+## 📝 Third Task: First Interface
+
+**app/domain/interfaces/forecast_model.py**:
+```python
+from abc import ABC, abstractmethod
+from typing import List, Dict, Any
+import pandas as pd
+
+class IForecastModel(ABC):
+    """Interface for forecasting models"""
+
+    @abstractmethod
+    def predict(
+        self,
+        context_df: pd.DataFrame,
+        prediction_length: int,
+        quantile_levels: List[float],
+        **kwargs
+    ) -> pd.DataFrame:
+        pass
+
+    @abstractmethod
+    def get_model_info(self) -> Dict[str, Any]:
+        pass
+```
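+
+To see what this abstraction buys you, here is a minimal sketch (hypothetical, not part of the plan's file list) of a fake implementation that tests can use instead of the real model:
+
+```python
+import pandas as pd
+
+from app.domain.interfaces.forecast_model import IForecastModel
+
+class FakeForecastModel(IForecastModel):
+    """Canned predictions: no weights, no network, instant tests."""
+
+    def predict(self, context_df, prediction_length, quantile_levels, **kwargs):
+        # Repeat the last observed value as a flat "forecast"
+        last = context_df["target"].iloc[-1]
+        return pd.DataFrame({
+            "timestamp": range(prediction_length),
+            "predictions": [last] * prediction_length,
+        })
+
+    def get_model_info(self):
+        return {"type": "fake"}
+```
+
+Any test can now exercise the domain services against this fake without loading Chronos.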
+
+### Why an interface?
+
+- **Abstraction**: No dependency on a specific implementation
+- **Testable**: Easy to mock
+- **Extensible**: Add models (Prophet, ARIMA) without changing code
+
+---
+
+## 🧪 First Test
+
+**tests/unit/test_settings.py**:
+```python
+import pytest
+from app.infrastructure.config.settings import Settings
+
+def test_settings_defaults():
+    settings = Settings()
+    assert settings.api_title == "Chronos-2 Forecasting API"
+    assert settings.api_version == "3.0.0"
+    assert settings.model_id == "amazon/chronos-2"
+
+def test_settings_from_env(monkeypatch):
+    monkeypatch.setenv("MODEL_ID", "amazon/chronos-t5-small")
+    settings = Settings()
+    assert settings.model_id == "amazon/chronos-t5-small"
+```
+
+### Run the Test
+
+```bash
+pytest tests/unit/test_settings.py -v
+```
+
+**Expected output**:
+```
+tests/unit/test_settings.py::test_settings_defaults PASSED
+tests/unit/test_settings.py::test_settings_from_env PASSED
+```
+
+---
+
+## 🎯 Today's Checklist (2-3 hours)
+
+- [ ] Set up the environment (git branch, dependencies)
+- [ ] Create the folder structure
+- [ ] Implement Settings
+- [ ] Implement Logger
+- [ ] Create the first interface (IForecastModel)
+- [ ] Write the first tests
+- [ ] Commit: "chore: setup SOLID architecture foundation"
+
+---
+
+## 📚 Reference Material
+
+### Project Documentation
+1. `ANALISIS_SOLID.md` - Problems identified
+2. `PLAN_REFACTORIZACION.md` - Full plan
+3. `docs/QUICK_START_REFACTORING.md` - This file
+
+### Recommended Reading
+- [Clean Architecture - Robert Martin](https://blog.cleancoder.com/uncle-bob/2012/08/13/the-clean-architecture.html)
+- [SOLID Principles - Python](https://realpython.com/solid-principles-python/)
+- [FastAPI Best Practices](https://fastapi.tiangolo.com/tutorial/bigger-applications/)
+
+---
+
+## 🚨 Common Problems
+
+### "ModuleNotFoundError: No module named 'app.infrastructure'"
+
+**Solution**: Add an `__init__.py` to every folder:
+```bash
+touch app/__init__.py
+touch app/infrastructure/__init__.py
+touch app/infrastructure/config/__init__.py
+# etc...
+```
+
+### "Settings does not load .env"
+
+**Solution**: Install `python-dotenv`:
+```bash
+pip install python-dotenv
+```
+
+### "Tests cannot find modules"
+
+**Solution**: Add `__init__.py` under `tests/`:
+```bash
+touch tests/__init__.py
+touch tests/unit/__init__.py
+```
+
+---
+
+## 💡 Productivity Tips
+
+### 1. Use VSCode Tasks
+
+Create `.vscode/tasks.json`:
+```json
+{
+  "version": "2.0.0",
+  "tasks": [
+    {
+      "label": "Run Tests",
+      "type": "shell",
+      "command": "pytest tests/ -v",
+      "group": "test"
+    },
+    {
+      "label": "Type Check",
+      "type": "shell",
+      "command": "mypy app/",
+      "group": "test"
+    }
+  ]
+}
+```
+
+### 2. Pre-commit Hooks
+
+```bash
+# Install pre-commit
+pip install pre-commit
+
+# Create .pre-commit-config.yaml
+cat > .pre-commit-config.yaml << EOF
+repos:
+  - repo: https://github.com/psf/black
+    rev: 23.12.1
+    hooks:
+      - id: black
+  - repo: https://github.com/pycqa/flake8
+    rev: 7.0.0
+    hooks:
+      - id: flake8
+EOF
+
+# Install the hooks
+pre-commit install
+```
+
+### 3. Snippets for Interfaces
+
+A VSCode snippet to create interfaces quickly:
+```json
+{
+  "Python Interface": {
+    "prefix": "interface",
+    "body": [
+      "from abc import ABC, abstractmethod",
+      "",
+      "class I${1:Name}(ABC):",
+      "    \"\"\"${2:Description}\"\"\"",
+      "    ",
+      "    @abstractmethod",
+      "    def ${3:method}(self, ${4:params}):",
+      "        pass"
+    ]
+  }
+}
+```
+
+---
+
+## 📞 Next Step
+
+Once today's checklist is done:
+
+1. 
**Read**: `PLAN_REFACTORIZACION.md` - Phase 2
+2. **Implement**: Domain models (TimeSeries, ForecastConfig)
+3. **Commit**: The code, with a descriptive message
+4. **Review**: Self-review the code before moving on
+
+---
+
+## 🎉 Congratulations!
+
+You have completed the initial setup. You now have:
+- ✅ An organized folder structure
+- ✅ Centralized configuration
+- ✅ A consistent logger
+- ✅ A first interface (abstraction)
+- ✅ The first tests
+
+**Time invested**: 2-3 hours
+**Progress**: ~5% of the refactoring
+**Next session**: Domain models (4-5 hours)
+
+---
+
+**Updated**: 2025-11-09
+**Version**: 1.0
+**Author**: Claude AI
diff --git a/docs/README_REFACTORING.md b/docs/README_REFACTORING.md
new file mode 100644
index 0000000000000000000000000000000000000000..952601131c15b1915d5a11b7139050e217101896
--- /dev/null
+++ b/docs/README_REFACTORING.md
@@ -0,0 +1,420 @@
+# 📚 SOLID Refactoring Documentation
+
+**Version**: 3.0.0
+**Status**: In Planning
+**Date**: 2025-11-09
+
+---
+
+## 📖 Introduction
+
+This directory contains the complete documentation for refactoring the Chronos2 Server project by applying SOLID principles and Clean Architecture.
+
+---
+
+## 🗂️ Document Index
+
+### 1. Analysis and Planning
+
+#### **ANALISIS_SOLID.md** (project root)
+**What does it contain?**
+- Full analysis of the current code
+- Identified violations of SOLID principles
+- Code quality metrics
+- Specific problems per file
+- Quantified technical debt
+
+**When to read it?**
+- Before starting the refactoring
+- To understand the "why" of the change
+- When justifying design decisions
+
+**Reading time**: 20 minutes
+
+---
+
+#### **PLAN_REFACTORIZACION.md** (project root)
+**What does it contain?**
+- The proposed new architecture (Clean Architecture)
+- Detailed folder structure
+- Example code for each layer
+- Phase-by-phase implementation plan
+- Work schedule
+- Migration strategy
+
+**When to read it?**
+- While planning the work
+- As a reference during implementation
+- To understand the target architecture
+
+**Reading time**: 45 minutes
+
+---
+
+### 2. Practical Guides
+
+#### **QUICK_START_REFACTORING.md** (this directory)
+**What does it contain?**
+- Step-by-step initial setup
+- The first tasks to implement
+- Basic tests
+- Common problems and solutions
+- Productivity tips
+
+**When to read it?**
+- On DAY ONE of the refactoring
+- As a quick reference
+- Whenever you have setup questions
+
+**Reading time**: 15 minutes
+
+---
+
+### 3. Technical Documentation (to be written during the refactoring)
+
+#### **ARCHITECTURE.md** (pending)
+**Will contain**:
+- Detailed architecture diagram
+- Explanation of each layer
+- Data flow
+- Design patterns used
+- Architecture decision records (ADRs)
+
+**When to create it**: During Phase 2
+
+---
+
+#### **API.md** (pending)
+**Will contain**:
+- Internal API documentation
+- Interfaces and contracts
+- Usage examples
+- Extension guide
+
+**When to create it**: During Phase 4
+
+---
+
+#### **DEVELOPMENT.md** (pending)
+**Will contain**:
+- Contributor guide
+- Code standards
+- Testing process
+- CI/CD pipeline
+- Debugging guide
+
+**When to create it**: During Phase 6
+
+---
+
+## 🎯 Recommended Reading Flow
+
+### For a Developer New to the Project
+
+```
+1. README.md (root)           → 10 min - General context
+2. ANALISIS_SOLID.md          → 20 min - Understand the problems
+3. 
PLAN_REFACTORIZACION.md → 45 min - See the solution
+4. QUICK_START_REFACTORING.md → 15 min - Start working
+
+Total: ~90 minutes
+```
+
+### For an Experienced Developer
+
+```
+1. ANALISIS_SOLID.md          → 15 min - Quick scan
+2. PLAN_REFACTORIZACION.md    → 30 min - Focus on the architecture
+3. Straight to the code       → Start implementing
+
+Total: ~45 minutes
+```
+
+### For a Reviewer/Architect
+
+```
+1. ANALISIS_SOLID.md          → 20 min - Problems identified
+2. PLAN_REFACTORIZACION.md    → 45 min - Proposed solution
+3. ARCHITECTURE.md (future)   → 20 min - Architectural decisions
+
+Total: ~85 minutes
+```
+
+---
+
+## 📊 Document Matrix
+
+| Document | Audience | When | Purpose | Time |
+|----------|----------|------|---------|------|
+| README.md (root) | Everyone | At the start | General context | 10 min |
+| ANALISIS_SOLID.md | Dev, Architect | Before refactoring | Understand the problems | 20 min |
+| PLAN_REFACTORIZACION.md | Dev, Architect | Planning | See the solution | 45 min |
+| QUICK_START.md | Dev | Day 1 | Initial setup | 15 min |
+| ARCHITECTURE.md | Architect, Senior Dev | During refactoring | Technical decisions | 20 min |
+| API.md | Dev | During development | API reference | 15 min |
+| DEVELOPMENT.md | Contributors | Before a PR | Standards | 20 min |
+
+---
+
+## 🏗️ Code Structure (Target)
+
+```
+chronos2-server/
+├── docs/                          ← YOU ARE HERE
+│   ├── README_REFACTORING.md      ✅ This file
+│   ├── QUICK_START_REFACTORING.md ✅ Created
+│   ├── ARCHITECTURE.md            ⏳ Pending (Phase 2)
+│   ├── API.md                     ⏳ Pending (Phase 4)
+│   └── DEVELOPMENT.md             ⏳ Pending (Phase 6)
+│
+├── ANALISIS_SOLID.md              ✅ Created
+├── PLAN_REFACTORIZACION.md        ✅ Created
+├── README.md                      ⚠️ Update afterwards
+│
+├── app/                           ⏳ To refactor
+│   ├── api/                       ⏳ Phase 4
+│   ├── domain/                    ⏳ Phase 2
+│   ├── infrastructure/            ⏳ Phase 3
+│   ├── schemas/                   ⏳ Phase 4
+│   └── utils/                     ⏳ Phase 1
+│
+├── tests/                         ⏳ Phase 6
+│   ├── unit/
+│   ├── integration/
+│   └── fixtures/
+│
+└── static/                        ⏳ Phase 5
+    └── taskpane/js/
+```
+
+---
+
+## 🎓 Key Concepts
+
+### Clean Architecture in 5 Minutes
+
+```
+Outer layer (Infrastructure)
+    ↓ depends on
+Middle layer (Application/API)
+    ↓ depends on
+Inner layer (Domain)
+    ↑ depends on NOTHING
+```
+
+**Golden rule**: Dependencies point INWARD.
+
+### SOLID in the Code
+
+#### ❌ BEFORE (SRP violation)
+```python
+# main.py does EVERYTHING:
+class MainApp:
+    def load_model(self): ...       # Infrastructure
+    def validate_data(self): ...    # Validation
+    def forecast(self): ...         # Business logic
+    def format_response(self): ...  # Presentation
+```
+
+#### ✅ AFTER (SRP compliant)
+```python
+# Each class has ONE responsibility:
+class ChronosModel:          # Infrastructure
+    def load(self): ...
+
+class DataValidator:         # Validation
+    def validate(self): ...
+
+class ForecastService:       # Business logic
+    def forecast(self): ...
+
+class ForecastSerializer:    # Presentation
+    def format(self): ...
+```
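+
+The golden rule is easiest to check at the import level; a sketch using the module paths from the target layout:
+
+```python
+# app/infrastructure/ml/chronos_model.py (outer layer importing inward: fine)
+from app.domain.interfaces.forecast_model import IForecastModel
+
+# app/domain/services/forecast_service.py: the inner layer importing outward
+# would invert the rule, so an import like this must never appear:
+# from app.infrastructure.ml.chronos_model import ChronosModel
+```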
+### SOLID in the Code
+
+#### ❌ BEFORE (SRP violation)
+```python
+# main.py does EVERYTHING:
+class MainApp:
+    def load_model(self): ...       # Infrastructure
+    def validate_data(self): ...    # Validation
+    def forecast(self): ...         # Business logic
+    def format_response(self): ...  # Presentation
+```
+
+#### ✅ AFTER (satisfies SRP)
+```python
+# One responsibility per class:
+class ChronosModel:          # Infrastructure
+    def load(self): ...
+
+class DataValidator:         # Validation
+    def validate(self): ...
+
+class ForecastService:       # Business logic
+    def forecast(self): ...
+
+class ForecastSerializer:    # Presentation
+    def format(self): ...
+```
+
+---
+
+## 🔧 Tools and Setup
+
+### Recommended IDEs
+
+**VSCode** - recommended extensions:
+- Python (Microsoft)
+- Pylance
+- Python Test Explorer
+- Python Docstring Generator
+- GitLens
+
+**PyCharm**:
+- Native Python support
+- Built-in refactoring tools
+- Visual test runner
+
+### Linting and Formatting
+
+```bash
+# Install the tools
+pip install black flake8 mypy isort
+
+# Format code
+black app/
+
+# Lint
+flake8 app/
+
+# Type checking
+mypy app/
+
+# Sort imports
+isort app/
+```
+
+### Testing
+
+```bash
+# Install pytest
+pip install pytest pytest-cov pytest-mock
+
+# Run the tests
+pytest tests/
+
+# With coverage
+pytest tests/ --cov=app --cov-report=html
+
+# View the coverage report
+open htmlcov/index.html
+```
+
+One of the first unit tests could look like the sketch below.
+
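+This is a sketch against the hypothetical domain service from the Clean Architecture example above; the `app.domain.services` import path is an assumption about the target structure, not existing code:
+
+```python
+# Hypothetical tests/unit/test_forecast_service.py - illustrative only
+import pytest
+
+# Assumes the sketched domain classes live in app/domain/services.py
+from app.domain.services import ForecastService
+
+
+class FakeForecaster:
+    """Test double: returns a fixed forecast so the service logic is isolated."""
+
+    def predict(self, values, horizon):
+        return [42.0] * horizon
+
+
+def test_forecast_delegates_to_model():
+    service = ForecastService(FakeForecaster())
+    assert service.forecast([1.0, 2.0, 3.0], horizon=2) == [42.0, 42.0]
+
+
+def test_forecast_rejects_short_series():
+    service = ForecastService(FakeForecaster())
+    with pytest.raises(ValueError):
+        service.forecast([1.0, 2.0], horizon=2)
+```
+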
+---
+
+## 📈 Progress Metrics
+
+### Overall Checklist
+
+- [ ] **Phase 1**: Base Infrastructure (6h)
+ - [ ] Centralized settings
+ - [ ] Logger
+ - [ ] Folder structure
+
+- [ ] **Phase 2**: Domain Layer (8h)
+ - [ ] Interfaces
+ - [ ] Domain models
+ - [ ] Services
+
+- [ ] **Phase 3**: Infrastructure (6h)
+ - [ ] ChronosModel
+ - [ ] DataTransformer
+ - [ ] ModelFactory
+
+- [ ] **Phase 4**: API Layer (8h)
+ - [ ] Schemas
+ - [ ] Dependency injection
+ - [ ] Routes
+
+- [ ] **Phase 5**: Frontend (8h)
+ - [ ] API client
+ - [ ] Excel services
+ - [ ] Feature modules
+
+- [ ] **Phase 6**: Tests (10h)
+ - [ ] Unit tests (>60% coverage)
+ - [ ] Integration tests
+
+- [ ] **Phase 7**: Documentation (4h)
+ - [ ] ARCHITECTURE.md
+ - [ ] API.md
+ - [ ] DEVELOPMENT.md
+
+**Total**: 50 hours (~6-7 weeks part-time)
+
+---
+
+## 🚀 Quick Links
+
+### External Documentation
+
+- [SOLID Principles](https://en.wikipedia.org/wiki/SOLID)
+- [Clean Architecture](https://blog.cleancoder.com/uncle-bob/2012/08/13/the-clean-architecture.html)
+- [FastAPI Bigger Applications](https://fastapi.tiangolo.com/tutorial/bigger-applications/)
+- [Python Testing](https://docs.pytest.org/)
+- [Pydantic Settings](https://docs.pydantic.dev/latest/concepts/pydantic_settings/)
+
+### Current Code
+
+- [HuggingFace Space](https://huggingface.co/spaces/ttzzs/chronos2-excel-forecasting-api)
+- [API Docs](https://ttzzs-chronos2-excel-forecasting-api.hf.space/docs)
+
+---
+
+## 💬 FAQ
+
+### Why refactor if the code works?
+
+**Answer**:
+- Maintainability: the current code is hard to understand
+- Scalability: features cannot be added without breaking things
+- Testability: 0% coverage means hidden bugs
+- Professionalism: quality code reflects a serious company
+
+### How long will it take?
+
+**Answer**:
+- **Full-time**: 1-2 weeks
+- **Part-time (2h/day)**: 6-7 weeks
+- **Weekends only**: 8-10 weeks
+
+### Can we do this incrementally?
+
+**Answer**: YES (recommended)
+- Use the **Strangler Pattern**
+- Migrate endpoint by endpoint
+- Keep the current version running
+- Deprecate gradually
+
+### What if we run into problems?
+
+**Answer**:
+- A Git branch allows easy rollback
+- Tests ensure functionality is not broken
+- Detailed documentation helps with debugging
+
+---
+
+## 📞 Support
+
+### During the Refactoring
+
+If you run into problems:
+
+1. **Check**: `QUICK_START_REFACTORING.md` - Common problems
+2. **Debug**: Use the logger for tracing
+3. **Test**: Write a test that reproduces the problem
+4. **Document**: Add the solution to the docs
+
+### Contact
+
+- **Issues**: Open an issue on GitHub (if applicable)
+- **Docs**: This documentation
+- **Code**: Comments in the code
+
+---
+
+## ✅ Next Step
+
+**To get started**:
+
+1. Read `ANALISIS_SOLID.md` (20 min)
+2. Read `PLAN_REFACTORIZACION.md` (45 min)
+3. Follow `QUICK_START_REFACTORING.md` (2-3 hours of implementation)
+
+**Good luck with the refactoring!** 🚀
+
+---
+
+**Version**: 1.0
+**Updated**: 2025-11-09
+**Author**: Claude AI
+**License**: MIT
diff --git a/static/taskpane/taskpane_v2.1.1_backup.js b/static/taskpane/taskpane_v2.1.1_backup.js
new file mode 100644
index 0000000000000000000000000000000000000000..edf70c99733f867012f966366a0fbd32ae450a5e
--- /dev/null
+++ b/static/taskpane/taskpane_v2.1.1_backup.js
@@ -0,0 +1,1104 @@
+/* global Office, Excel, console */
+
+// ====================================================================
+// CHRONOS2 FORECASTING ADD-IN
+// Office.js Task Pane Implementation
+// ====================================================================
+
+// API base URL on HuggingFace Spaces
+const API_BASE_URL = 'https://ttzzs-chronos2-excel-forecasting-api.hf.space';
+
+// For local development, uncomment the following line:
+// const API_BASE_URL = 'https://localhost:8000';
+
+// Initialize once Office is ready
+Office.onReady((info) => {
+    if (info.host === Office.HostType.Excel) {
+        console.log('Chronos2 Add-in loaded successfully');
+        checkServerStatus();
+
+        // Re-check automatically every 30 seconds
+        setInterval(checkServerStatus, 30000);
+    }
+});
+
+// ====================================================================
+// UTILITIES
+// ====================================================================
+
+function log(message, type = 'info') {
+    const resultsDiv = document.getElementById('results');
+    const timestamp = new Date().toLocaleTimeString();
+    const icon = type === 'success' ? '✅' : type === 'error' ? '❌' : 'ℹ️';
+
+    const entry = document.createElement('div');
+    entry.className = `log-entry log-${type}`;
+    entry.innerHTML = `${timestamp} ${icon} ${message}`;
+
+    resultsDiv.insertBefore(entry, resultsDiv.firstChild);
+
+    // Keep at most 20 log entries
+    while (resultsDiv.children.length > 20) {
+        resultsDiv.removeChild(resultsDiv.lastChild);
+    }
+}
+
+async function checkServerStatus() {
+    try {
+        const response = await fetch(`${API_BASE_URL}/health`, {
+            method: 'GET',
+            headers: { 'Content-Type': 'application/json' }
+        });
+
+        const data = await response.json();
+
+        if (response.ok) {
+            updateServerStatus(true, `Connected - ${data.model_id}`);
+        } else {
+            updateServerStatus(false, 'Server error');
+        }
+    } catch (error) {
+        updateServerStatus(false, 'Server offline');
+    }
+}
+
+function updateServerStatus(isOnline, message) {
+    const statusEl = document.getElementById('serverStatus');
+    const textEl = document.getElementById('statusText');
+
+    statusEl.className = `status-indicator ${isOnline ? 'online' : 'offline'}`;
+    textEl.textContent = message;
+}
+
+function showTab(tabName, evt = window.event) {
+    // Hide every tab panel and deactivate every tab button
+    const tabs = document.querySelectorAll('.tab-content');
+    tabs.forEach(tab => tab.classList.remove('active'));
+
+    const buttons = document.querySelectorAll('.tab');
+    buttons.forEach(btn => btn.classList.remove('active'));
+
+    // Show the selected tab
+    document.getElementById(`tab-${tabName}`).classList.add('active');
+
+    // Mark the clicked button active; window.event is the fallback for inline onclick handlers
+    if (evt && evt.target) {
+        evt.target.classList.add('active');
+    }
+}
+
+// ====================================================================
+// EXCEL HELPERS (Office.js)
+// ====================================================================
+
+async function getSelectedRange() {
+    return Excel.run(async (context) => {
+        const range = context.workbook.getSelectedRange();
+        range.load('values, address');
+        await context.sync();
+
+        return {
+            values: range.values,
+            address: range.address
+        };
+    });
+}
+
+async function writeToRange(data, startCell) {
+    return Excel.run(async (context) => {
+        try {
+            console.log(`[writeToRange] Writing ${data?.length || 0} rows to ${startCell}`);
+            console.log('[writeToRange] Data:', JSON.stringify(data).substring(0, 200));
+
+            if (!data || data.length === 0) {
+                throw new Error('No data to write');
+            }
+
+            if (!data[0] || data[0].length === 0) {
+                throw new Error('Invalid data structure: empty first row');
+            }
+
+            const sheet = context.workbook.worksheets.getActiveWorksheet();
+            const numRows = data.length;
+            const numCols = data[0].length;
+
+            console.log(`[writeToRange] Creating range: ${numRows} rows x ${numCols} cols from ${startCell}`);
+
+            const range = sheet.getRange(startCell).getResizedRange(numRows - 1, numCols - 1);
+
+            range.values = data;
+            range.format.autofitColumns();
+
+            await context.sync();
+
+            console.log('[writeToRange] ✅ Data written successfully');
+        } catch (error) {
+            console.error('[writeToRange] ❌ Error:', error);
+            console.error('[writeToRange] Stack:', error.stack);
+            throw error;
+        }
+    });
+}
+
+async function writeForecastResults(timestamps, median, q10, q90, startRow) {
+    return Excel.run(async (context) => {
+        try {
+            console.log('[writeForecastResults] Starting...');
+            console.log(`[writeForecastResults] timestamps: ${timestamps?.length || 0} items`);
+            console.log(`[writeForecastResults] median: ${median?.length || 0} items`);
+            console.log(`[writeForecastResults] q10: ${q10?.length || 0} items`);
+            console.log(`[writeForecastResults] q90: ${q90?.length || 0} items`);
+            console.log(`[writeForecastResults] startRow: ${startRow}`);
+
+            // VALIDATION
+            if (!timestamps || !median) {
+                throw new Error('Invalid data: timestamps or median is undefined');
+            }
+
+            if (timestamps.length === 0) {
+                throw new Error('No forecast data received (empty timestamps)');
+            }
+
+            if (timestamps.length !== median.length) {
+                throw new Error(`Data mismatch: ${timestamps.length} timestamps vs ${median.length} median values`);
+            }
+
+            const sheet = context.workbook.worksheets.getActiveWorksheet();
+
+            // Prepare the data
+            const data = [];
+            data.push(['Timestamp', 'Median', 'Q10', 'Q90']); // Headers
+
+            for (let i = 0; i < timestamps.length; i++) {
+                data.push([
+                    timestamps[i],
+                    median[i],
+                    q10 ? q10[i] : '',
+                    q90 ? q90[i] : ''
+                ]);
+            }
+
+            console.log(`[writeForecastResults] Prepared ${data.length} rows (including header)`);
+
+            // Write to columns D-G starting at the given row
+            const startCell = `D${startRow}`;
+            console.log(`[writeForecastResults] Writing to ${startCell}`);
+
+            // Note: writeToRange opens its own Excel.run batch
+            await writeToRange(data, startCell);
+
+            // Apply formatting
+            const headerRange = sheet.getRange(`D${startRow}:G${startRow}`);
+            headerRange.format.font.bold = true;
+            headerRange.format.fill.color = '#4472C4';
+            headerRange.format.font.color = 'white';
+
+            await context.sync();
+
+            console.log('[writeForecastResults] ✅ Forecast results written successfully');
+        } catch (error) {
+            console.error('[writeForecastResults] ❌ Error:', error);
+            console.error('[writeForecastResults] Stack:', error.stack);
+            throw error;
+        }
+    });
+}
+
+// ====================================================================
+// FUNCTION 1: UNIVARIATE FORECAST
+// ====================================================================
+
+async function forecastUnivariate() {
+    log('Starting univariate forecast...');
+
+    try {
+        // Read the selected range
+        const selection = await getSelectedRange();
+        const values = selection.values.flat().filter(v => v !== '' && !isNaN(v));
+
+        if (values.length < 3) {
+            log('Error: Select at least 3 data points', 'error');
+            return;
+        }
+
+        log(`Selected ${values.length} data points from ${selection.address}`);
+
+        // Read the parameters
+        const predictionLength = parseInt(document.getElementById('predictionLength').value, 10);
+        const frequency = document.getElementById('frequency').value;
+
+        // Build the request
+        const requestBody = {
+            prediction_length: predictionLength,
+            series: { values: values },
+            start_timestamp: new Date().toISOString().split('T')[0],
+            freq: frequency,
+            quantile_levels: [0.1, 0.5, 0.9]
+        };
+
+        log('Sending request to API...');
+
+        // Call the API
+        const response = await fetch(`${API_BASE_URL}/forecast_univariate`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify(requestBody)
+        });
+
+        if (!response.ok) {
+            throw new Error(`API error: ${response.statusText}`);
+        }
+
+        const data = await response.json();
+
+        log(`Received forecast for ${data.timestamps.length} periods`, 'success');
+
+        // Write the results two rows below the selection
+        await Excel.run(async (context) => {
+            const selection = context.workbook.getSelectedRange();
+            selection.load('rowIndex, rowCount');
+            await context.sync();
+
+            const startRow = selection.rowIndex + selection.rowCount + 2;
+
+            await writeForecastResults(
+                data.timestamps,
+                data.median,
+                data.quantiles['0.1'],
+                data.quantiles['0.9'],
+                startRow
+            );
+        });
+
+        log('✨ Forecast written to spreadsheet', 'success');
+
+    } catch (error) {
+        log(`Error: ${error.message}`, 'error');
+        console.error(error);
+    }
+}
+
+// ====================================================================
+// FUNCTION 2: ANOMALY DETECTION
+// ====================================================================
+
+async function detectAnomalies() {
+    log('Starting anomaly detection...');
+
+    try {
+        const selection = await getSelectedRange();
+        const values = selection.values.flat().filter(v => v !== '' && !isNaN(v));
+
+        const contextLength = parseInt(document.getElementById('contextLength').value, 10);
+        const recentPoints = parseInt(document.getElementById('recentPoints').value, 10);
+
+        if (values.length < contextLength + recentPoints) {
+            log(`Error: Need at least ${contextLength + recentPoints} points`, 'error');
+            return;
+        }
+
+        // Split the selection into historical context and recent observations
+        const contextValues = values.slice(0, contextLength);
+        const recent = values.slice(contextLength, contextLength + recentPoints);
+
+        const requestBody = {
+            context: { values: contextValues },
+            recent_observed: recent,
+            prediction_length: recentPoints,
+            quantile_low: 0.05,
+            quantile_high: 0.95
+        };
+
+        log('Analyzing data...');
+
+        const response = await fetch(`${API_BASE_URL}/detect_anomalies`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify(requestBody)
+        });
+
+        if (!response.ok) {
+            throw new Error(`API error: ${response.statusText}`);
+        }
+
+        const data = await response.json();
+        const anomalyCount = data.anomalies.filter(a => a.is_anomaly).length;
+
+        if (anomalyCount > 0) {
+            log(`⚠️ Found ${anomalyCount} anomalies!`, 'error');
+        } else {
+            log('No anomalies detected ✓', 'success');
+        }
+
+        // Write the results to Excel
+        await Excel.run(async (context) => {
+            const selection = context.workbook.getSelectedRange();
+            selection.load('rowIndex, rowCount');
+            await context.sync();
+
+            const startRow = selection.rowIndex + selection.rowCount + 2;
+            const sheet = context.workbook.worksheets.getActiveWorksheet();
+
+            // Prepare the data
+            const tableData = [['Index', 'Value', 'Expected', 'Lower', 'Upper', 'Is Anomaly']];
+
+            data.anomalies.forEach(a => {
+                tableData.push([
+                    a.index,
+                    parseFloat(a.value.toFixed(2)),
+                    parseFloat(a.predicted_median.toFixed(2)),
+                    parseFloat(a.lower.toFixed(2)),
+                    parseFloat(a.upper.toFixed(2)),
+                    a.is_anomaly ? 'YES' : 'No'
+                ]);
+            });
+
+            const range = sheet.getRangeByIndexes(startRow, 0, tableData.length, 6);
+            range.values = tableData;
+            range.format.autofitColumns();
+
+            // Format the header
+            const headerRange = sheet.getRangeByIndexes(startRow, 0, 1, 6);
+            headerRange.format.font.bold = true;
+            headerRange.format.fill.color = '#4472C4';
+            headerRange.format.font.color = 'white';
+
+            // Highlight anomalies
+            for (let i = 0; i < data.anomalies.length; i++) {
+                if (data.anomalies[i].is_anomaly) {
+                    const anomalyRange = sheet.getRangeByIndexes(startRow + i + 1, 0, 1, 6);
+                    anomalyRange.format.fill.color = '#FFC7CE';
+                }
+            }
+
+            await context.sync();
+        });
+
+        log('✨ Anomaly results written to spreadsheet', 'success');
+
+    } catch (error) {
+        log(`Error: ${error.message}`, 'error');
+        console.error(error);
+    }
+}
+
+// ====================================================================
+// FUNCTION 3: BACKTEST
+// ====================================================================
+
+async function runBacktest() {
+    log('Running backtest...');
+
+    try {
+        const selection = await getSelectedRange();
+        const values = selection.values.flat().filter(v => v !== '' && !isNaN(v));
+
+        const testLength = parseInt(document.getElementById('testLength').value, 10);
+
+        if (values.length <= testLength) {
+            log('Error: Series must be longer than test length', 'error');
+            return;
+        }
+
+        const requestBody = {
+            series: { values: values },
+            prediction_length: testLength,
+            test_length: testLength
+        };
+
+        log('Evaluating model...');
+
+        const response = await fetch(`${API_BASE_URL}/backtest_simple`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify(requestBody)
+        });
+
+        if (!response.ok) {
+            throw new Error(`API error: ${response.statusText}`);
+        }
+
+        const data = await response.json();
+        const metrics = data.metrics;
+
+        log(`📊 Backtest Results: MAE=${metrics.mae.toFixed(2)}, MAPE=${metrics.mape.toFixed(2)}%`, 'success');
+
+        // Write the results to Excel
+        await Excel.run(async (context) => {
+            const selection = context.workbook.getSelectedRange();
+            selection.load('rowIndex, rowCount');
+            await context.sync();
+
+            const startRow = selection.rowIndex + selection.rowCount + 2;
+            const sheet = context.workbook.worksheets.getActiveWorksheet();
+
+            // Metrics table
+            const metricsData = [
+                ['Metric', 'Value'],
+                ['MAE', parseFloat(metrics.mae.toFixed(2))],
+                ['MAPE', metrics.mape.toFixed(2) + '%'],
+                ['RMSE', parseFloat(metrics.rmse.toFixed(2))],
+                ['WQL', parseFloat(metrics.wql.toFixed(3))]
+            ];
+
+            const metricsRange = sheet.getRangeByIndexes(startRow, 0, metricsData.length, 2);
+            metricsRange.values = metricsData;
+            metricsRange.format.autofitColumns();
+
+            // Format the header
+            const headerRange = sheet.getRangeByIndexes(startRow, 0, 1, 2);
+            headerRange.format.font.bold = true;
+            headerRange.format.fill.color = '#70AD47';
+            headerRange.format.font.color = 'white';
+
+            // Forecast vs. actuals, when available
+            if (data.forecast_median && data.actuals) {
+                const forecastData = [['Timestamp', 'Forecast', 'Actual', 'Error']];
+
+                for (let i = 0; i < data.forecast_median.length; i++) {
+                    const error = Math.abs(data.forecast_median[i] - data.actuals[i]);
+                    forecastData.push([
+                        data.forecast_timestamps[i] || `t+${i + 1}`,
+                        parseFloat(data.forecast_median[i].toFixed(2)),
+                        parseFloat(data.actuals[i].toFixed(2)),
+                        parseFloat(error.toFixed(2))
+                    ]);
+                }
+
+                const forecastRange = sheet.getRangeByIndexes(
+                    startRow + metricsData.length + 2,
+                    0,
+                    forecastData.length,
+                    4
+                );
+                forecastRange.values = forecastData;
+                forecastRange.format.autofitColumns();
+
+                const forecastHeaderRange = sheet.getRangeByIndexes(
+                    startRow + metricsData.length + 2,
+                    0,
+                    1,
+                    4
+                );
+                forecastHeaderRange.format.font.bold = true;
+                forecastHeaderRange.format.fill.color = '#4472C4';
+                forecastHeaderRange.format.font.color = 'white';
+            }
+
+            await context.sync();
+        });
+
+        log('✨ Backtest results written to spreadsheet', 'success');
+
+    } catch (error) {
+        log(`Error: ${error.message}`, 'error');
+        console.error(error);
+    }
+}
+
+// ====================================================================
+// FUNCTION 4: MULTI-SERIES
+// ====================================================================
+
+async function forecastMultiSeries() {
+    log('Starting multi-series forecast...');
+
+    try {
+        const selection = await getSelectedRange();
+        const data = selection.values;
+
+        // Group rows by series_id (column A)
+        const seriesMap = {};
+
+        for (let i = 1; i < data.length; i++) { // Skip header
+            const seriesId = data[i][0];
+            const value = data[i][2]; // Column C
+
+            if (seriesId && value !== '' && !isNaN(value)) {
+                if (!seriesMap[seriesId]) {
+                    seriesMap[seriesId] = [];
+                }
+                seriesMap[seriesId].push(parseFloat(value));
+            }
+        }
+
+        const seriesList = Object.entries(seriesMap).map(([id, values]) => ({
+            series_id: id,
+            values: values
+        }));
+
+        if (seriesList.length === 0) {
+            log('Error: No valid series found', 'error');
+            return;
+        }
+
+        log(`Found ${seriesList.length} series`);
+
+        const predictionLength = parseInt(document.getElementById('multiPredLength').value, 10);
+
+        const requestBody = {
+            prediction_length: predictionLength,
+            series_list: seriesList,
+            start_timestamp: new Date().toISOString().split('T')[0],
+            freq: 'D',
+            quantile_levels: [0.1, 0.5, 0.9]
+        };
+
+        log('Forecasting all series...');
+
+        const response = await fetch(`${API_BASE_URL}/forecast_multi_id`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify(requestBody)
+        });
+
+        if (!response.ok) {
+            throw new Error(`API error: ${response.statusText}`);
+        }
+
+        const result = await response.json();
+
+        log(`✨ Generated forecasts for ${result.forecasts.length} series`, 'success');
+
+        // Write the results to Excel
+        await Excel.run(async (context) => {
+            const selection = context.workbook.getSelectedRange();
+            selection.load('rowIndex, rowCount');
+            await context.sync();
+
+            const startRow = selection.rowIndex + selection.rowCount + 2;
+            const sheet = context.workbook.worksheets.getActiveWorksheet();
+
+            let currentRow = startRow;
+
+            // Write each series
+            result.forecasts.forEach(forecast => {
+                // Series header
+                const seriesHeaderRange = sheet.getRangeByIndexes(currentRow, 0, 1, 1);
+                seriesHeaderRange.values = [[`Series: ${forecast.series_id}`]];
+                seriesHeaderRange.format.font.bold = true;
+                seriesHeaderRange.format.fill.color = '#4472C4';
+                seriesHeaderRange.format.font.color = 'white';
+                currentRow++;
+
+                // Series data
+                const tableData = [['Timestamp', 'Median', 'Q10', 'Q90']];
+
+                for (let i = 0; i < forecast.timestamps.length; i++) {
+                    tableData.push([
+                        forecast.timestamps[i],
+                        parseFloat(forecast.median[i].toFixed(2)),
+                        parseFloat(forecast.quantiles['0.1'][i].toFixed(2)),
+                        parseFloat(forecast.quantiles['0.9'][i].toFixed(2))
+                    ]);
+                }
+
+                const dataRange = sheet.getRangeByIndexes(
+                    currentRow,
+                    0,
+                    tableData.length,
+                    4
+                );
+                dataRange.values = tableData;
+                dataRange.format.autofitColumns();
+
+                // Format the table header
+                const headerRange = sheet.getRangeByIndexes(currentRow, 0, 1, 4);
+                headerRange.format.font.bold = true;
+                headerRange.format.fill.color = '#D9E1F2';
+
+                currentRow += tableData.length + 1; // +1 blank row as a separator
+            });
+
+            await context.sync();
+        });
+
+        log('✨ Multi-series forecasts written to spreadsheet', 'success');
+
+    } catch (error) {
+        log(`Error: ${error.message}`, 'error');
+        console.error(error);
+    }
+}
+
+// ====================================================================
+// FUNCTION 5: COVARIATES
+// ====================================================================
+
+async function forecastWithCovariates() {
+    log('Starting forecast with covariates...');
+
+    try {
+        const selection = await getSelectedRange();
+        const data = selection.values;
+
+        if (data.length < 3) {
+            log('Error: Need at least 3 rows of data', 'error');
+            return;
+        }
+
+        // Read the parameters
+        const predictionLength = parseInt(document.getElementById('covPredLength').value, 10);
+        const covariateNamesInput = document.getElementById('covariateNames').value;
+        const covariateNames = covariateNamesInput.split(',').map(s => s.trim());
+
+        log(`Reading data with ${covariateNames.length} covariates: ${covariateNames.join(', ')}`);
+
+        // Expected layout:
+        // Col A: Date/Timestamp
+        // Col B: Target value
+        // Col C+: Covariates
+
+        const contextRows = [];
+        const future = [];
+
+        for (let i = 1; i < data.length; i++) { // Skip header
+            const timestamp = data[i][0] ? data[i][0].toString() : null;
+            const target = data[i][1];
+
+            // Read the covariates
+            const covariates = {};
+            for (let j = 0; j < covariateNames.length && j < data[i].length - 2; j++) {
+                const covValue = data[i][j + 2];
+                if (covValue !== '' && !isNaN(covValue)) {
+                    covariates[covariateNames[j]] = parseFloat(covValue);
+                }
+            }
+
+            // Rows with a target are historical context
+            if (target !== '' && !isNaN(target)) {
+                contextRows.push({
+                    timestamp: timestamp,
+                    target: parseFloat(target),
+                    covariates: covariates
+                });
+            }
+            // Rows without a target but with covariates are future values
+            else if (Object.keys(covariates).length > 0) {
+                future.push({
+                    timestamp: timestamp,
+                    covariates: covariates
+                });
+            }
+        }
+
+        if (contextRows.length === 0) {
+            log('Error: No historical data found', 'error');
+            return;
+        }
+
+        log(`Context: ${contextRows.length} points, Future: ${future.length} points`);
+
+        const requestBody = {
+            context: contextRows,
+            future: future.length > 0 ? future : null,
+            prediction_length: predictionLength,
+            quantile_levels: [0.1, 0.5, 0.9]
+        };
+
+        log('Calling API with covariates...');
+
+        const response = await fetch(`${API_BASE_URL}/forecast_with_covariates`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify(requestBody)
+        });
+
+        if (!response.ok) {
+            const errorText = await response.text();
+            throw new Error(`API error: ${response.statusText} - ${errorText}`);
+        }
+
+        const result = await response.json();
+
+        log(`✨ Forecast generated with ${result.pred_df.length} predictions`, 'success');
+
+        // Write the results below the selection
+        await Excel.run(async (context) => {
+            const selection = context.workbook.getSelectedRange();
+            selection.load('rowIndex, rowCount, columnCount');
+            await context.sync();
+
+            const startRow = selection.rowIndex + selection.rowCount + 2;
+            const startCol = 0;
+
+            // Build the results table
+            const sheet = context.workbook.worksheets.getActiveWorksheet();
+
+            // Headers
+            const headers = Object.keys(result.pred_df[0]);
+            const tableData = [headers];
+
+            // Data rows
+            result.pred_df.forEach(row => {
+                const rowData = headers.map(h => row[h]);
+                tableData.push(rowData);
+            });
+
+            const outputRange = sheet.getRangeByIndexes(
+                startRow,
+                startCol,
+                tableData.length,
+                headers.length
+            );
+
+            outputRange.values = tableData;
+            outputRange.format.autofitColumns();
+
+            // Format the header
+            const headerRange = sheet.getRangeByIndexes(startRow, startCol, 1, headers.length);
+            headerRange.format.font.bold = true;
+            headerRange.format.fill.color = '#4472C4';
+            headerRange.format.font.color = 'white';
+
+            await context.sync();
+        });
+
+        log('✨ Results written to spreadsheet', 'success');
+
+    } catch (error) {
+        log(`Error: ${error.message}`, 'error');
+        console.error(error);
+    }
+}
+
+// ====================================================================
+// FUNCTION 6: SCENARIOS
+// ====================================================================
+
+async function generateScenarios() {
+    log('Starting scenario generation...');
+
+    try {
+        const selection = await getSelectedRange();
+        const data = selection.values;
+
+        if (data.length < 3) {
+            log('Error: Need at least 3 rows of data', 'error');
+            return;
+        }
+
+        const numScenarios = parseInt(document.getElementById('numScenarios').value, 10);
+
+        // Expected layout, same as for covariates:
+        // Col A: Date, Col B: Target, Col C+: Covariates
+        // For scenarios we generate variations of the covariates.
+
+        const contextRows = [];
+        const covariateNames = [];
+
+        // Detect covariate names from the header row
+        for (let j = 2; j < data[0].length; j++) {
+            if (data[0][j]) {
+                covariateNames.push(data[0][j].toString());
+            }
+        }
+
+        log(`Detected covariates: ${covariateNames.join(', ')}`);
+
+        // Read the historical context
+        for (let i = 1; i < data.length; i++) {
+            const timestamp = data[i][0] ? data[i][0].toString() : null;
+            const target = data[i][1];
+
+            if (target !== '' && !isNaN(target)) {
+                const covariates = {};
+                for (let j = 0; j < covariateNames.length && j < data[i].length - 2; j++) {
+                    const covValue = data[i][j + 2];
+                    if (covValue !== '' && !isNaN(covValue)) {
+                        covariates[covariateNames[j]] = parseFloat(covValue);
+                    }
+                }
+
+                contextRows.push({
+                    timestamp: timestamp,
+                    target: parseFloat(target),
+                    covariates: covariates
+                });
+            }
+        }
+
+        if (contextRows.length === 0) {
+            log('Error: No historical data found', 'error');
+            return;
+        }
+
+        // Generate scenarios automatically (fixed 7-period horizon, up to 3 scenarios)
+        const predictionLength = 7;
+        const scenarios = [];
+
+        // Average each covariate over the history to build the variations
+        const avgCovariates = {};
+        covariateNames.forEach(name => {
+            const values = contextRows
+                .map(p => p.covariates[name])
+                .filter(v => v !== undefined);
+            avgCovariates[name] = values.length > 0
+                ? values.reduce((a, b) => a + b, 0) / values.length
+                : 0;
+        });
+
+        // Scenario 1: Baseline (averages)
+        const baselineScenario = {
+            name: 'Baseline',
+            future_covariates: []
+        };
+
+        for (let i = 0; i < predictionLength; i++) {
+            baselineScenario.future_covariates.push({
+                timestamp: `future_${i + 1}`,
+                covariates: { ...avgCovariates }
+            });
+        }
+        scenarios.push(baselineScenario);
+
+        // Scenario 2: Optimistic (+20%)
+        if (numScenarios >= 2) {
+            const optimisticScenario = {
+                name: 'Optimistic (+20%)',
+                future_covariates: []
+            };
+
+            for (let i = 0; i < predictionLength; i++) {
+                const covs = {};
+                covariateNames.forEach(name => {
+                    covs[name] = avgCovariates[name] * 1.2;
+                });
+                optimisticScenario.future_covariates.push({
+                    timestamp: `future_${i + 1}`,
+                    covariates: covs
+                });
+            }
+            scenarios.push(optimisticScenario);
+        }
+
+        // Scenario 3: Pessimistic (-20%)
+        if (numScenarios >= 3) {
+            const pessimisticScenario = {
+                name: 'Pessimistic (-20%)',
+                future_covariates: []
+            };
+
+            for (let i = 0; i < predictionLength; i++) {
+                const covs = {};
+                covariateNames.forEach(name => {
+                    covs[name] = avgCovariates[name] * 0.8;
+                });
+                pessimisticScenario.future_covariates.push({
+                    timestamp: `future_${i + 1}`,
+                    covariates: covs
+                });
+            }
+            scenarios.push(pessimisticScenario);
+        }
+
+        log(`Generated ${scenarios.length} scenarios`);
+
+        const requestBody = {
+            context: contextRows,
+            scenarios: scenarios,
+            prediction_length: predictionLength,
+            quantile_levels: [0.1, 0.5, 0.9]
+        };
+
+        log('Calling scenarios API...');
+
+        const response = await fetch(`${API_BASE_URL}/forecast_scenarios`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify(requestBody)
+        });
+
+        if (!response.ok) {
+            const errorText = await response.text();
+            throw new Error(`API error: ${response.statusText} - ${errorText}`);
+        }
+
+        const result = await response.json();
+
+        log(`✨ Generated ${result.scenarios.length} scenario forecasts`, 'success');
+
+        // Write the results
+        await Excel.run(async (context) => {
+            const selection = context.workbook.getSelectedRange();
+            selection.load('rowIndex, rowCount');
+            await context.sync();
+
+            const startRow = selection.rowIndex + selection.rowCount + 2;
+            const sheet = context.workbook.worksheets.getActiveWorksheet();
+
+            let currentRow = startRow;
+
+            // Write each scenario
+            result.scenarios.forEach(scenario => {
+                // Scenario header
+                const scenarioHeaderRange = sheet.getRangeByIndexes(currentRow, 0, 1, 1);
+                scenarioHeaderRange.values = [[`Scenario: ${scenario.name}`]];
+                scenarioHeaderRange.format.font.bold = true;
+                scenarioHeaderRange.format.fill.color = '#70AD47';
+                scenarioHeaderRange.format.font.color = 'white';
+                currentRow++;
+
+                // Scenario data
+                if (scenario.pred_df && scenario.pred_df.length > 0) {
+                    const headers = Object.keys(scenario.pred_df[0]);
+                    const tableData = [headers];
+
+                    scenario.pred_df.forEach(row => {
+                        tableData.push(headers.map(h => row[h]));
+                    });
+
+                    const dataRange = sheet.getRangeByIndexes(
+                        currentRow,
+                        0,
+                        tableData.length,
+                        headers.length
+                    );
+                    dataRange.values = tableData;
+                    dataRange.format.autofitColumns();
+
+                    currentRow += tableData.length + 1; // +1 blank row as a separator
+                }
+            });
+
+            await context.sync();
+        });
+
+        log('✨ Scenarios written to spreadsheet', 'success');
+
+    } catch (error) {
+        log(`Error: ${error.message}`, 'error');
+        console.error(error);
+    }
+}
+
+// ====================================================================
+// FUNCTION 7: MULTIVARIATE
+// ====================================================================
+
+async function forecastMultivariate() {
+    log('Starting multivariate forecast...');
+
+    try {
+        const selection = await getSelectedRange();
+        const data = selection.values;
+
+        if (data.length < 3) {
+            log('Error: Need at least 3 rows of data', 'error');
+            return;
+        }
+
+        // Read the parameters
+        const predictionLength = parseInt(document.getElementById('multivarPredLength').value, 10);
+        const targetColumnsInput = document.getElementById('targetColumns').value;
+        const targetColumns = targetColumnsInput.split(',').map(s => s.trim());
+
+        log(`Forecasting ${targetColumns.length} target variables: ${targetColumns.join(', ')}`);
+
+        // Expected layout:
+        // Col A: Date/Timestamp
+        // Col B+: Target variables (the columns to forecast)
+
+        const contextRows = [];
+
+        // Validate there are enough columns
+        if (data[0].length < targetColumns.length + 1) {
+            log(`Error: Expected ${targetColumns.length + 1} columns but found ${data[0].length}`, 'error');
+            return;
+        }
+
+        // Read the data
+        for (let i = 1; i < data.length; i++) { // Skip header
+            const timestamp = data[i][0] ? data[i][0].toString() : null;
+
+            // Read every target value
+            const targets = {};
+            let hasValidData = false;
+
+            for (let j = 0; j < targetColumns.length && j < data[i].length - 1; j++) {
+                const value = data[i][j + 1];
+                if (value !== '' && !isNaN(value)) {
+                    targets[targetColumns[j]] = parseFloat(value);
+                    hasValidData = true;
+                }
+            }
+
+            if (hasValidData) {
+                contextRows.push({
+                    timestamp: timestamp,
+                    targets: targets,
+                    covariates: {} // No covariates for now
+                });
+            }
+        }
+
+        if (contextRows.length === 0) {
+            log('Error: No valid data found', 'error');
+            return;
+        }
+
+        log(`Read ${contextRows.length} data points`);
+
+        const requestBody = {
+            context: contextRows,
+            target_columns: targetColumns,
+            prediction_length: predictionLength,
+            quantile_levels: [0.1, 0.5, 0.9]
+        };
+
+        log('Calling multivariate forecast API...');
+
+        const response = await fetch(`${API_BASE_URL}/forecast_multivariate`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify(requestBody)
+        });
+
+        if (!response.ok) {
+            const errorText = await response.text();
+            throw new Error(`API error: ${response.statusText} - ${errorText}`);
+        }
+
+        const result = await response.json();
+
+        log(`✨ Generated multivariate forecast with ${result.pred_df.length} predictions`, 'success');
+
+        // Write the results
+        await Excel.run(async (context) => {
+            const selection = context.workbook.getSelectedRange();
+            selection.load('rowIndex, rowCount');
+            await context.sync();
+
+            const startRow = selection.rowIndex + selection.rowCount + 2;
+            const sheet = context.workbook.worksheets.getActiveWorksheet();
+
+            // Build the results table
+            if (result.pred_df && result.pred_df.length > 0) {
+                const headers = Object.keys(result.pred_df[0]);
+                const tableData = [headers];
+
+                result.pred_df.forEach(row => {
+                    tableData.push(headers.map(h => row[h]));
+                });
+
+                const outputRange = sheet.getRangeByIndexes(
+                    startRow,
+                    0,
+                    tableData.length,
+                    headers.length
+                );
+
+                outputRange.values = tableData;
+                outputRange.format.autofitColumns();
+
+                // Format the header
+                const headerRange = sheet.getRangeByIndexes(startRow, 0, 1, headers.length);
+                headerRange.format.font.bold = true;
+                headerRange.format.fill.color = '#4472C4';
+                headerRange.format.font.color = 'white';
+
+                await context.sync();
+            }
+        });
+
+        log('✨ Multivariate forecast written to spreadsheet', 'success');
+
+    } catch (error) {
+        log(`Error: ${error.message}`, 'error');
+        console.error(error);
+    }
+}