Spaces:
Sleeping
Sleeping
Evgueni Poloukarov
feat: Phase 1 complete - Master CNEC list + synchronized feature engineering
d4939ce
"""
CNEC Border Extraction Utility
================================
Extracts commercial border information from CNEC EIC codes, TSO fields,
and PTDF profiles using a hierarchical approach.

Strategy:
1. Parse EIC codes (10T-XX-YY-NNNNNN format) - primary method, 33% coverage
2. Special-case mapping (Alegro CNECs) - 8 CNECs
3. TSO + neighbor PTDF analysis - fallback, ~67% coverage
4. Manual review for remaining cases

Author: Claude + Evgueni Poloukarov
Date: 2025-11-08
"""
import math
from collections import Counter
from typing import Dict, Optional
# Lookup table: TSO name -> code of the country/zone it belongs to.
TSO_TO_ZONE: Dict[str, str] = {
    # Germany is served by four TSOs, all mapped to the single 'DE' zone.
    **dict.fromkeys(('50Hertz', 'Amprion', 'TennetGmbh', 'TransnetBw'), 'DE'),
    # Every other country has exactly one TSO in this table.
    'Rte': 'FR',             # France
    'Elia': 'BE',            # Belgium
    'TennetBv': 'NL',        # Netherlands
    'Apg': 'AT',             # Austria
    'Ceps': 'CZ',            # Czech Republic
    'Pse': 'PL',             # Poland
    'Mavir': 'HU',           # Hungary
    'Seps': 'SK',            # Slovakia
    'Transelectrica': 'RO',  # Romania
    'Hops': 'HR',            # Croatia
    'Eles': 'SI',            # Slovenia
}

# Lookup table: zone -> neighboring zones considered when inferring a border
# (FBMC border neighbors, taken from the ENTSO-E BORDERS list).
ZONE_NEIGHBORS: Dict[str, list] = {
    # DE_LU is treated as DE.
    'DE': ['NL', 'FR', 'BE', 'AT', 'CZ', 'PL'],
    # ES and CH are external to FBMC but still affect it.
    'FR': ['DE', 'BE', 'ES', 'CH'],
    'AT': ['DE', 'CZ', 'HU', 'SI', 'CH'],
    'CZ': ['DE', 'AT', 'SK', 'PL'],
    'HU': ['AT', 'SK', 'RO', 'HR'],
    'SK': ['CZ', 'HU', 'PL'],
    'PL': ['DE', 'CZ', 'SK'],
    'RO': ['HU'],
    'HR': ['HU', 'SI'],
    'SI': ['AT', 'HR'],
    'BE': ['DE', 'FR', 'NL'],
    'NL': ['DE', 'BE'],
}

# Special-case identifiers whose border cannot be parsed from the code itself.
# All eight entries belong to the Alegro DC cable (Belgium - Germany).
SPECIAL_BORDER_MAPPING: Dict[str, str] = dict.fromkeys(
    (
        'ALEGRO_EXTERNAL_BE_IMPORT',
        'ALEGRO_EXTERNAL_DE_EXPORT',
        'ALEGRO_EXTERNAL_DE_IMPORT',
        'ALEGRO_EXTERNAL_BE_EXPORT',
        'ALEGRO_INTERNAL_DE_IMPORT',
        'ALEGRO_INTERNAL_BE_EXPORT',
        'ALEGRO_INTERNAL_BE_IMPORT',
        'ALEGRO_INTERNAL_DE_EXPORT',
    ),
    'BE_DE',
)
def extract_border_from_eic(eic: str) -> Optional[str]:
    """
    Extract the border from an EIC code in 10T-XX-YY-NNNNNN format.

    The two zone components following the "10T-" prefix are joined with an
    underscore in alphabetical order, so "10T-FR-DE-..." and "10T-DE-FR-..."
    both yield "DE_FR".

    Args:
        eic: CNEC EIC code

    Returns:
        Border string (e.g., "DE_FR", "AT_SI"), or None if the code is not a
        string, does not start with "10T-", has fewer than two zone
        components, or has an empty zone component.

    Examples:
        >>> extract_border_from_eic("10T-DE-FR-000068")
        'DE_FR'
        >>> extract_border_from_eic("10T-AT-SI-00003P")
        'AT_SI'
        >>> extract_border_from_eic("17T0000000215642") is None
        True
    """
    # Non-string input (e.g. None) cannot be a parseable EIC code; report it
    # as "not parseable" instead of failing on the missing str method.
    if not isinstance(eic, str) or not eic.startswith('10T-'):
        return None

    parts = eic.split('-')
    if len(parts) < 3:
        return None

    zone1, zone2 = parts[1], parts[2]
    # An empty component (e.g. "10T--FR-1" or "10T-DE-") would otherwise
    # produce a malformed border such as "_FR".
    if not zone1 or not zone2:
        return None

    # Alphabetical order makes the result independent of the order in the code.
    low, high = sorted((zone1, zone2))
    return f"{low}_{high}"
def get_special_border(eic: str) -> Optional[str]:
    """
    Look up the border of a special-case CNEC in SPECIAL_BORDER_MAPPING.

    Args:
        eic: CNEC EIC code (or special identifier such as an Alegro name)

    Returns:
        The mapped border string, or None when the code has no entry in
        SPECIAL_BORDER_MAPPING.
    """
    special_border = SPECIAL_BORDER_MAPPING.get(eic, None)
    return special_border
def infer_border_from_tso_and_ptdf(
    tso: str,
    ptdf_dict: Dict[str, float]
) -> Optional[str]:
    """
    Infer the border from the TSO's home zone and its neighbors' PTDFs.

    Fallback for EIC codes that do not encode the border explicitly. The TSO
    is mapped to its home zone via TSO_TO_ZONE; among that zone's entries in
    ZONE_NEIGHBORS, the neighbor with the largest |PTDF| is chosen. On a tie,
    the neighbor listed first in ZONE_NEIGHBORS wins.

    Args:
        tso: TSO name (e.g., "Apg", "Rte", "Amprion")
        ptdf_dict: Dictionary of PTDF values keyed by "ptdf_<ZONE>"
            Format: {"ptdf_AT": -0.45, "ptdf_DE": 0.12, ...}
            Entries whose value is None or NaN are ignored.

    Returns:
        Border string with both zones in alphabetical order, or None if the
        TSO is unknown, its zone has no neighbors, or no neighbor has a
        usable PTDF value.

    Example:
        >>> ptdfs = {"ptdf_AT": -0.45, "ptdf_SI": 0.38, "ptdf_DE": 0.12}
        >>> infer_border_from_tso_and_ptdf("Apg", ptdfs)
        'AT_SI'
    """
    home_zone = TSO_TO_ZONE.get(tso)
    if not home_zone:
        return None

    neighbor_ptdfs = {}
    for neighbor in ZONE_NEIGHBORS.get(home_zone, []):
        value = ptdf_dict.get(f'ptdf_{neighbor}')
        # Skip unusable entries: abs(None) raises TypeError, and NaN compares
        # False against every number, which would make max() pick a winner
        # based on iteration order rather than magnitude.
        if value is None or math.isnan(value):
            continue
        neighbor_ptdfs[neighbor] = abs(value)

    if not neighbor_ptdfs:
        return None

    max_neighbor = max(neighbor_ptdfs, key=neighbor_ptdfs.get)

    # Alphabetical order keeps the border name independent of which side is
    # the home zone.
    low, high = sorted((home_zone, max_neighbor))
    return f"{low}_{high}"
def extract_cnec_border(
    cnec_eic: str,
    tso: str,
    ptdf_dict: Optional[Dict[str, float]] = None
) -> str:
    """
    Resolve the border of a CNEC by trying several sources in order.

    The first source that yields a border wins:
    1. Border parsed from the EIC code (10T-XX-YY format) - most reliable
    2. Special-case mapping (Alegro, etc.)
    3. TSO home zone + neighbor PTDF analysis (only if ptdf_dict is non-empty)
    4. TSO-only guess: home zone paired with its alphabetically first
       neighbor - very approximate
    5. "UNKNOWN" when the TSO has no known home zone or no neighbors

    Args:
        cnec_eic: CNEC EIC code
        tso: TSO name
        ptdf_dict: Optional dictionary of PTDF values
            Format: {"ptdf_AT": -0.45, "ptdf_BE": 0.12, ...}

    Returns:
        Border string (e.g., "DE_FR", "AT_SI") or "UNKNOWN"

    Examples:
        >>> extract_cnec_border("10T-DE-FR-000068", "Amprion")
        'DE_FR'
        >>> extract_cnec_border("ALEGRO_EXTERNAL_BE_IMPORT", "Elia")
        'BE_DE'
        >>> ptdfs = {"ptdf_AT": -0.45, "ptdf_SI": 0.38}
        >>> extract_cnec_border("17T0000000215642", "Apg", ptdfs)
        'AT_SI'
    """
    # Steps 1-2: the special-case lookup only runs when EIC parsing gave nothing.
    resolved = extract_border_from_eic(cnec_eic) or get_special_border(cnec_eic)
    if resolved:
        return resolved

    # Step 3: PTDF-based inference needs a non-empty PTDF dictionary.
    if ptdf_dict:
        inferred = infer_border_from_tso_and_ptdf(tso, ptdf_dict)
        if inferred:
            return inferred

    # Step 4: with only the TSO known, guess the alphabetically first
    # neighbor of its home zone. Very approximate, but preferred to UNKNOWN.
    home_zone = TSO_TO_ZONE.get(tso)
    candidates = ZONE_NEIGHBORS.get(home_zone, []) if home_zone else []
    if not candidates:
        return "UNKNOWN"

    guess = min(candidates)
    low, high = sorted((home_zone, guess))
    return f"{low}_{high}"
def validate_border_assignment(
    border: str,
    ptdf_dict: Dict[str, float],
    threshold: float = 0.05
) -> bool:
    """
    Sanity-check a border assignment against the CNEC's PTDF values.

    A border "XX_YY" passes when |ptdf_XX| or |ptdf_YY| is strictly greater
    than the threshold. Zones missing from ptdf_dict count as 0.0.

    Args:
        border: Assigned border (e.g., "DE_FR")
        ptdf_dict: Dictionary of PTDF values keyed by "ptdf_<ZONE>"
        threshold: Minimum |PTDF| to consider significant (default 0.05)

    Returns:
        True if validation passes; False if it fails, if the border is
        "UNKNOWN", or if the border does not consist of exactly two parts
        separated by an underscore.

    Example:
        >>> validate_border_assignment("DE_FR", {"ptdf_DE": -0.42, "ptdf_FR": 0.38})
        True
        >>> validate_border_assignment("DE_FR", {"ptdf_DE": 0.01, "ptdf_FR": 0.02})
        False
    """
    # Exactly one underscore means exactly two zone parts.
    if border == "UNKNOWN" or border.count('_') != 1:
        return False

    # Compute both magnitudes up front, then test them against the threshold.
    magnitudes = [
        abs(ptdf_dict.get(f'ptdf_{zone}', 0.0))
        for zone in border.split('_')
    ]
    return any(magnitude > threshold for magnitude in magnitudes)
def get_border_statistics(borders: list) -> Dict[str, int]:
    """
    Count how often each border appears in a list of assignments.

    Useful for validating that major FBMC borders are well-represented.

    Args:
        borders: List of border assignments

    Returns:
        Dictionary mapping border -> count, ordered by count (descending).
        Borders with equal counts keep the order in which they first appear.

    Example:
        >>> get_border_statistics(["DE_FR", "AT_SI", "DE_FR", "UNKNOWN"])
        {'DE_FR': 2, 'AT_SI': 1, 'UNKNOWN': 1}
    """
    # most_common() sorts by count (descending) and keeps first-seen order
    # among ties, matching a stable reverse sort on the counts.
    return dict(Counter(borders).most_common())