Spaces:

evgueni-p
/

fbmc-chronos2

Sleeping

File size: 8,525 Bytes

d4939ce

"""
CNEC Border Extraction Utility
================================

Extracts commercial border information from CNEC EIC codes, TSO fields,
and PTDF profiles using a hierarchical approach.

Strategy:
1. Parse EIC codes (10T-XX-YY-NNNNNN format) - Primary, 33% coverage
2. Special case mapping (Alegro CNECs) - 8 CNECs
3. TSO + neighbor PTDF analysis - Fallback, ~67% coverage
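4. TSO-only guess (home zone + alphabetically first neighbor) - very approximate
5. "UNKNOWN" is returned for remaining cases (manual review)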

Author: Claude + Evgueni Poloukarov
Date: 2025-11-08
"""

import math
from collections import Counter
from typing import Dict, Optional


# TSO to Country/Zone Mapping
# Keys are TSO names exactly as passed in the `tso` argument of the functions
# in this module (e.g. "Apg", "Rte", "Amprion"); values are two-letter zone
# codes, which are also the keys of ZONE_NEIGHBORS.
# Lookups use dict.get with the raw string, so spelling and case must match.
TSO_TO_ZONE: Dict[str, str] = {
    # Germany (4 TSOs) - all four map to the single zone 'DE'
    '50Hertz': 'DE',
    'Amprion': 'DE',
    'TennetGmbh': 'DE',
    'TransnetBw': 'DE',

    # Other countries
    'Rte': 'FR',              # France
    'Elia': 'BE',             # Belgium
    'TennetBv': 'NL',         # Netherlands
    'Apg': 'AT',              # Austria
    'Ceps': 'CZ',             # Czech Republic
    'Pse': 'PL',              # Poland
    'Mavir': 'HU',            # Hungary
    'Seps': 'SK',             # Slovakia
    'Transelectrica': 'RO',   # Romania
    'Hops': 'HR',             # Croatia
    'Eles': 'SI',             # Slovenia
}


# FBMC Border Neighbors (from ENTSO-E BORDERS list)
# Maps a home zone (a value of TSO_TO_ZONE) to the zones it borders.
# List order is significant for the PTDF-based inference: when several
# neighbors share the largest |PTDF|, the one listed first is chosen.
# ES and CH appear only as neighbors; they have no entry of their own here
# and no TSO in TSO_TO_ZONE maps to them.
ZONE_NEIGHBORS: Dict[str, list] = {
    'DE': ['NL', 'FR', 'BE', 'AT', 'CZ', 'PL'],  # DE_LU treated as DE
    'FR': ['DE', 'BE', 'ES', 'CH'],              # ES/CH external but affect FBMC
    'AT': ['DE', 'CZ', 'HU', 'SI', 'CH'],
    'CZ': ['DE', 'AT', 'SK', 'PL'],
    'HU': ['AT', 'SK', 'RO', 'HR'],
    'SK': ['CZ', 'HU', 'PL'],
    'PL': ['DE', 'CZ', 'SK'],
    'RO': ['HU'],
    'HR': ['HU', 'SI'],
    'SI': ['AT', 'HR'],
    'BE': ['DE', 'FR', 'NL'],
    'NL': ['DE', 'BE'],
}


# Special case mappings (Alegro cable + edge cases)
# Keys are matched exactly against the CNEC identifier string; values are
# border strings with the two zones in alphabetical order, the same format
# the other extraction methods in this module produce.
SPECIAL_BORDER_MAPPING: Dict[str, str] = {
    # Alegro DC cable (Belgium - Germany)
    # All eight EXTERNAL/INTERNAL x BE/DE x IMPORT/EXPORT identifiers map to
    # the same border.
    'ALEGRO_EXTERNAL_BE_IMPORT': 'BE_DE',
    'ALEGRO_EXTERNAL_DE_EXPORT': 'BE_DE',
    'ALEGRO_EXTERNAL_DE_IMPORT': 'BE_DE',
    'ALEGRO_EXTERNAL_BE_EXPORT': 'BE_DE',
    'ALEGRO_INTERNAL_DE_IMPORT': 'BE_DE',
    'ALEGRO_INTERNAL_BE_EXPORT': 'BE_DE',
    'ALEGRO_INTERNAL_BE_IMPORT': 'BE_DE',
    'ALEGRO_INTERNAL_DE_EXPORT': 'BE_DE',
}


def extract_border_from_eic(eic: str) -> Optional[str]:
    """
    Extract border from EIC code with 10T-XX-YY-NNNNNN format.

    This is the most reliable method as border is explicitly encoded.
    A code is accepted only when its second and third dash-separated
    segments are both two-letter alphabetic zone codes. Anything else
    (missing or empty segments, numeric segments, non-string input)
    yields None, so callers can fall through to another method instead of
    receiving a malformed border such as "_DE".

    Args:
        eic: CNEC EIC code

    Returns:
        Border string with the two zones in alphabetical order
        (e.g., "DE_FR", "AT_SI") or None if not parseable

    Examples:
        >>> extract_border_from_eic("10T-DE-FR-000068")
        'DE_FR'
        >>> extract_border_from_eic("10T-FR-DE-000068")
        'DE_FR'
        >>> extract_border_from_eic("10T-AT-SI-00003P")
        'AT_SI'
        >>> extract_border_from_eic("17T0000000215642") is None
        True
        >>> extract_border_from_eic("10T-DE-") is None
        True
    """
    # Null / non-string identifiers (e.g. missing values from a data frame)
    # are simply "not parseable" rather than an error.
    if not isinstance(eic, str) or not eic.startswith('10T-'):
        return None

    parts = eic.split('-')
    if len(parts) < 3:
        return None

    zone1, zone2 = parts[1], parts[2]

    # Both segments must look like zone codes; otherwise the "10T-" prefix
    # was a coincidence and no border is encoded.
    for zone in (zone1, zone2):
        if len(zone) != 2 or not zone.isalpha():
            return None

    # Normalize to alphabetical order for consistency
    first, second = sorted((zone1, zone2))
    return f"{first}_{second}"


def get_special_border(eic: str) -> Optional[str]:
    """
    Look up the border of a special-case CNEC (Alegro cable, etc.).

    The identifier is matched exactly against the keys of
    SPECIAL_BORDER_MAPPING; no normalization is applied.

    Args:
        eic: CNEC EIC code

    Returns:
        Border string, or None when the identifier is not a special case
    """
    if eic in SPECIAL_BORDER_MAPPING:
        return SPECIAL_BORDER_MAPPING[eic]
    return None


def infer_border_from_tso_and_ptdf(
    tso: str,
    ptdf_dict: Dict[str, float]
) -> Optional[str]:
    """
    Infer border using TSO home zone + highest PTDF in neighbor zones.

    This is a fallback method when EIC doesn't encode border explicitly.
    Uses TSO to identify home country, then finds neighbor with highest
    |PTDF| value. Neighbors whose PTDF is absent, None or NaN are ignored.
    When several neighbors share the highest |PTDF|, the one listed first
    in ZONE_NEIGHBORS wins.

    Args:
        tso: TSO name (e.g., "Apg", "Rte", "Amprion")
        ptdf_dict: Dictionary of PTDF values
                   Format: {"ptdf_AT": -0.45, "ptdf_DE": 0.12, ...}

    Returns:
        Border string with the two zones in alphabetical order, or None if
        the TSO is unknown, its zone has no neighbors, or no neighbor has a
        usable PTDF value

    Example:
        >>> ptdfs = {"ptdf_AT": -0.45, "ptdf_SI": 0.38, "ptdf_DE": 0.12}
        >>> infer_border_from_tso_and_ptdf("Apg", ptdfs)
        'AT_SI'

        Apg is the Austrian TSO; SI has the highest |PTDF| among AT's
        neighbors (the home zone's own PTDF is not considered).
    """
    home_zone = TSO_TO_ZONE.get(tso)
    if not home_zone:
        return None

    neighbors = ZONE_NEIGHBORS.get(home_zone, [])
    if not neighbors:
        return None

    # Collect |PTDF| for every neighbor that has a usable value
    neighbor_ptdfs = {}
    for neighbor in neighbors:
        value = ptdf_dict.get(f'ptdf_{neighbor}')
        # NaN compares False against everything, so letting it reach max()
        # would make the winner depend on dict order; None cannot be passed
        # to abs() at all. Both mean "no data" and are skipped.
        if value is None or math.isnan(value):
            continue
        neighbor_ptdfs[neighbor] = abs(value)

    if not neighbor_ptdfs:
        return None

    # Get neighbor with maximum absolute PTDF
    max_neighbor = max(neighbor_ptdfs, key=neighbor_ptdfs.get)

    # Normalize border to alphabetical order
    border = f"{min(home_zone, max_neighbor)}_{max(home_zone, max_neighbor)}"

    return border


def extract_cnec_border(
    cnec_eic: str,
    tso: str,
    ptdf_dict: Optional[Dict[str, float]] = None
) -> str:
    """
    Resolve the commercial border of a CNEC, most reliable method first.

    Resolution order (the first method that yields a border wins):
    1. EIC parsing (10T-XX-YY format) - most reliable
    2. Special-case lookup (Alegro, etc.)
    3. TSO home zone + neighbor with the largest |PTDF| - only attempted
       when a non-empty ptdf_dict is supplied
    4. TSO-only guess: home zone paired with its alphabetically first
       neighbor - very approximate, but preferred over "UNKNOWN"
    5. "UNKNOWN" when the TSO cannot be mapped to a zone with neighbors

    Args:
        cnec_eic: CNEC EIC code
        tso: TSO name
        ptdf_dict: Optional dictionary of PTDF values
                   Format: {"ptdf_AT": -0.45, "ptdf_BE": 0.12, ...}

    Returns:
        Border string (e.g., "DE_FR", "AT_SI") or "UNKNOWN"

    Examples:
        >>> extract_cnec_border("10T-DE-FR-000068", "Amprion")
        'DE_FR'

        >>> extract_cnec_border("ALEGRO_EXTERNAL_BE_IMPORT", "Elia")
        'BE_DE'

        >>> ptdfs = {"ptdf_AT": -0.45, "ptdf_SI": 0.38}
        >>> extract_cnec_border("17T0000000215642", "Apg", ptdfs)
        'AT_SI'

        >>> extract_cnec_border("17T0000000215642", "Apg")
        'AT_CH'
    """
    # Methods 1 and 2 need only the identifier. `or` short-circuits, so the
    # special-case lookup runs only when EIC parsing found nothing.
    resolved = extract_border_from_eic(cnec_eic) or get_special_border(cnec_eic)
    if resolved:
        return resolved

    # Method 3: PTDF-based inference, skipped when no PTDFs were supplied
    if ptdf_dict:
        inferred = infer_border_from_tso_and_ptdf(tso, ptdf_dict)
        if inferred:
            return inferred

    # Method 4: TSO-only guess; without a home zone that has neighbors
    # there is nothing left to try.
    home_zone = TSO_TO_ZONE.get(tso)
    if not home_zone:
        return "UNKNOWN"

    candidates = ZONE_NEIGHBORS.get(home_zone, [])
    if not candidates:
        return "UNKNOWN"

    # Alphabetically first neighbor is an arbitrary but deterministic pick
    guess = min(candidates)
    low, high = sorted((home_zone, guess))
    return f"{low}_{high}"


def validate_border_assignment(
    border: str,
    ptdf_dict: Dict[str, float],
    threshold: float = 0.05
) -> bool:
    """
    Sanity-check a border assignment against the CNEC's PTDF values.

    A border XX_YY passes when |ptdf_XX| or |ptdf_YY| is strictly greater
    than the threshold. A zone missing from ptdf_dict counts as 0.0.
    "UNKNOWN" and any string that is not exactly two "_"-separated parts
    always fail.

    Args:
        border: Assigned border (e.g., "DE_FR")
        ptdf_dict: Dictionary of PTDF values
        threshold: Minimum |PTDF| to consider significant (default 0.05)

    Returns:
        True if validation passes, False otherwise

    Example:
        >>> validate_border_assignment("DE_FR", {"ptdf_DE": -0.42, "ptdf_FR": 0.38})
        True

        >>> validate_border_assignment("DE_FR", {"ptdf_DE": 0.01, "ptdf_FR": 0.02})
        False
    """
    # Exactly one underscore <=> exactly two zone parts
    if border == "UNKNOWN" or border.count('_') != 1:
        return False

    zone_a, zone_b = border.split('_')

    # Both magnitudes are computed up front, before either comparison
    magnitude_a, magnitude_b = (
        abs(ptdf_dict.get(f'ptdf_{zone}', 0.0)) for zone in (zone_a, zone_b)
    )

    # One significant side is enough
    return magnitude_a > threshold or magnitude_b > threshold


def get_border_statistics(borders: list) -> Dict[str, int]:
    """
    Get frequency statistics for border assignments.

    Useful for validating that major FBMC borders are well-represented.

    Args:
        borders: List of border assignments

    Returns:
        Dictionary mapping border -> count, ordered by count (descending).
        Borders with equal counts keep the order in which they first
        appear in the input.

    Example:
        >>> get_border_statistics(["DE_FR", "AT_SI", "DE_FR", "UNKNOWN"])
        {'DE_FR': 2, 'AT_SI': 1, 'UNKNOWN': 1}
    """
    # Counter.most_common() sorts by descending count and keeps first-seen
    # order among ties; dict() turns the result back into a plain dict.
    return dict(Counter(borders).most_common())