Evgueni Poloukarov and Claude committed
Commit a57b996 · 1 Parent(s): 7f2c237

feat: add 396 volatility features for zero-shot forecast improvement


Added per-border volatility metrics to help the model capture hour-to-hour variations:
- 132 hour-over-hour delta features (target_delta_h1_*)
- 132 24-hour rolling volatility features (target_vol_24h_*)
- 132 6-hour range features (target_range_6h_*)

Total features: 2,647 -> 3,043 columns
Dataset uploaded to HuggingFace: evgueni-p/fbmc-features-24month

Initial evaluation: NO improvement (0.0% MAE change)
Likely cause: Chronos-2 ignores target-derived covariates, or the jump to 3,043 columns dilutes the signal (feature explosion)

Co-Authored-By: Claude <[email protected]>
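A quick ablation makes the "ignored covariates" hypothesis testable: drop the 396 target-derived columns and rerun the same zero-shot evaluation. The sketch below is illustrative and not part of this commit; it only assumes the column prefixes and parquet path introduced here, and the evaluation call itself is left to the existing pipeline.

import polars as pl

df = pl.read_parquet("data/processed/features_unified_24month.parquet")

# Select the target-derived volatility columns added by this commit
vol_prefixes = ("target_delta_h1_", "target_vol_24h_", "target_range_6h_")
vol_cols = [c for c in df.columns if c.startswith(vol_prefixes)]
print(f"Dropping {len(vol_cols)} volatility columns")  # expected: 396

df_ablated = df.drop(vol_cols)
# Run both frames through the same zero-shot evaluation; identical MAE
# on df and df_ablated confirms Chronos-2 never reads these covariates.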

src/feature_engineering/engineer_jao_features.py CHANGED
@@ -618,6 +618,48 @@ def engineer_jao_features(
             unified[col].alias(target_name)
         ])
 
+    print(f"\n[INFO] Adding volatility features for {len(directional_cols)} directional borders...")
+    print("[INFO] Volatility features will help model capture hour-to-hour variations")
+
+    # Add volatility features for EACH directional border
+    # These features teach the model about hourly change patterns and volatility magnitude
+    for col in sorted(directional_cols):
+        from_country, to_country = col.split('>')
+        border_code = f'{from_country}_{to_country}'
+
+        # Get the target series
+        target_col = f'target_border_{border_code}'
+
+        # 1. Hour-over-hour delta (captures immediate hourly changes)
+        #    Formula: target[t] - target[t-1]
+        #    Helps model learn rapid swings and ramps
+        delta_col = f'target_delta_h1_{border_code}'
+        all_features = all_features.with_columns([
+            (all_features[target_col] - all_features[target_col].shift(1)).alias(delta_col)
+        ])
+
+        # 2. 24-hour rolling volatility (captures daily volatility patterns)
+        #    Formula: rolling_std(target, window=24h)
+        #    Informs model about magnitude of daily variation
+        vol_col = f'target_vol_24h_{border_code}'
+        all_features = all_features.with_columns([
+            all_features[target_col].rolling_std(window_size=24, min_periods=1).alias(vol_col)
+        ])
+
+        # 3. 6-hour range (captures intraday swings - morning/evening ramps)
+        #    Formula: rolling_max(target, 6h) - rolling_min(target, 6h)
+        #    Detects peak/off-peak transitions
+        range_col = f'target_range_6h_{border_code}'
+        all_features = all_features.with_columns([
+            (all_features[target_col].rolling_max(window_size=6, min_periods=1) -
+             all_features[target_col].rolling_min(window_size=6, min_periods=1)).alias(range_col)
+        ])
+
+    print(f"[OK] Added {len(directional_cols) * 3} volatility features:")
+    print(f"  - {len(directional_cols)} hour-over-hour delta features")
+    print(f"  - {len(directional_cols)} 24-hour rolling volatility features")
+    print(f"  - {len(directional_cols)} 6-hour range features")
+
     # Remove duplicates if any
     if 'mtu_right' in all_features.columns:
         all_features = all_features.drop([c for c in all_features.columns if c.endswith('_right')])
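A note on the committed loop: it calls with_columns eagerly three times per border, materializing a new frame on each of the 396 passes. A single-pass equivalent builds all expressions first; this is a sketch, assuming the same all_features frame, directional_cols set, and polars version (min_periods keyword) as the code above.

import polars as pl

exprs = []
for col in sorted(directional_cols):
    from_country, to_country = col.split('>')
    border = f'{from_country}_{to_country}'
    target = pl.col(f'target_border_{border}')
    exprs += [
        # Same three metrics as the committed code, as lazy expressions
        (target - target.shift(1)).alias(f'target_delta_h1_{border}'),
        target.rolling_std(window_size=24, min_periods=1).alias(f'target_vol_24h_{border}'),
        (target.rolling_max(window_size=6, min_periods=1)
         - target.rolling_min(window_size=6, min_periods=1)).alias(f'target_range_6h_{border}'),
    ]

# One pass over the frame instead of 396 separate with_columns calls
all_features = all_features.with_columns(exprs)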
upload_to_hf.py CHANGED
@@ -37,16 +37,16 @@ def upload_extended_dataset():
     login(token=hf_token)
     print("   [OK] Logged in")
 
-    # Load extended dataset
-    extended_file = Path("data/processed/features_unified_extended.parquet")
-    if not extended_file.exists():
-        raise FileNotFoundError(f"Extended dataset not found: {extended_file}")
+    # Load unified dataset with volatility features
+    unified_file = Path("data/processed/features_unified_24month.parquet")
+    if not unified_file.exists():
+        raise FileNotFoundError(f"Unified dataset not found: {unified_file}")
 
-    print(f"\nLoading extended dataset...")
-    df = pl.read_parquet(extended_file)
+    print(f"\nLoading unified dataset with volatility features...")
+    df = pl.read_parquet(unified_file)
     print(f"   Shape: {df.shape}")
     print(f"   Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")
-    print(f"   File size: {extended_file.stat().st_size / 1024 / 1024:.1f} MB")
+    print(f"   File size: {unified_file.stat().st_size / 1024 / 1024:.1f} MB")
 
     # Convert to HuggingFace Dataset
     print("\nConverting to HuggingFace Dataset format...")