2
2
3
3
import re
4
4
from importlib .metadata import version
5
- from typing import List
5
+ from typing import List , Optional
6
6
7
7
import numpy as np
8
8
import pandas as pd
@@ -315,10 +315,37 @@ def test_prepare_reporter(test_context):
315
315
assert 14299 <= len (rep .graph ) - N
316
316
317
317
318
# Filters for comparison.
#
# These are regular-expression fragments for IAMC 'variable' codes. A literal "|"
# inside a variable name must be written as r"\|"; a bare "|" is regex alternation.
PE0 = r"Primary Energy\|(Coal|Gas|Hydro|Nuclear|Solar|Wind)"
PE1 = r"Primary Energy\|(Coal|Gas|Solar|Wind)"
E = (
    r"Emissions\|CO2\|Energy\|Demand\|Transportation\|Road Rail and Domestic "
    "Shipping"
)

# Patterns for messages that compare_iamc() should drop. Each pattern is applied
# with re.Pattern.match(), i.e. anchored at the start of the message only, so every
# pattern must describe the message from its first character.
IGNORE = [
    # Other 'variable' codes are missing from `obs`
    re.compile(rf"variable='(?!{PE0}).*': no right data"),
    # 'variable' codes with further parts are missing from `obs`
    re.compile(rf"variable='{PE0}.*': no right data"),
    # For `PE1` (NB: not Hydro or Nuclear) units and most values differ.
    # The message text is "units mismatch: …" (with a colon), same as for `E` below.
    re.compile(rf"variable='{PE1}.*': units mismatch: .*EJ/yr.*'', nan"),
    # The "|" between "Primary Energy" and the fuel is escaped: unescaped, it would
    # split the pattern into two alternatives, the first of which
    # ("variable='Primary Energy") matches *every* Primary Energy message.
    re.compile(r"variable='Primary Energy\|Coal': 220 of 240 values with \|diff"),
    re.compile(r"variable='Primary Energy\|Gas': 234 of 240 values with \|diff"),
    re.compile(r"variable='Primary Energy\|Solar': 191 of 240 values with \|diff"),
    re.compile(r"variable='Primary Energy\|Wind': 179 of 240 values with \|diff"),
    # For `E` units and most values differ
    re.compile(rf"variable='{E}': units mismatch: .*Mt CO2/yr.*Mt / a"),
    re.compile(rf"variable='{E}': 20 missing right entries"),
    re.compile(rf"variable='{E}': 220 of 240 values with \|diff"),
]
342
+
343
+
318
344
@to_simulate .minimum_version
319
345
def test_compare (test_context ):
320
346
"""Compare the output of genno-based and legacy reporting."""
321
- key = "pe test"
347
+ key = "all::iamc"
348
+ # key = "pe test"
322
349
323
350
# Obtain the output from reporting `key` on `snapshot_id`
324
351
snapshot_id : int = 1
@@ -340,24 +367,8 @@ def test_compare(test_context):
340
367
engine = "pyarrow" ,
341
368
)
342
369
343
- # Filters for comparison
344
- pe0 = r"Primary Energy\|(Coal|Gas|Hydro|Nuclear|Solar|Wind)"
345
- pe1 = r"Primary Energy\|(Coal|Gas|Solar|Wind)"
346
- ignore = [
347
- # Other 'variable' codes are missing from `obs`
348
- re .compile (f"variable='(?!{ pe0 } ).*': no right data" ),
349
- # 'variable' codes with further parts are missing from `obs`
350
- re .compile (f"variable='{ pe0 } .*': no right data" ),
351
- # For `pe1` (NB: not Hydro or Solar) units and most values differ
352
- re .compile (f"variable='{ pe1 } .*': units mismatch .*EJ/yr.*'', nan" ),
353
- re .compile (r"variable='Primary Energy|Coal': 220 of 240 values with \|diff" ),
354
- re .compile (r"variable='Primary Energy|Gas': 234 of 240 values with \|diff" ),
355
- re .compile (r"variable='Primary Energy|Solar': 191 of 240 values with \|diff" ),
356
- re .compile (r"variable='Primary Energy|Wind': 179 of 240 values with \|diff" ),
357
- ]
358
-
359
370
# Perform the comparison, ignoring some messages
360
- if messages := compare_iamc (exp , obs , ignore = ignore ):
371
+ if messages := compare_iamc (exp , obs , ignore = IGNORE ):
361
372
# Other messages that were not explicitly ignored → some error
362
373
print ("\n " .join (messages ))
363
374
assert False
@@ -369,8 +380,8 @@ def compare_iamc(
369
380
"""Compare IAMC-structured data in `left` and `right`; return a list of messages."""
370
381
result = []
371
382
372
- def record (message : str ) -> None :
373
- if any (p .match (message ) for p in ignore ):
383
+ def record (message : str , condition : Optional [ bool ] = True ) -> None :
384
+ if not condition or any (p .match (message ) for p in ignore ):
374
385
return
375
386
result .append (message )
376
387
@@ -388,16 +399,29 @@ def checks(df: pd.DataFrame):
388
399
"value_rel = value_diff / value_left"
389
400
)
390
401
402
+ na_left = tmp .isna ()[["unit_left" , "value_left" ]]
403
+ if na_left .any (axis = None ):
404
+ record (f"{ prefix } { na_left .sum (axis = 0 ).max ()} missing left entries" )
405
+ tmp = tmp [~ na_left .any (axis = 1 )]
406
+ na_right = tmp .isna ()[["unit_right" , "value_right" ]]
407
+ if na_right .any (axis = None ):
408
+ record (f"{ prefix } { na_right .sum (axis = 0 ).max ()} missing right entries" )
409
+ tmp = tmp [~ na_right .any (axis = 1 )]
410
+
391
411
units_left = set (tmp .unit_left .unique ())
392
412
units_right = set (tmp .unit_right .unique ())
393
- if units_left != units_right :
394
- record (f"{ prefix } units mismatch: { units_left } != { units_right } " )
413
+ record (
414
+ condition = units_left != units_right ,
415
+ message = f"{ prefix } units mismatch: { units_left } != { units_right } " ,
416
+ )
395
417
396
418
N0 = len (df )
397
419
398
420
mask1 = tmp .query ("abs(value_diff) > @atol" )
399
- if len (mask1 ):
400
- record (f"{ prefix } { len (mask1 )} of { N0 } values with |diff| > { atol } " )
421
+ record (
422
+ condition = len (mask1 ),
423
+ message = f"{ prefix } { len (mask1 )} of { N0 } values with |diff| > { atol } " ,
424
+ )
401
425
402
426
for (model , scenario ), group_0 in left .merge (
403
427
right ,
0 commit comments