2
2
3
3
import re
4
4
from importlib .metadata import version
5
- from typing import List
5
+ from typing import List , Optional
6
6
7
7
import numpy as np
8
8
import pandas as pd
@@ -323,10 +323,37 @@ def test_prepare_reporter(test_context):
323
323
assert 14299 <= len (rep .graph ) - N
324
324
325
325
326
# Filters for comparison.
#
# Each entry of IGNORE is a compiled regular expression for a comparison message that
# is expected and should not be treated as an error. The patterns are applied with
# re.Pattern.match(), so each is anchored at the start of the message only. A literal
# "|" inside an IAMC 'variable' code must be written as r"\|": a bare "|" is regex
# alternation and would make the pattern match every message that merely starts with
# the text to its left.

# 'variable' codes for primary energy from six sources
PE0 = r"Primary Energy\|(Coal|Gas|Hydro|Nuclear|Solar|Wind)"
# Subset of PE0 without Hydro and Nuclear
PE1 = r"Primary Energy\|(Coal|Gas|Solar|Wind)"
# A single 'variable' code for transport CO2 emissions
E = (
    r"Emissions\|CO2\|Energy\|Demand\|Transportation\|Road Rail and Domestic "
    "Shipping"
)

IGNORE = [
    # Other 'variable' codes are missing from `obs`
    re.compile(rf"variable='(?!{PE0}).*': no right data"),
    # 'variable' codes with further parts are missing from `obs`
    re.compile(rf"variable='{PE0}.*': no right data"),
    # For `PE1` (NB: not Hydro or Nuclear) units and most values differ.
    # The message text is "units mismatch: …", so the colon is required here.
    re.compile(rf"variable='{PE1}.*': units mismatch: .*EJ/yr.*'', nan"),
    re.compile(r"variable='Primary Energy\|Coal': 220 of 240 values with \|diff"),
    re.compile(r"variable='Primary Energy\|Gas': 234 of 240 values with \|diff"),
    re.compile(r"variable='Primary Energy\|Solar': 191 of 240 values with \|diff"),
    re.compile(r"variable='Primary Energy\|Wind': 179 of 240 values with \|diff"),
    # For `E` units and most values differ
    re.compile(rf"variable='{E}': units mismatch: .*Mt CO2/yr.*Mt / a"),
    re.compile(rf"variable='{E}': 20 missing right entries"),
    re.compile(rf"variable='{E}': 220 of 240 values with \|diff"),
]
350
+
351
+
326
352
@to_simulate .minimum_version
327
353
def test_compare (test_context ):
328
354
"""Compare the output of genno-based and legacy reporting."""
329
- key = "pe test"
355
+ key = "all::iamc"
356
+ # key = "pe test"
330
357
331
358
# Obtain the output from reporting `key` on `snapshot_id`
332
359
snapshot_id : int = 1
@@ -348,24 +375,8 @@ def test_compare(test_context):
348
375
engine = "pyarrow" ,
349
376
)
350
377
351
- # Filters for comparison
352
- pe0 = r"Primary Energy\|(Coal|Gas|Hydro|Nuclear|Solar|Wind)"
353
- pe1 = r"Primary Energy\|(Coal|Gas|Solar|Wind)"
354
- ignore = [
355
- # Other 'variable' codes are missing from `obs`
356
- re .compile (f"variable='(?!{ pe0 } ).*': no right data" ),
357
- # 'variable' codes with further parts are missing from `obs`
358
- re .compile (f"variable='{ pe0 } .*': no right data" ),
359
- # For `pe1` (NB: not Hydro or Solar) units and most values differ
360
- re .compile (f"variable='{ pe1 } .*': units mismatch .*EJ/yr.*'', nan" ),
361
- re .compile (r"variable='Primary Energy|Coal': 220 of 240 values with \|diff" ),
362
- re .compile (r"variable='Primary Energy|Gas': 234 of 240 values with \|diff" ),
363
- re .compile (r"variable='Primary Energy|Solar': 191 of 240 values with \|diff" ),
364
- re .compile (r"variable='Primary Energy|Wind': 179 of 240 values with \|diff" ),
365
- ]
366
-
367
378
# Perform the comparison, ignoring some messages
368
- if messages := compare_iamc (exp , obs , ignore = ignore ):
379
+ if messages := compare_iamc (exp , obs , ignore = IGNORE ):
369
380
# Other messages that were not explicitly ignored → some error
370
381
print ("\n " .join (messages ))
371
382
assert False
@@ -377,8 +388,8 @@ def compare_iamc(
377
388
"""Compare IAMC-structured data in `left` and `right`; return a list of messages."""
378
389
result = []
379
390
380
- def record (message : str ) -> None :
381
- if any (p .match (message ) for p in ignore ):
391
+ def record (message : str , condition : Optional [ bool ] = True ) -> None :
392
+ if not condition or any (p .match (message ) for p in ignore ):
382
393
return
383
394
result .append (message )
384
395
@@ -396,16 +407,29 @@ def checks(df: pd.DataFrame):
396
407
"value_rel = value_diff / value_left"
397
408
)
398
409
410
+ na_left = tmp .isna ()[["unit_left" , "value_left" ]]
411
+ if na_left .any (axis = None ):
412
+ record (f"{ prefix } { na_left .sum (axis = 0 ).max ()} missing left entries" )
413
+ tmp = tmp [~ na_left .any (axis = 1 )]
414
+ na_right = tmp .isna ()[["unit_right" , "value_right" ]]
415
+ if na_right .any (axis = None ):
416
+ record (f"{ prefix } { na_right .sum (axis = 0 ).max ()} missing right entries" )
417
+ tmp = tmp [~ na_right .any (axis = 1 )]
418
+
399
419
units_left = set (tmp .unit_left .unique ())
400
420
units_right = set (tmp .unit_right .unique ())
401
- if units_left != units_right :
402
- record (f"{ prefix } units mismatch: { units_left } != { units_right } " )
421
+ record (
422
+ condition = units_left != units_right ,
423
+ message = f"{ prefix } units mismatch: { units_left } != { units_right } " ,
424
+ )
403
425
404
426
N0 = len (df )
405
427
406
428
mask1 = tmp .query ("abs(value_diff) > @atol" )
407
- if len (mask1 ):
408
- record (f"{ prefix } { len (mask1 )} of { N0 } values with |diff| > { atol } " )
429
+ record (
430
+ condition = len (mask1 ),
431
+ message = f"{ prefix } { len (mask1 )} of { N0 } values with |diff| > { atol } " ,
432
+ )
409
433
410
434
for (model , scenario ), group_0 in left .merge (
411
435
right ,
0 commit comments