Skip to content

Commit 5987d27

Browse files
bmalezieux, Benoît Malézieux, Benoît Malézieux, kevinmessiaen, rabah-khalek
authored
JSON report generation from scan results (#1929)
* ENH: add JSON report generation from scan results and test suite results
* TEST: add test for JSON report
* UPD: add issue level to JSON report
* FIX: detector name is now part of the Issue properties
* FIX: minor fixes after review
* FIX: bug in test for JSON report
* FIX: pre-commit
* FIX: clarify loop in JSON generation, add test on detector in report generation, change output format
* FIX: pre-commit

---------

Co-authored-by: Benoît Malézieux <[email protected]>
Co-authored-by: Benoît Malézieux <[email protected]>
Co-authored-by: Kevin Messiaen <[email protected]>
Co-authored-by: Rabah Khalek <[email protected]>
1 parent 892443e commit 5987d27

17 files changed

+92
-2
lines changed

giskard/core/suite.py

+14
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
44

55
import inspect
6+
import json
67
import logging
78
import traceback
89
import warnings
@@ -219,6 +220,19 @@ def _to_dto(self, label: Optional[str], client: GiskardClient, project_key: str)
219220
completionDate=self.completion_date.isoformat(),
220221
)
221222

223+
def to_json(self, filename=None):
    """Serialize the suite results as JSON.

    Every entry of ``self.results`` becomes one key (its ``test_name``)
    mapped to the outcome ("Passed" or "Failed") and the metric value.

    Parameters
    ----------
    filename : Optional[str]
        If provided, the JSON is written to this file and nothing is
        returned. Otherwise the JSON string is returned.
    """
    summary = {
        entry.test_name: {
            "result": "Passed" if entry.result.passed else "Failed",
            "metric_value": entry.result.metric,
        }
        for entry in self.results
    }

    if filename is None:
        return json.dumps(summary, indent=4)

    with open(filename, "w") as json_file:
        json.dump(summary, json_file, indent=4)
235+
222236
def to_mlflow(self, mlflow_client: MlflowClient = None, mlflow_run_id: str = None):
223237
import mlflow
224238

giskard/scanner/calibration/overconfidence_detector.py

+1
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ def _find_issues(
105105
tests=_generate_overconfidence_tests,
106106
importance=relative_delta,
107107
taxonomy=["avid-effect:performance:P0204"],
108+
detector_name=self.__class__.__name__,
108109
)
109110

110111
# Add examples

giskard/scanner/calibration/underconfidence_detector.py

+1
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ def _find_issues(
105105
importance=relative_delta,
106106
tests=_generate_underconfidence_tests,
107107
taxonomy=["avid-effect:performance:P0204"],
108+
detector_name=self.__class__.__name__,
108109
)
109110

110111
# Add examples

giskard/scanner/correlation/spurious_correlation_detector.py

+1
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ def run(self, model: BaseModel, dataset: Dataset, features: Sequence[str]):
9696
importance=metric_value,
9797
tests=_generate_spurious_corr_tests,
9898
taxonomy=["avid-effect:performance:P0103"],
99+
detector_name=self.__class__.__name__,
99100
)
100101

101102
extractor = ExampleExtractor(issue)

giskard/scanner/data_leakage/data_leakage_detector.py

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def slice_fn(df):
5454
examples=fail_samples,
5555
meta={"domain": "Whole dataset"},
5656
taxonomy=["avid-effect:performance:P0103"],
57+
detector_name=self.__class__.__name__,
5758
)
5859
]
5960

giskard/scanner/issues.py

+9
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ def __init__(
123123
taxonomy: List[str] = None,
124124
scan_examples: Optional[ScanExamples] = None,
125125
display_footer_info: bool = True,
126+
detector_name: str = None,
126127
):
127128
"""Issue represents a single model vulnerability detected by Giskard.
128129
@@ -179,6 +180,7 @@ def __init__(
179180
self.scan_examples = DataFrameScanExamples() if scan_examples is None else scan_examples
180181
if examples is not None:
181182
self.scan_examples.extend(examples)
183+
self._detector_name = detector_name
182184

183185
def __repr__(self):
    """Return a short debug string with the class name, the issue group name and the level."""
    class_name = self.__class__.__name__
    group_name = self.group.name
    return f"<{class_name} group='{group_name}' level='{self.level}'>"
@@ -218,6 +220,13 @@ def description(self):
218220
**self.meta,
219221
)
220222

223+
@property
def detector_name(self):
    """Name of the detector attached to this issue (None when not set)."""
    return self._detector_name

@detector_name.setter
def detector_name(self, detector_name):
    # Allows plain attribute assignment (``issue.detector_name = ...``),
    # which previously raised AttributeError on the read-only property.
    self._detector_name = detector_name

def set_detector_name(self, detector_name):
    """Set the detector name.

    Kept for backward compatibility with existing callers; it stores the
    same attribute as assigning to the ``detector_name`` property.
    """
    self._detector_name = detector_name
229+
221230
def examples(self, n=3) -> Any:
    """Return ``self.scan_examples.head(n)``.

    Parameters
    ----------
    n : int
        Value forwarded to ``scan_examples.head`` (defaults to 3).
    """
    take_head = self.scan_examples.head
    return take_head(n)
223232

giskard/scanner/llm/base.py

+1
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ def make_issue(self, model: BaseModel, dataset: Dataset, requirement: str, eval_
117117
},
118118
tests=_generate_output_requirement_tests,
119119
taxonomy=self._taxonomy,
120+
detector_name=self.__class__.__name__,
120121
)
121122

122123

giskard/scanner/llm/llm_basic_sycophancy_detector.py

+1
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ def run(self, model: BaseModel, dataset: Dataset, features=None) -> Sequence[Iss
126126
},
127127
tests=_generate_sycophancy_tests,
128128
taxonomy=["avid-effect:ethics:E0402"],
129+
detector_name=self.__class__.__name__,
129130
)
130131
]
131132

giskard/scanner/llm/llm_chars_injection_detector.py

+1
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ def run(self, model: BaseModel, dataset: Dataset, features: Sequence[str]) -> Se
137137
examples=examples,
138138
tests=_generate_char_injection_tests,
139139
taxonomy=["avid-effect:performance:P0201", "avid-effect:security:S0403"],
140+
detector_name=self.__class__.__name__,
140141
)
141142

142143
issues.append(issue)

giskard/scanner/llm/llm_implausible_output_detector.py

+1
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ def run(self, model: BaseModel, dataset: Dataset, features=None) -> Sequence[Iss
100100
examples=examples,
101101
tests=_generate_implausible_output_tests,
102102
taxonomy=["avid-effect:performance:P0204"],
103+
detector_name=self.__class__.__name__,
103104
)
104105
]
105106

giskard/scanner/llm/llm_prompt_injection_detector.py

+1
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ def run(self, model: BaseModel, dataset: Dataset, features: Sequence[str]) -> Se
106106
examples=examples,
107107
tests=_generate_prompt_injection_tests,
108108
taxonomy=["avid-effect:security:S0403"],
109+
detector_name=self.__class__.__name__,
109110
)
110111
)
111112
return issues

giskard/scanner/performance/performance_bias_detector.py

+1
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ def _detect_for_metric(
241241
importance=-relative_delta if metric.greater_is_better else relative_delta,
242242
tests=_generate_performance_tests,
243243
taxonomy=["avid-effect:performance:P0204"],
244+
detector_name="PerformanceBiasDetector",
244245
)
245246

246247
# Add failure examples

giskard/scanner/report.py

+29-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from typing import TYPE_CHECKING, Optional
44

5+
import json
56
import random
67
import string
78
import tempfile
@@ -18,7 +19,7 @@
1819

1920

2021
class ScanReport:
21-
def __init__(self, issues, model=None, dataset=None, as_html: bool = True):
22+
def __init__(self, issues, model=None, dataset=None, detectors_names=None, as_html: bool = True):
2223
"""The scan report contains the results of the scan.
2324
2425
Note that this object is not meant to be instantiated directly. Instead, it is returned by the
@@ -32,13 +33,16 @@ def __init__(self, issues, model=None, dataset=None, as_html: bool = True):
3233
A Giskard model object.
3334
dataset : Dataset
3435
A Giskard dataset object.
36+
detectors_names : list
37+
A list of names corresponding to the detectors used
3538
as_html : bool
3639
Whether to render the report widget as HTML.
3740
"""
3841
self.issues = issues
3942
self.as_html = as_html
4043
self.model = model
4144
self.dataset = dataset
45+
self.detectors_names = detectors_names
4246

4347
def has_issues(self):
    """Tell whether the report holds at least one issue."""
    issue_count = len(self.issues)
    return issue_count > 0
@@ -67,6 +71,30 @@ def _repr_html_(self):
6771
def _repr_markdown_(self):
    """Return the Markdown rendering of the report, as produced by ``to_markdown()``."""
    markdown = self.to_markdown()
    return markdown
6973

74+
def to_json(self, filename=None):
    """Render the scan report as JSON.

    The output maps every detector name listed in ``self.detectors_names``
    to a dictionary that groups the descriptions of its issues by issue
    level. A detector without issues maps to an empty dictionary. Issues
    whose ``detector_name`` is not listed in ``self.detectors_names`` are
    left out of the report.

    Parameters
    ----------
    filename : Optional[str]
        If provided, the JSON is written to this file and nothing is
        returned.

    Returns
    -------
    Optional[str]
        The JSON string when ``filename`` is None, otherwise None.
    """
    results = {}
    # A report created without detector names is serialized as an empty
    # JSON object, so the return type and the handling of ``filename`` are
    # the same as for any other report.
    if self.detectors_names is not None:
        for detector_name in self.detectors_names:
            results[detector_name] = {}
        for issue in self.issues:
            issues_by_level = results.get(issue.detector_name)
            if issues_by_level is None:
                # Issue raised by a detector that is not part of this report.
                continue
            issues_by_level.setdefault(issue.level, []).append(issue.description)

    if filename is None:
        return json.dumps(results, indent=4)

    with open(filename, "w", encoding="utf-8") as json_file:
        json.dump(results, json_file, indent=4)
97+
7098
def to_html(self, filename=None, embed=False):
7199
"""Renders the scan report as HTML.
72100

giskard/scanner/robustness/base_detector.py

+1
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ def _detect_issues(
192192
importance=fail_rate,
193193
tests=_generate_robustness_tests,
194194
taxonomy=self._taxonomy,
195+
detector_name=self.__class__.__name__,
195196
)
196197

197198
# Add examples

giskard/scanner/scanner.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ def analyze(
113113

114114
# Collect the detectors
115115
detectors = self.get_detectors(tags=[model.meta.model_type.value])
116+
detectors_names = [detector.__class__.__name__ for detector in detectors]
116117

117118
# Print cost estimate
118119
if verbose:
@@ -135,7 +136,7 @@ def analyze(
135136

136137
self._collect_analytics(model, dataset, issues, elapsed, model_validation_time, detectors)
137138

138-
return ScanReport(issues, model=model, dataset=dataset)
139+
return ScanReport(issues, model=model, dataset=dataset, detectors_names=detectors_names)
139140

140141
def _run_detectors(self, detectors, model, dataset, features, verbose=True, raise_exceptions=False):
141142
if not detectors:

giskard/scanner/stochasticity/stochasticity_detector.py

+1
Original file line numberDiff line numberDiff line change
@@ -55,5 +55,6 @@ def run(self, model: BaseModel, dataset: Dataset, features=None):
5555
},
5656
examples=fail_samples,
5757
taxonomy=["avid-effect:performance:P0201"],
58+
detector_name=self.__class__.__name__,
5859
)
5960
]

tests/scan/test_scan_report.py

+26
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,29 @@ def test_scan_report_exports_to_markdown():
5151
assert dest.exists()
5252
assert dest.is_file()
5353
assert dest.read_text() == markdown
54+
55+
56+
def test_scan_report_to_json():
    """Check that a scan report serializes to JSON, both as a string and to a file."""
    model = Mock()
    dataset = Mock()
    issue = Issue(model, dataset, Robustness, IssueLevel.MAJOR, detector_name="RobustnessDetector")
    report = ScanReport(issues=[issue], detectors_names=["RobustnessDetector"])

    # JSON report returned as a string
    json_report = report.to_json()

    assert json_report is not None
    assert isinstance(json_report, str)
    assert "RobustnessDetector" in json_report

    # JSON report written to a file must match the returned string
    with tempfile.TemporaryDirectory() as tmpdir:
        dest = Path(tmpdir) / "report.json"
        report.to_json(dest)

        assert dest.exists()
        assert dest.is_file()
        assert dest.read_text() == json_report

0 commit comments

Comments
 (0)