 from typing import Optional, Sequence

-from abc import abstractmethod
+from abc import ABC, abstractmethod

 import numpy as np
 import pandas as pd

 from ...datasets.base import Dataset
 from ...llm import LLMImportError
 from ...models.base import BaseModel
+from ...models.base.model_prediction import ModelPredictionResults
 from ..issues import Issue, IssueLevel, Robustness
 from ..logger import logger
 from ..registry import Detector
+from .base_perturbation_function import PerturbationFunction
+from .numerical_transformations import NumericalTransformation
 from .text_transformations import TextTransformation


-class BaseTextPerturbationDetector(Detector):
-    """Base class for metamorphic detectors based on text transformations."""
+def _relative_delta(actual: np.ndarray, reference: np.ndarray) -> np.ndarray:
+    """
+    Computes elementwise relative delta. If reference[i] == 0, we replace it with epsilon
+    to avoid division by zero.
+    """
+    epsilon = 1e-9
+    safe_ref = np.where(reference == 0, epsilon, reference)
+    return (actual - reference) / safe_ref
+
+
+def _get_default_num_samples(model) -> int:
+    if model.is_text_generation:
+        return 10
+    return 1_000
+
+
+def _get_default_output_sensitivity(model) -> float:
+    if model.is_text_generation:
+        return 0.15
+    return 0.05
+
+
+def _get_default_threshold(model) -> float:
+    if model.is_text_generation:
+        return 0.10
+    return 0.05
+
+
+def _generate_robustness_tests(issue: Issue):
+    from ...testing.tests.metamorphic import test_metamorphic_invariance
+
+    # Only generates a single metamorphic test
+    return {
+        f"Invariance to “{issue.transformation_fn}”": test_metamorphic_invariance(
+            transformation_function=issue.transformation_fn,
+            slicing_function=None,
+            threshold=1 - issue.meta["threshold"],
+            output_sensitivity=issue.meta.get("output_sentitivity", None),
+        )
+    }
+
+
+class BasePerturbationDetector(Detector, ABC):
+    """
+    Common parent class for metamorphic perturbation detectors (both text and numerical).
+    """

     _issue_group = Robustness
     _taxonomy = ["avid-effect:performance:P0201"]

     def __init__(
         self,
-        transformations: Optional[Sequence[TextTransformation]] = None,
+        transformations: Optional[Sequence[PerturbationFunction]] = None,
         threshold: Optional[float] = None,
-        output_sensitivity=None,
+        output_sensitivity: Optional[float] = None,
         num_samples: Optional[int] = None,
     ):
-        """Creates a new instance of the detector.
+        """
+        Creates a new instance of the detector.

         Parameters
         ----------
-        transformations: Optional[Sequence[TextTransformation]]
-            The text transformations used in the metamorphic testing. See :ref:`transformation_functions` for details
+        transformations: Optional[Sequence[PerturbationFunction]]
+            The transformations used in the metamorphic testing. See :ref:`transformation_functions` for details
             about the available transformations. If not provided, a default set of transformations will be used.
         threshold: Optional[float]
             The threshold for the fail rate, which is defined as the proportion of samples for which the model
@@ -52,53 +100,103 @@ def __init__(
         self.num_samples = num_samples
         self.output_sensitivity = output_sensitivity

-    def run(self, model: BaseModel, dataset: Dataset, features: Sequence[str]) -> Sequence[Issue]:
-        transformations = self.transformations or self._get_default_transformations(model, dataset)
+    @abstractmethod
+    def _select_features(self, dataset: Dataset, features: Sequence[str]) -> Sequence[str]:
+        raise NotImplementedError

-        # Only analyze text features
-        text_features = [
-            f
-            for f in features
-            if dataset.column_types[f] == "text" and pd.api.types.is_string_dtype(dataset.df[f].dtype)
-        ]
+    @abstractmethod
+    def _get_default_transformations(self) -> Sequence[PerturbationFunction]:
+        raise NotImplementedError

-        logger.info(
-            f"{self.__class__.__name__}: Running with transformations={[t.name for t in transformations]} "
-            f"threshold={self.threshold} output_sensitivity={self.output_sensitivity} num_samples={self.num_samples}"
-        )
+    @abstractmethod
+    def _supports_text_generation(self) -> bool:
+        raise NotImplementedError

-        issues = []
-        for transformation in transformations:
-            issues.extend(self._detect_issues(model, dataset, transformation, text_features))
+    def _compute_passed(
+        self,
+        model: BaseModel,
+        original_pred: ModelPredictionResults,
+        perturbed_pred: ModelPredictionResults,
+        output_sensitivity: float,
+    ) -> np.ndarray:
+        if model.is_classification:
+            return original_pred.raw_prediction == perturbed_pred.raw_prediction
+
+        elif model.is_regression:
+            rel_delta = _relative_delta(perturbed_pred.raw_prediction, original_pred.raw_prediction)
+            return np.abs(rel_delta) < output_sensitivity
+
+        elif model.is_text_generation:
+            if not self._supports_text_generation():
+                raise NotImplementedError("Text generation is not supported by this detector.")
+            try:
+                import evaluate
+            except ImportError as err:
+                raise LLMImportError() from err
+
+            scorer = evaluate.load("bertscore")
+            score = scorer.compute(
+                predictions=perturbed_pred.prediction,
+                references=original_pred.prediction,
+                model_type="distilbert-base-multilingual-cased",
+                idf=True,
+            )
+            return np.array(score["f1"]) > 1 - output_sensitivity

-        return [i for i in issues if i is not None]
+        else:
+            raise NotImplementedError("Only classification, regression, or text generation models are supported.")

-    @abstractmethod
-    def _get_default_transformations(self, model: BaseModel, dataset: Dataset) -> Sequence[TextTransformation]:
-        ...
+    def _create_examples(
+        self,
+        original_data: Dataset,
+        original_pred: ModelPredictionResults,
+        perturbed_data: Dataset,
+        perturbed_pred: ModelPredictionResults,
+        feature: str,
+        passed: np.ndarray,
+        model: BaseModel,
+        transformation_fn,
+    ) -> pd.DataFrame:
+        examples = original_data.df.loc[~passed, [feature]].copy()
+        examples[f"{transformation_fn.name}({feature})"] = perturbed_data.df.loc[~passed, feature]
+
+        examples["Original prediction"] = original_pred.prediction[~passed]
+        examples["Prediction after perturbation"] = perturbed_pred.prediction[~passed]
+
+        if model.is_classification:
+            examples["Original prediction"] = examples["Original prediction"].astype(str)
+            examples["Prediction after perturbation"] = examples["Prediction after perturbation"].astype(str)
+
+            ps_before = pd.Series(original_pred.probabilities[~passed], index=examples.index)
+            ps_after = pd.Series(perturbed_pred.probabilities[~passed], index=examples.index)
+
+            examples["Original prediction"] += ps_before.apply(lambda p: f" (p={p:.2f})")
+            examples["Prediction after perturbation"] += ps_after.apply(lambda p: f" (p={p:.2f})")
+
+        return examples

     def _detect_issues(
         self,
         model: BaseModel,
         dataset: Dataset,
-        transformation: TextTransformation,
+        transformation,
         features: Sequence[str],
     ) -> Sequence[Issue]:
+        # Fall back to defaults if not explicitly set
         num_samples = self.num_samples if self.num_samples is not None else _get_default_num_samples(model)
+        threshold = self.threshold if self.threshold is not None else _get_default_threshold(model)
         output_sensitivity = (
             self.output_sensitivity if self.output_sensitivity is not None else _get_default_output_sensitivity(model)
         )
-        threshold = self.threshold if self.threshold is not None else _get_default_threshold(model)

         issues = []
-        # @TODO: integrate this with Giskard metamorphic tests already present
         for feature in features:
+            # Build transformation function for this feature
             transformation_fn = transformation(column=feature)
             transformed = dataset.transform(transformation_fn)

             # Select only the records which were changed
             changed_idx = dataset.df.index[transformed.df[feature] != dataset.df[feature]]
-
             if changed_idx.empty:
                 continue

@@ -107,6 +205,7 @@ def _detect_issues(
                 rng = np.random.default_rng(747)
                 changed_idx = changed_idx[rng.choice(len(changed_idx), num_samples, replace=False)]

+            # Build original vs. perturbed datasets
             original_data = Dataset(
                 dataset.df.loc[changed_idx],
                 target=dataset.target,
@@ -124,27 +223,12 @@ def _detect_issues(
             original_pred = model.predict(original_data)
             perturbed_pred = model.predict(perturbed_data)

-            if model.is_classification:
-                passed = original_pred.raw_prediction == perturbed_pred.raw_prediction
-            elif model.is_regression:
-                rel_delta = _relative_delta(perturbed_pred.raw_prediction, original_pred.raw_prediction)
-                passed = np.abs(rel_delta) < output_sensitivity
-            elif model.is_text_generation:
-                try:
-                    import evaluate
-                except ImportError as err:
-                    raise LLMImportError() from err
-
-                scorer = evaluate.load("bertscore")
-                score = scorer.compute(
-                    predictions=perturbed_pred.prediction,
-                    references=original_pred.prediction,
-                    model_type="distilbert-base-multilingual-cased",
-                    idf=True,
-                )
-                passed = np.array(score["f1"]) > 1 - output_sensitivity
-            else:
-                raise NotImplementedError("Only classification, regression, or text generation models are supported.")
+            passed = self._compute_passed(
+                model=model,
+                original_pred=original_pred,
+                perturbed_pred=perturbed_pred,
+                output_sensitivity=output_sensitivity,
+            )

             pass_rate = passed.mean()
             fail_rate = 1 - pass_rate
@@ -196,61 +280,88 @@ def _detect_issues(
             )

             # Add examples
-            examples = original_data.df.loc[~passed, (feature,)].copy()
-            examples[f"{transformation_fn.name}({feature})"] = perturbed_data.df.loc[~passed, feature]
-
-            examples["Original prediction"] = original_pred.prediction[~passed]
-            examples["Prediction after perturbation"] = perturbed_pred.prediction[~passed]
-
-            if model.is_classification:
-                examples["Original prediction"] = examples["Original prediction"].astype(str)
-                examples["Prediction after perturbation"] = examples["Prediction after perturbation"].astype(str)
-                ps_before = pd.Series(original_pred.probabilities[~passed], index=examples.index)
-                ps_after = pd.Series(perturbed_pred.probabilities[~passed], index=examples.index)
-                examples["Original prediction"] += ps_before.apply(lambda p: f" (p = {p:.2f})")
-                examples["Prediction after perturbation"] += ps_after.apply(lambda p: f" (p = {p:.2f})")
-
+            examples = self._create_examples(
+                original_data,
+                original_pred,
+                perturbed_data,
+                perturbed_pred,
+                feature,
+                passed,
+                model,
+                transformation_fn,
+            )
             issue.add_examples(examples)

             issues.append(issue)

         return issues

+    def run(self, model: BaseModel, dataset: Dataset, features: Sequence[str]) -> Sequence[Issue]:
+        """
+        Runs the perturbation detector on the given model and dataset.

-def _generate_robustness_tests(issue: Issue):
-    from ...testing.tests.metamorphic import test_metamorphic_invariance
+        Parameters
+        ----------
+        model: BaseModel
+            The model to test.
+        dataset: Dataset
+            The dataset to use for testing.
+        features: Sequence[str]
+            The features (columns) to test.
+
+        Returns
+        -------
+        Sequence[Issue]
+            A list of issues found during the testing.
+        """
+        transformations = self.transformations or self._get_default_transformations()
+        selected_features = self._select_features(dataset, features)

-    # Only generates a single metamorphic test
-    return {
-        f"Invariance to “{issue.transformation_fn}”": test_metamorphic_invariance(
-            transformation_function=issue.transformation_fn,
-            slicing_function=None,
-            threshold=1 - issue.meta["threshold"],
-            output_sensitivity=issue.meta["output_sentitivity"],
+        logger.info(
+            f"{self.__class__.__name__}: Running with transformations={[t.name for t in transformations]} "
+            f"threshold={self.threshold} output_sensitivity={self.output_sensitivity} num_samples={self.num_samples}"
         )
-    }

+        issues = []
+        for transformation in transformations:
+            issues.extend(self._detect_issues(model, dataset, transformation, selected_features))

-def _relative_delta(actual, reference):
-    return (actual - reference) / reference
+        return [i for i in issues if i is not None]


-def _get_default_num_samples(model) -> int:
-    if model.is_text_generation:
-        return 10
+class BaseTextPerturbationDetector(BasePerturbationDetector):
+    """
+    Base class for metamorphic detectors based on text transformations.
+    """

-    return 1_000
+    def _select_features(self, dataset: Dataset, features: Sequence[str]) -> Sequence[str]:
+        # Only analyze text features
+        return [
+            f
+            for f in features
+            if dataset.column_types[f] == "text" and pd.api.types.is_string_dtype(dataset.df[f].dtype)
+        ]

+    @abstractmethod
+    def _get_default_transformations(self) -> Sequence[TextTransformation]:
+        raise NotImplementedError

-def _get_default_output_sensitivity(model) -> float:
-    if model.is_text_generation:
-        return 0.15
+    def _supports_text_generation(self) -> bool:
+        return True

-    return 0.05

+class BaseNumericalPerturbationDetector(BasePerturbationDetector):
+    """
+    Base class for metamorphic detectors based on numerical feature perturbations.
+    """

-def _get_default_threshold(model) -> float:
-    if model.is_text_generation:
-        return 0.10
+    def _select_features(self, dataset: Dataset, features: Sequence[str]) -> Sequence[str]:
+        # Only analyze numeric features
+        return [f for f in features if dataset.column_types[f] == "numeric"]

-    return 0.05
+    @abstractmethod
+    def _get_default_transformations(self) -> Sequence[NumericalTransformation]:
+        raise NotImplementedError
+
+    def _supports_text_generation(self) -> bool:
+        return False
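
A minimal sketch of how a concrete detector would plug into the refactored hierarchy: only _get_default_transformations needs to be supplied, since BaseTextPerturbationDetector already provides _select_features and _supports_text_generation. The detector name, import path, and transformation classes below are illustrative assumptions, not part of this change.

# Illustrative sketch only — names and import path are assumptions, not part of this change.
from typing import Sequence

# Assumed location of the transformation classes; TextUppercase and TextTypoTransformation
# stand in for any TextTransformation subclasses available in the project.
from giskard.scanner.robustness.text_transformations import (
    TextTransformation,
    TextTypoTransformation,
    TextUppercase,
)

# BaseTextPerturbationDetector is the class defined in the module shown in this diff
# (its import path is omitted here).


class ExampleTypoRobustnessDetector(BaseTextPerturbationDetector):
    """Hypothetical concrete detector: checks prediction stability under simple text perturbations."""

    def _get_default_transformations(self) -> Sequence[TextTransformation]:
        # The base class instantiates each entry per feature via transformation(column=feature).
        return [TextUppercase, TextTypoTransformation]


# Hypothetical usage, following the run() signature introduced above:
# issues = ExampleTypoRobustnessDetector(threshold=0.05, num_samples=500).run(
#     model, dataset, features=["review_text"]
# )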
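
A standalone check of the regression pass criterion used in _compute_passed: a sample passes when the absolute relative change stays below output_sensitivity (0.05 by default for non-text-generation models, per _get_default_output_sensitivity). This reuses the epsilon guard from _relative_delta; the arrays are made-up illustration data.

import numpy as np


def relative_delta(actual: np.ndarray, reference: np.ndarray) -> np.ndarray:
    # Mirrors _relative_delta above: zero references are replaced by epsilon to avoid division by zero.
    epsilon = 1e-9
    safe_ref = np.where(reference == 0, epsilon, reference)
    return (actual - reference) / safe_ref


original = np.array([10.0, 0.0, 200.0])   # original predictions
perturbed = np.array([10.4, 0.0, 150.0])  # predictions after perturbation

output_sensitivity = 0.05  # default for non-text-generation models
passed = np.abs(relative_delta(perturbed, original)) < output_sensitivity
# passed -> [ True  True False]; fail_rate = 1 - passed.mean() ≈ 0.33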