Skip to content

Commit b5561dd

Browse files
committed
Remove dataset sample when saving dataset
1 parent c325215 commit b5561dd

File tree

1 file changed

+1
-10
lines changed

1 file changed

+1
-10
lines changed

giskard/datasets/base/__init__.py

+1 -10
Original file line number | Diff line number | Diff line change
@@ -34,8 +34,6 @@
3434
if TYPE_CHECKING:
3535
from mlflow import MlflowClient
3636

37-
SAMPLE_SIZE = 1000
38-
3937
logger = logging.getLogger(__name__)
4038

4139

@@ -556,19 +554,12 @@ def cat_columns(self):
556554
return self._cat_columns(self.meta)
557555

558556
def save(self, local_path: str):
559-
with (
560-
open(Path(local_path) / "data.csv.zst", "wb") as f,
561-
open(Path(local_path) / "data.sample.csv.zst", "wb") as f_sample,
562-
):
557+
with (open(Path(local_path) / "data.csv.zst", "wb") as f,):
563558
uncompressed_bytes = save_df(self.df)
564559
compressed_bytes = compress(uncompressed_bytes)
565560
f.write(compressed_bytes)
566561
original_size_bytes, compressed_size_bytes = len(uncompressed_bytes), len(compressed_bytes)
567562

568-
uncompressed_bytes = save_df(self.df.sample(min(SAMPLE_SIZE, len(self.df.index))))
569-
compressed_bytes = compress(uncompressed_bytes)
570-
f_sample.write(compressed_bytes)
571-
572563
with open(Path(local_path) / "giskard-dataset-meta.yaml", "w") as meta_f:
573564
yaml.dump(
574565
{

0 commit comments

Comments (0)