@@ -72,6 +72,7 @@ def test_categorical_full_outer_join() -> None:
72
72
assert df ["key_right" ].cast (pl .String ).to_list () == ["bar" , "baz" , None ]
73
73
74
74
75
+ @pytest .mark .usefixtures ("test_global_and_local" )
75
76
def test_read_csv_categorical () -> None :
76
77
f = io .BytesIO ()
77
78
f .write (b"col1,col2,col3,col4,col5,col6\n 'foo',2,3,4,5,6\n 'bar',8,9,10,11,12" )
@@ -80,6 +81,7 @@ def test_read_csv_categorical() -> None:
80
81
assert df ["col1" ].dtype == pl .Categorical
81
82
82
83
84
+ @pytest .mark .usefixtures ("test_global_and_local" )
83
85
def test_cat_to_dummies () -> None :
84
86
df = pl .DataFrame ({"foo" : [1 , 2 , 3 , 4 ], "bar" : ["a" , "b" , "a" , "c" ]})
85
87
df = df .with_columns (pl .col ("bar" ).cast (pl .Categorical ))
@@ -94,7 +96,7 @@ def test_cat_to_dummies() -> None:
94
96
}
95
97
96
98
97
- @StringCache ( )
99
+ @pytest . mark . usefixtures ( "test_global_and_local" )
98
100
def test_categorical_is_in_list () -> None :
99
101
# this requires type coercion to cast.
100
102
# we should not cast within the function as this would be expensive within a
@@ -110,7 +112,7 @@ def test_categorical_is_in_list() -> None:
110
112
}
111
113
112
114
113
- @StringCache ( )
115
+ @pytest . mark . usefixtures ( "test_global_and_local" )
114
116
def test_unset_sorted_on_append () -> None :
115
117
df1 = pl .DataFrame (
116
118
[
@@ -137,6 +139,7 @@ def test_unset_sorted_on_append() -> None:
137
139
(pl .Series .eq_missing , pl .Series ([True , True , True , False , False , False ])),
138
140
],
139
141
)
142
+ @pytest .mark .usefixtures ("test_global_and_local" )
140
143
def test_categorical_equality (
141
144
op : Callable [[pl .Series , pl .Series ], pl .Series ], expected : pl .Series
142
145
) -> None :
@@ -272,6 +275,7 @@ def test_categorical_global_ordering_broadcast_lhs(
272
275
(operator .gt , pl .Series ([False , False , False , True , False , False ])),
273
276
],
274
277
)
278
+ @pytest .mark .usefixtures ("test_global_and_local" )
275
279
def test_categorical_ordering (
276
280
op : Callable [[pl .Series , pl .Series ], pl .Series ], expected : pl .Series
277
281
) -> None :
@@ -289,6 +293,7 @@ def test_categorical_ordering(
289
293
(operator .gt , pl .Series ([None , False , False , False , False , False ])),
290
294
],
291
295
)
296
+ @pytest .mark .usefixtures ("test_global_and_local" )
292
297
def test_compare_categorical (
293
298
op : Callable [[pl .Series , pl .Series ], pl .Series ], expected : pl .Series
294
299
) -> None :
@@ -311,6 +316,7 @@ def test_compare_categorical(
311
316
(pl .Series .ne_missing , pl .Series ([True , True , False , True , False , True ])),
312
317
],
313
318
)
319
+ @pytest .mark .usefixtures ("test_global_and_local" )
314
320
def test_compare_categorical_single (
315
321
op : Callable [[pl .Series , pl .Series ], pl .Series ], expected : pl .Series
316
322
) -> None :
@@ -400,6 +406,7 @@ def test_categorical_error_on_local_cmp() -> None:
400
406
df_cat .filter (pl .col ("a_cat" ) == pl .col ("b_cat" ))
401
407
402
408
409
+ @pytest .mark .usefixtures ("test_global_and_local" )
403
410
def test_cast_null_to_categorical () -> None :
404
411
assert pl .DataFrame ().with_columns (
405
412
pl .lit (None ).cast (pl .Categorical ).alias ("nullable_enum" )
@@ -454,6 +461,7 @@ def create_lazy(data: dict) -> pl.LazyFrame: # type: ignore[type-arg]
454
461
assert pl .using_string_cache () is False
455
462
456
463
464
+ @pytest .mark .usefixtures ("test_global_and_local" )
457
465
def test_categorical_in_struct_nulls () -> None :
458
466
s = pl .Series (
459
467
"job" , ["doctor" , "waiter" , None , None , None , "doctor" ], pl .Categorical
@@ -466,6 +474,7 @@ def test_categorical_in_struct_nulls() -> None:
466
474
assert s [2 ] == {"job" : "waiter" , "count" : 1 }
467
475
468
476
477
+ @pytest .mark .usefixtures ("test_global_and_local" )
469
478
def test_cast_inner_categorical () -> None :
470
479
dtype = pl .List (pl .Categorical )
471
480
out = pl .Series ("foo" , [["a" ], ["a" , "b" ]]).cast (dtype )
@@ -501,6 +510,7 @@ def test_stringcache() -> None:
501
510
(pl .Categorical ("lexical" ), ["bar" , "baz" , "foo" ]),
502
511
],
503
512
)
513
+ @pytest .mark .usefixtures ("test_global_and_local" )
504
514
def test_categorical_sort_order_by_parameter (
505
515
dtype : PolarsDataType , outcome : list [str ]
506
516
) -> None :
@@ -557,12 +567,14 @@ def test_err_on_categorical_asof_join_by_arg() -> None:
557
567
df1 .join_asof (df2 , on = pl .col ("time" ).set_sorted (), by = "cat" )
558
568
559
569
570
+ @pytest .mark .usefixtures ("test_global_and_local" )
560
571
def test_categorical_list_get_item () -> None :
561
572
out = pl .Series ([["a" ]]).cast (pl .List (pl .Categorical )).item ()
562
573
assert isinstance (out , pl .Series )
563
574
assert out .dtype == pl .Categorical
564
575
565
576
577
+ @pytest .mark .usefixtures ("test_global_and_local" )
566
578
def test_nested_categorical_aggregation_7848 () -> None :
567
579
# a double categorical aggregation
568
580
assert pl .DataFrame (
@@ -580,6 +592,7 @@ def test_nested_categorical_aggregation_7848() -> None:
580
592
}
581
593
582
594
595
+ @pytest .mark .usefixtures ("test_global_and_local" )
583
596
def test_nested_categorical_cast () -> None :
584
597
values = [["x" ], ["y" ], ["x" ]]
585
598
dtype = pl .List (pl .Categorical )
@@ -588,6 +601,7 @@ def test_nested_categorical_cast() -> None:
588
601
assert s .to_list () == values
589
602
590
603
604
+ @pytest .mark .usefixtures ("test_global_and_local" )
591
605
def test_struct_categorical_nesting () -> None :
592
606
# this triggers a lot of materialization
593
607
df = pl .DataFrame (
@@ -610,7 +624,7 @@ def test_categorical_fill_null_existing_category() -> None:
610
624
assert result .to_dict (as_series = False ) == expected
611
625
612
626
613
- @StringCache ( )
627
+ @pytest . mark . usefixtures ( "test_global_and_local" )
614
628
def test_categorical_fill_null_stringcache () -> None :
615
629
df = pl .LazyFrame (
616
630
{"index" : [1 , 2 , 3 ], "cat" : ["a" , "b" , None ]},
@@ -622,6 +636,7 @@ def test_categorical_fill_null_stringcache() -> None:
622
636
assert a .dtypes == [pl .Categorical ]
623
637
624
638
639
+ @pytest .mark .usefixtures ("test_global_and_local" )
625
640
def test_fast_unique_flag_from_arrow () -> None :
626
641
df = pl .DataFrame (
627
642
{
@@ -633,6 +648,7 @@ def test_fast_unique_flag_from_arrow() -> None:
633
648
assert pl .from_arrow (filtered ).select (pl .col ("colB" ).n_unique ()).item () == 4 # type: ignore[union-attr]
634
649
635
650
651
+ @pytest .mark .usefixtures ("test_global_and_local" )
636
652
def test_construct_with_null () -> None :
637
653
# Example from https://github.com/pola-rs/polars/issues/7188
638
654
df = pl .from_dicts ([{"A" : None }, {"A" : "foo" }], schema = {"A" : pl .Categorical })
@@ -663,6 +679,7 @@ def test_list_builder_different_categorical_rev_maps() -> None:
663
679
}
664
680
665
681
682
+ @pytest .mark .usefixtures ("test_global_and_local" )
666
683
def test_categorical_collect_11408 () -> None :
667
684
df = pl .DataFrame (
668
685
data = {"groups" : ["a" , "b" , "c" ], "cats" : ["a" , "b" , "c" ], "amount" : [1 , 2 , 3 ]},
@@ -677,6 +694,7 @@ def test_categorical_collect_11408() -> None:
677
694
}
678
695
679
696
697
+ @pytest .mark .usefixtures ("test_global_and_local" )
680
698
def test_categorical_nested_cast_unchecked () -> None :
681
699
s = pl .Series ("cat" , [["cat" ]]).cast (pl .List (pl .Categorical ))
682
700
assert pl .Series ([s ]).to_list () == [[["cat" ]]]
@@ -751,6 +769,7 @@ def test_categorical_vstack_with_local_different_rev_map() -> None:
751
769
assert df3 .get_column ("a" ).cast (pl .UInt32 ).to_list () == [0 , 1 , 2 , 3 , 4 , 5 ]
752
770
753
771
772
+ @pytest .mark .usefixtures ("test_global_and_local" )
754
773
def test_shift_over_13041 () -> None :
755
774
df = pl .DataFrame (
756
775
{
@@ -768,6 +787,7 @@ def test_shift_over_13041() -> None:
768
787
769
788
@pytest .mark .parametrize ("context" , [pl .StringCache (), contextlib .nullcontext ()])
770
789
@pytest .mark .parametrize ("ordering" , ["physical" , "lexical" ])
790
+ @pytest .mark .usefixtures ("test_global_and_local" )
771
791
def test_sort_categorical_retain_none (
772
792
context : contextlib .AbstractContextManager , # type: ignore[type-arg]
773
793
ordering : Literal ["physical" , "lexical" ],
@@ -799,6 +819,7 @@ def test_sort_categorical_retain_none(
799
819
]
800
820
801
821
822
+ @pytest .mark .usefixtures ("test_global_and_local" )
802
823
def test_cast_from_cat_to_numeric () -> None :
803
824
cat_series = pl .Series (
804
825
"cat_series" ,
@@ -811,12 +832,14 @@ def test_cast_from_cat_to_numeric() -> None:
811
832
assert s .cast (pl .UInt8 ).sum () == 6
812
833
813
834
835
+ @pytest .mark .usefixtures ("test_global_and_local" )
814
836
def test_cat_preserve_lexical_ordering_on_clear () -> None :
815
837
s = pl .Series ("a" , ["a" , "b" ], dtype = pl .Categorical (ordering = "lexical" ))
816
838
s2 = s .clear ()
817
839
assert s .dtype == s2 .dtype
818
840
819
841
842
+ @pytest .mark .usefixtures ("test_global_and_local" )
820
843
def test_cat_preserve_lexical_ordering_on_concat () -> None :
821
844
dtype = pl .Categorical (ordering = "lexical" )
822
845
@@ -827,6 +850,7 @@ def test_cat_preserve_lexical_ordering_on_concat() -> None:
827
850
828
851
# TODO: Bug see: https://github.com/pola-rs/polars/issues/20440
829
852
@pytest .mark .may_fail_auto_streaming
853
+ @pytest .mark .usefixtures ("test_global_and_local" )
830
854
def test_cat_append_lexical_sorted_flag () -> None :
831
855
df = pl .DataFrame ({"x" : [0 , 1 , 1 ], "y" : ["B" , "B" , "A" ]}).with_columns (
832
856
pl .col ("y" ).cast (pl .Categorical (ordering = "lexical" ))
@@ -845,6 +869,7 @@ def test_cat_append_lexical_sorted_flag() -> None:
845
869
assert not (s1 .is_sorted ())
846
870
847
871
872
+ @pytest .mark .usefixtures ("test_global_and_local" )
848
873
def test_get_cat_categories_multiple_chunks () -> None :
849
874
df = pl .DataFrame (
850
875
[
@@ -877,6 +902,7 @@ def test_nested_categorical_concat(
877
902
pl .concat ([a , b ])
878
903
879
904
905
+ @pytest .mark .usefixtures ("test_global_and_local" )
880
906
def test_perfect_group_by_19452 () -> None :
881
907
n = 40
882
908
df2 = pl .DataFrame (
@@ -889,6 +915,7 @@ def test_perfect_group_by_19452() -> None:
889
915
assert df2 .with_columns (a = (pl .col ("b" )).over (pl .col ("a" )))["a" ].is_sorted ()
890
916
891
917
918
+ @pytest .mark .usefixtures ("test_global_and_local" )
892
919
def test_perfect_group_by_19950 () -> None :
893
920
dtype = pl .Enum (categories = ["a" , "b" , "c" ])
894
921
@@ -900,14 +927,14 @@ def test_perfect_group_by_19950() -> None:
900
927
}
901
928
902
929
903
- @StringCache ( )
930
+ @pytest . mark . usefixtures ( "test_global_and_local" )
904
931
def test_categorical_unique () -> None :
905
932
s = pl .Series (["a" , "b" , None ], dtype = pl .Categorical )
906
933
assert s .n_unique () == 3
907
934
assert s .unique ().sort ().to_list () == [None , "a" , "b" ]
908
935
909
936
910
- @StringCache ( )
937
+ @pytest . mark . usefixtures ( "test_global_and_local" )
911
938
def test_categorical_unique_20539 () -> None :
912
939
df = pl .DataFrame ({"number" : [1 , 1 , 2 , 2 , 3 ], "letter" : ["a" , "b" , "b" , "c" , "c" ]})
913
940
@@ -927,13 +954,10 @@ def test_categorical_unique_20539() -> None:
927
954
}
928
955
929
956
930
- @StringCache ()
931
957
@pytest .mark .may_fail_auto_streaming
958
+ @pytest .mark .usefixtures ("test_global_and_local" )
932
959
def test_categorical_prefill () -> None :
933
960
# https://github.com/pola-rs/polars/pull/20547#issuecomment-2569473443
934
- # prefill cache
935
- pl .Series (["aaa" , "bbb" , "ccc" ], dtype = pl .Categorical ) # pre-fill cache
936
-
937
961
# test_compare_categorical_single
938
962
assert (pl .Series (["a" ], dtype = pl .Categorical ) < "a" ).to_list () == [False ]
939
963
0 commit comments