forked from facebookresearch/AugLy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathintensity.py
73 lines (47 loc) · 2.52 KB
/
intensity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
def apply_lambda_intensity(aug_function: str, **kwargs) -> float:
    """Dispatch to the module-level ``<aug_function>_intensity`` function.

    The handler is looked up by name in this module's globals and called
    with ``**kwargs``. When no such name exists (or it is falsy), the
    intensity defaults to 100.0.
    """
    handler = globals().get(f"{aug_function}_intensity")
    if not handler:
        # No dedicated intensity function for this augmentation.
        return 100.0
    return handler(**kwargs)
def get_baseline_intensity(**kwargs) -> float:
    """Return the intensity of the baseline transform, which is always 0.0.

    get_baseline simply tokenizes and detokenizes text and at most adds
    extra spaces, so it is scored as no change. All keyword arguments are
    accepted and ignored.
    """
    return 0.0
def insert_punctuation_chars_intensity(
    granularity: str, cadence: float, **kwargs
) -> float:
    """Return the intensity for ``insert_punctuation_chars``.

    Delegates entirely to ``char_insertion_intensity_helper``; any extra
    keyword arguments are ignored.
    """
    return char_insertion_intensity_helper(granularity=granularity, cadence=cadence)
def insert_zero_width_chars_intensity(
    granularity: str, cadence: float, **kwargs
) -> float:
    """Return the intensity for ``insert_zero_width_chars``.

    Delegates entirely to ``char_insertion_intensity_helper``; any extra
    keyword arguments are ignored.
    """
    return char_insertion_intensity_helper(granularity=granularity, cadence=cadence)
def replace_bidirectional_intensity(**kwargs) -> float:
    """Return the intensity for ``replace_bidirectional``, which is always 100.0.

    All keyword arguments are accepted and ignored. The return annotation
    matches the other ``*_intensity`` functions in this module.
    """
    return 100.0
def replace_fun_fonts_intensity(
    aug_p: float, aug_max: int, granularity: str, **kwargs
) -> float:
    """Return the intensity for ``replace_fun_fonts``.

    A granularity of ``"all"`` is scored as 100.0; any other granularity
    is scored by ``replace_intensity_helper(aug_p, aug_max)``. Extra
    keyword arguments are ignored.
    """
    if granularity == "all":
        return 100.0
    return replace_intensity_helper(aug_p, aug_max)
def replace_similar_chars_intensity(
    aug_char_p: float, aug_word_p: float, aug_char_max: int, aug_word_max: int, **kwargs
) -> float:
    """Return the intensity for ``replace_similar_chars``.

    The word-level and char-level probabilities are multiplied together and
    scored by ``replace_intensity_helper``. Extra keyword arguments are
    ignored.
    """
    joint_p = aug_word_p * aug_char_p
    # Only zero vs. non-zero matters for the max, so the product of the two
    # caps is enough: it is zero exactly when either cap is zero.
    joint_max = aug_word_max * aug_char_max
    return replace_intensity_helper(joint_p, joint_max)
def replace_similar_unicode_chars_intensity(
    aug_char_p: float, aug_word_p: float, aug_char_max: int, aug_word_max: int, **kwargs
) -> float:
    """Return the intensity for ``replace_similar_unicode_chars``.

    The word-level and char-level probabilities are multiplied together and
    scored by ``replace_intensity_helper``. Extra keyword arguments are
    ignored.
    """
    joint_p = aug_word_p * aug_char_p
    # Only zero vs. non-zero matters for the max, so the product of the two
    # caps is enough: it is zero exactly when either cap is zero.
    joint_max = aug_word_max * aug_char_max
    return replace_intensity_helper(joint_p, joint_max)
def replace_upside_down_intensity(
    aug_p: float, aug_max: int, granularity: str, **kwargs
) -> float:
    """Return the intensity for ``replace_upside_down``.

    A granularity of ``"all"`` is scored as 100.0; any other granularity
    is scored by ``replace_intensity_helper(aug_p, aug_max)``. Extra
    keyword arguments are ignored.
    """
    if granularity == "all":
        return 100.0
    return replace_intensity_helper(aug_p, aug_max)
def simulate_typos_intensity(
    aug_char_p: float, aug_word_p: float, aug_char_max: int, aug_word_max: int, **kwargs
) -> float:
    """Return the intensity for ``simulate_typos``.

    The word-level and char-level probabilities are multiplied together and
    scored by ``replace_intensity_helper``. Extra keyword arguments are
    ignored.
    """
    joint_p = aug_word_p * aug_char_p
    # Only zero vs. non-zero matters for the max, so the product of the two
    # caps is enough: it is zero exactly when either cap is zero.
    joint_max = aug_word_max * aug_char_max
    return replace_intensity_helper(joint_p, joint_max)
def split_words_intensity(aug_word_p: float, aug_word_max: int, **kwargs) -> float:
    """Return the intensity for ``split_words``.

    Scored by ``replace_intensity_helper`` from the word-level probability
    and word-level cap. Extra keyword arguments are ignored.
    """
    word_p, word_cap = aug_word_p, aug_word_max
    return replace_intensity_helper(word_p, word_cap)
def char_insertion_intensity_helper(granularity: str, cadence: float) -> float:
    """Score a character-insertion augmentation on a 0.0-100.0 scale.

    Args:
        granularity: ``"all"`` is scored as 100.0 regardless of ``cadence``.
        cadence: for any other granularity the score is ``100 / cadence``,
            clamped to the range [0.0, 100.0].

    Returns:
        The intensity as a float between 0.0 and 100.0 inclusive.

    Raises:
        ZeroDivisionError: if ``cadence`` is 0 and ``granularity`` is not
            ``"all"``.
    """
    if granularity == "all":
        return 100.0
    raw_intensity = (1 / cadence) * 100.0
    # A cadence below 1 would push the score past 100 and a negative cadence
    # would make it negative; keep the result on the same 0-100 scale that
    # the rest of this module returns.
    return min(100.0, max(0.0, raw_intensity))
def replace_intensity_helper(aug_p: float, aug_max: int) -> float:
    """Score a replacement-style augmentation from its probability and cap.

    Returns 0.0 when ``aug_max`` equals 0; otherwise returns ``aug_p``
    scaled by 100.
    """
    if aug_max == 0:
        return 0.0
    return aug_p * 100.0