#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.

import json
import math
import os
from copy import deepcopy
from typing import Any, Dict, List, Optional, Tuple, Union

import augly.image.intensity as imintensity
import augly.utils as utils
import numpy as np
from PIL import Image

JPEG_EXTENSIONS = [".jpg", ".JPG", ".jpeg", ".JPEG"]


def validate_and_load_image(image: Union[str, Image.Image]) -> Image.Image:
"""
If image is a str, loads the image as a PIL Image and returns it. Otherwise,
we assert that image is a PIL Image and then return it.
"""
if isinstance(image, str):
local_path = utils.pathmgr.get_local_path(image)
utils.validate_image_path(local_path)
return Image.open(local_path)
assert isinstance(
image, Image.Image
), "Expected type PIL.Image.Image for variable 'image'"
return image
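

# Illustrative usage (not part of the original source; "photo.jpg" is a hypothetical path):
#   >>> img = validate_and_load_image("photo.jpg")                  # load from a path
#   >>> img = validate_and_load_image(Image.new("RGB", (64, 64)))   # already a PIL Image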


def ret_and_save_image(image: Image.Image, output_path: Optional[str]) -> Image.Image:
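    """
    Saves `image` to `output_path` if one is provided (converting to RGB first
    when saving as a JPEG), then returns the image.
    """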
if output_path is not None:
if any(output_path.endswith(extension) for extension in JPEG_EXTENSIONS):
image = image.convert("RGB")
utils.validate_output_path(output_path)
image.save(output_path)
return image


def get_template_and_bbox(
template_filepath: str, template_bboxes_filepath: str
) -> Tuple[Image.Image, Tuple[int, int, int, int]]:
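    """
    Loads the template image at `template_filepath` and looks up its bounding
    box (left, upper, right, lower) in the JSON file at
    `template_bboxes_filepath`, keyed by the template's file name.
    """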
template_key = os.path.basename(template_filepath)
local_template_path = utils.pathmgr.get_local_path(template_filepath)
template = Image.open(local_template_path)
local_bbox_path = utils.pathmgr.get_local_path(template_bboxes_filepath)
    with open(local_bbox_path, "rb") as bbox_file:
        bbox = json.load(bbox_file)[template_key]
return template, bbox


def get_func_kwargs(
metadata: Optional[List[Dict[str, Any]]], local_kwargs: Dict[str, Any], **kwargs
) -> Dict[str, Any]:
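    """
    Returns a copy of the calling function's local kwargs (minus `metadata`)
    merged with any extra kwargs, for use when collecting metadata; returns an
    empty dict if `metadata` is None.
    """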
if metadata is None:
return {}
func_kwargs = deepcopy(local_kwargs)
func_kwargs.pop("metadata")
func_kwargs.update(**kwargs)
return func_kwargs


def get_metadata(
metadata: Optional[List[Dict[str, Any]]],
function_name: str,
image: Optional[Image.Image] = None,
aug_image: Optional[Image.Image] = None,
**kwargs,
) -> None:
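    """
    If `metadata` is provided, appends a dict describing this augmentation:
    the function name, the source & augmented image dimensions, all extra
    kwargs (with tuples converted to lists), and an "intensity" score computed
    by the matching `<function_name>_intensity` function in
    augly.image.intensity (0.0 if no such function exists).
    """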
if metadata is None:
return
assert isinstance(
metadata, list
), "Expected `metadata` to be set to None or of type list"
assert (
image is not None
), "Expected `image` to be passed in if metadata was provided"
assert (
aug_image is not None
), "Expected `aug_image` to be passed in if metadata was provided"
    # JSON can't represent tuples, so any tuples would come back as lists after
    # serialization anyway; convert tuple kwargs to lists up front so the
    # in-memory metadata matches what would be written out.
kwargs_types_fixed = dict(
(k, list(v)) if isinstance(v, tuple) else (k, v) for k, v in kwargs.items()
)
metadata.append(
{
"name": function_name,
"src_width": image.width,
"src_height": image.height,
"dst_width": aug_image.width,
"dst_height": aug_image.height,
**kwargs_types_fixed,
}
)
intensity_kwargs = {"metadata": metadata[-1], **kwargs}
metadata[-1]["intensity"] = getattr(
imintensity, f"{function_name}_intensity", lambda **_: 0.0
)(**intensity_kwargs)


def rotated_rect_with_max_area(image: Image.Image, angle: float) -> Tuple[float, float]:
"""
Computes the width and height of the largest possible axis-aligned
rectangle (maximal area) within the rotated rectangle
source:
https://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders # noqa: B950
"""
w, h = image.size
width_is_longer = w >= h
side_long, side_short = (w, h) if width_is_longer else (h, w)
sin_a = abs(math.sin(math.radians(angle)))
cos_a = abs(math.cos(math.radians(angle)))
if side_short <= 2.0 * sin_a * cos_a * side_long or abs(sin_a - cos_a) < 1e-10:
x = 0.5 * side_short
wr, hr = (x / sin_a, x / cos_a) if width_is_longer else (x / cos_a, x / sin_a)
else:
cos_2a = cos_a * cos_a - sin_a * sin_a
wr = (w * cos_a - h * sin_a) / cos_2a
hr = (h * cos_a - w * sin_a) / cos_2a
return wr, hr
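

# Worked example (illustrative, not part of the original source): for a 100 x 50
# image rotated by 45 degrees, sin = cos ~ 0.707 and the short side dominates
# (the first branch), so x = 25 and the largest axis-aligned rectangle is
# square: wr = hr = 25 / sin(45 deg) ~ 35.4.
#   >>> wr, hr = rotated_rect_with_max_area(Image.new("RGB", (100, 50)), 45.0)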


def pad_with_black(src: Image.Image, w: int, h: int) -> Image.Image:
"""
Returns the image src with the x dimension padded to width w if it was
smaller than w (and likewise for the y dimension with height h)
"""
curr_w, curr_h = src.size
dx = max(0, (w - curr_w) // 2)
dy = max(0, (h - curr_h) // 2)
padded = Image.new("RGB", (w, h))
padded.paste(src, (dx, dy, curr_w + dx, curr_h + dy))
return padded


def resize_and_pad_to_given_size(
src: Image.Image, w: int, h: int, crop: bool
) -> Image.Image:
"""
Returns the image src resized & padded with black if needed for the screenshot
transformation (i.e. if the spot for the image in the template is too small or
too big for the src image). If crop is True, will crop the src image if necessary
to fit into the template image; otherwise, will resize if necessary
"""
curr_w, curr_h = src.size
if crop:
dx = (curr_w - w) // 2
dy = (curr_h - h) // 2
src = src.crop((dx, dy, w + dx, h + dy))
curr_w, curr_h = src.size
elif curr_w > w or curr_h > h:
resize_factor = min(w / curr_w, h / curr_h)
new_w = int(curr_w * resize_factor)
new_h = int(curr_h * resize_factor)
src = src.resize((new_w, new_h), resample=Image.BILINEAR)
curr_w, curr_h = src.size
if curr_w < w or curr_h < h:
src = pad_with_black(src, w, h)
return src
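

# Illustrative example (not part of the original source): fitting a 400 x 300
# image into a 200 x 200 template slot.
#   >>> src = Image.new("RGB", (400, 300))
#   >>> resize_and_pad_to_given_size(src, 200, 200, crop=True).size   # center-cropped -> (200, 200)
#   >>> resize_and_pad_to_given_size(src, 200, 200, crop=False).size  # resized to 200 x 150, then padded -> (200, 200)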


def scale_template_image(
src_w: int,
src_h: int,
template_image: Image.Image,
bbox: Tuple[int, int, int, int],
max_image_size_pixels: Optional[int],
crop: bool,
) -> Tuple[Image.Image, Tuple[int, int, int, int]]:
"""
    Returns template_image and bbox resized to fit the src image. Takes in the
width & height of the src image plus the bounding box where the src image
will be inserted into template_image. If the template bounding box is
bigger than src image in both dimensions, template_image is scaled down
such that the dimension that was closest to src_image matches, without
changing the aspect ratio (and bbox is scaled proportionally). Similarly if
src image is bigger than the bbox in both dimensions, template_image and
the bbox are scaled up.
"""
template_w, template_h = template_image.size
left, upper, right, lower = bbox
bbox_w, bbox_h = right - left, lower - upper
# Scale up/down template_image & bbox
if crop:
resize_factor = min(src_w / bbox_w, src_h / bbox_h)
else:
resize_factor = max(src_w / bbox_w, src_h / bbox_h)
# If a max image size is provided & the resized template image would be too large,
# resize the template image to the max image size.
if max_image_size_pixels is not None:
template_size = template_w * template_h
if template_size * resize_factor ** 2 > max_image_size_pixels:
resize_factor = math.sqrt(max_image_size_pixels / template_size)
template_w = int(template_w * resize_factor)
template_h = int(template_h * resize_factor)
bbox_w, bbox_h = int(bbox_w * resize_factor), int(bbox_h * resize_factor)
left, upper = int(left * resize_factor), int(upper * resize_factor)
right, lower = left + bbox_w, upper + bbox_h
bbox = (left, upper, right, lower)
template_image = template_image.resize(
(template_w, template_h), resample=Image.BILINEAR
)
return template_image, bbox
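

# Illustrative example (values are made up): for a 1000 x 800 source image and a
# template bbox of 500 x 400 with crop=False, resize_factor = max(2.0, 2.0) = 2.0,
# so the template image and bbox are both doubled (subject to max_image_size_pixels):
#   >>> scale_template_image(1000, 800, template_image, (100, 50, 600, 450), None, crop=False)
#   (returns the template resized by 2x and the bbox (200, 100, 1200, 900))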


def square_center_crop(src: Image.Image) -> Image.Image:
"""Returns a square crop of the center of the image"""
w, h = src.size
smallest_edge = min(w, h)
dx = (w - smallest_edge) // 2
dy = (h - smallest_edge) // 2
return src.crop((dx, dy, dx + smallest_edge, dy + smallest_edge))


def compute_transform_coeffs(
src_coords: List[Tuple[int, int]], dst_coords: List[Tuple[float, float]]
) -> np.ndarray:
"""
Given the starting & desired corner coordinates, computes the
coefficients required by the perspective transform.
"""
matrix = []
for sc, dc in zip(src_coords, dst_coords):
matrix.append([dc[0], dc[1], 1, 0, 0, 0, -sc[0] * dc[0], -sc[0] * dc[1]])
matrix.append([0, 0, 0, dc[0], dc[1], 1, -sc[1] * dc[0], -sc[1] * dc[1]])
    A = np.matrix(matrix, dtype=float)
B = np.array(src_coords).reshape(8)
res = np.dot(np.linalg.inv(A.T * A) * A.T, B)
return np.array(res).reshape(8)
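

# Usage sketch (illustrative; the corner coordinates below are made up): PIL's
# perspective transform expects eight coefficients mapping output pixel coordinates
# back to source coordinates, which is what this helper produces when given the
# original corners as src_coords and the desired corners as dst_coords:
#   >>> image = Image.new("RGB", (200, 150))
#   >>> w, h = image.size
#   >>> src_coords = [(0, 0), (w, 0), (w, h), (0, h)]
#   >>> dst_coords = [(10.0, 8.0), (w - 12.0, 4.0), (w - 6.0, h - 10.0), (3.0, h - 7.0)]
#   >>> coeffs = compute_transform_coeffs(src_coords, dst_coords)
#   >>> warped = image.transform((w, h), Image.PERSPECTIVE, coeffs, Image.BICUBIC)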


def compute_stripe_mask(
src_w: int, src_h: int, line_width: float, line_angle: float, line_density: float
) -> np.ndarray:
"""
Given stripe parameters such as stripe width, angle, and density, returns
a binary mask of the same size as the source image indicating the location
of stripes. This implementation is inspired by
https://stackoverflow.com/questions/34043381/how-to-create-diagonal-stripe-patterns-and-checkerboard-patterns
"""
line_angle *= math.pi / 180
line_distance = (1 - line_density) * min(src_w, src_h)
y_period = math.cos(line_angle) / line_distance
x_period = math.sin(line_angle) / line_distance
y_coord_range = np.arange(0, src_h) - src_h / 2
x_coord_range = np.arange(0, src_w) - src_w / 2
x_grid_coords, y_grid_coords = np.meshgrid(x_coord_range, y_coord_range)
if abs(line_angle) == math.pi / 2 or abs(line_angle) == 3 * math.pi / 2:
# Compute mask for vertical stripes
softmax_mask = (np.cos(2 * math.pi * x_period * x_grid_coords) + 1) / 2
elif line_angle == 0 or abs(line_angle) == math.pi:
# Compute mask for horizontal stripes
softmax_mask = (np.cos(2 * math.pi * y_period * y_grid_coords) + 1) / 2
else:
# Compute mask for diagonal stripes
softmax_mask = (
np.cos(2 * math.pi * (x_period * x_grid_coords + y_period * y_grid_coords))
+ 1
) / 2
binary_mask = softmax_mask > (math.cos(math.pi * line_width) + 1) / 2
return binary_mask
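

# A minimal smoke test (illustrative only, not part of the original module): builds a
# synthetic image in memory and exercises a few of the helpers above. It only runs when
# this file is executed directly, e.g. `python utils.py`.
if __name__ == "__main__":
    demo = Image.new("RGB", (300, 200), (40, 120, 200))

    # Largest axis-aligned rectangle that fits inside the image after a 30 degree rotation.
    wr, hr = rotated_rect_with_max_area(demo, 30.0)
    print(f"rotated_rect_with_max_area(30 deg): {wr:.1f} x {hr:.1f}")

    # Fit the image into a hypothetical 150 x 150 template slot without cropping.
    fitted = resize_and_pad_to_given_size(demo, 150, 150, crop=False)
    print(f"resize_and_pad_to_given_size: {fitted.size}")

    # Overlay white diagonal stripes using the boolean stripe mask.
    arr = np.array(demo)
    stripes = compute_stripe_mask(300, 200, line_width=0.1, line_angle=45.0, line_density=0.5)
    arr[stripes] = (255, 255, 255)
    print(f"compute_stripe_mask: {int(stripes.sum())} striped pixels")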