Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detection regressor #111

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
266 changes: 201 additions & 65 deletions gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@
# Font file name; the code that loads it is outside this view.
FONT_PATH = "UKNumberPlate.ttf"
FONT_HEIGHT = 32 # Pixel size to which the chars are resized

# Image shapes are (rows, cols): generate_bg compares index 0 with
# `bg.shape[0]` and index 1 with `bg.shape[1]`.
# NOTE(review): OUTPUT_SHAPE sits next to the two per-task shapes below and is
# only referenced by what look like removed-side diff lines -- confirm whether
# it is still needed.
OUTPUT_SHAPE = (64, 128)
# Passed to generate_detect_ims() by the __main__ block.
DETECT_OUTPUT_SHAPE = (64, 128)
# Passed to generate_ims() by the __main__ block.
READ_OUTPUT_SHAPE = (64, 128)

# `common.CHARS` with a single space appended.
CHARS = common.CHARS + " "

Expand Down Expand Up @@ -106,55 +107,136 @@ def pick_colors():
return text_color, plate_color


# NOTE(review): this looks like the removed side of the diff. The same name is
# defined again further down with a (yaw_range, pitch_range, roll_range,
# scale_range, completely_inside) signature, and in Python the later `def`
# rebinds the name -- confirm before relying on this version.
def make_affine_transform(from_shape, to_shape,
min_scale, max_scale,
scale_variation=1.0,
rotation_variation=1.0,
translation_variation=1.0):
# Starts cleared; set below when the sampled scale leaves
# [min_scale, max_scale].
out_of_bounds = False

# (width, height) column vectors built from (height, width)-ordered shapes.
from_size = numpy.array([[from_shape[1], from_shape[0]]]).T
to_size = numpy.array([[to_shape[1], to_shape[0]]]).T

# Uniform sample centred on the midpoint of [min_scale, max_scale]; the
# half-width is multiplied by scale_variation, so a variation above 1.0 can
# produce a scale outside the nominal range.
scale = random.uniform((min_scale + max_scale) * 0.5 -
(max_scale - min_scale) * 0.5 * scale_variation,
(min_scale + max_scale) * 0.5 +
(max_scale - min_scale) * 0.5 * scale_variation)
if scale > max_scale or scale < min_scale:
out_of_bounds = True
# Rotation angles drawn from fixed ranges, each multiplied by
# rotation_variation.
roll = random.uniform(-0.3, 0.3) * rotation_variation
pitch = random.uniform(-0.2, 0.2) * rotation_variation
yaw = random.uniform(-1.2, 1.2) * rotation_variation

# Compute a bounding box on the skewed input image (`from_shape`).
# Only the top-left 2x2 part of the euler_to_mat() result is kept.
M = euler_to_mat(yaw, pitch, roll)[:2, :2]
h, w = from_shape
# NOTE(review): the contiguous part of this definition stops here; the
# statements that followed are not contiguous in this view.
def get_transformed_shape_size(M, shape):
    """
    Return the size of the bounding box of a transformed shape.

    The shape is treated as a rectangle centred on the origin; its four
    corners are mapped through `M` and the extent of the result along each
    axis is measured.

    :param M:
        2x2 linear transform (array or matrix) applied to the corners.

    :param shape:
        `(height, width)` of the untransformed rectangle.

    :return:
        Flat array `[width, height]` giving the extent of the transformed
        rectangle along the x and y axes respectively.

    """
    h, w = shape
    corners = numpy.array([[-w, +w, -w, +w],
                           [-h, -h, +h, +h]]) * 0.5
    # Plain arrays with an explicit dot product behave the same whether the
    # caller hands in an ndarray or a numpy.matrix.
    transformed = numpy.asarray(M).dot(corners)
    return transformed.max(axis=1) - transformed.min(axis=1)


def make_transform(yaw, pitch, roll, from_shape, bounds):
    """
    Make a 2x2 transform from the given parameters.

    :param yaw:
        Yaw angle to rotate by.

    :param pitch:
        Pitch angle to rotate by.

    :param roll:
        Roll angle to rotate by.

    :param from_shape:
        Shape `(height, width)` of the image being transformed.

    :param bounds:
        `(width, height)` limits. The scale is the largest one for which the
        transformed shape's bounding box fits within these bounds in both
        dimensions.

    :return:
        A tuple `M`, `size` where `M` is the transformation, and `size` is the
        size of the bounding box containing the transformed shape.

    """
    # Only the top-left 2x2 part of the rotation is used.
    M = euler_to_mat(yaw, pitch, roll)[:2, :2]
    skewed_size = get_transformed_shape_size(M, from_shape)

    # The tighter of the two per-axis ratios decides the scale, so the other
    # axis ends up at or below its bound.
    scale = numpy.min(numpy.array(bounds) / skewed_size)

    return M * scale, skewed_size * scale

def make_affine_transform(from_shape, to_shape,
                          yaw_range, pitch_range, roll_range, scale_range,
                          completely_inside=False):
    """
    Make a random affine transform for a shape, to fit within an output image.

    A rotation (specified in terms of yaw/pitch/roll) is selected based on
    ranges specified in the arguments.

    Scale is similarly specified by a range. A scale of 1.0 corresponds to a
    transformed shape whose bounding box matches the output image in one
    dimension and does not exceed it in the other.

    If `completely_inside` is `True` the centre of the transformed shape is
    selected uniformly with the constraint that the transformed shape's
    bounding box lies entirely within the output shape. If it is `False` the
    centre is selected uniformly over every position at which the bounding
    box still overlaps (or touches) the output shape, so the box may be cut by
    one or more edges.

    :param from_shape:
        The shape `(height, width)` being transformed.

    :param to_shape:
        The shape `(height, width)` of the output image.

    :param yaw_range:
        A (min, max) tuple defining the uniform distribution from which the yaw
        is selected.

    :param pitch_range:
        A (min, max) tuple defining the uniform distribution from which the
        pitch is selected.

    :param roll_range:
        A (min, max) tuple defining the uniform distribution from which the
        roll is selected.

    :param scale_range:
        A (min, max) tuple defining the uniform distribution from which the
        scale is selected. The maximum should not exceed 1.0, otherwise the
        bounding box cannot fit inside the output shape.

    :param completely_inside:
        Indicate whether the transformed shape's bounding box should lie
        entirely within the output shape (`True`) or whether it is allowed to
        lie only partly within the output shape (`False`).

    :return:
        A tuple `M`, `out_of_bounds`, `scale` where `M` is the 2x3 affine
        transform described above, `out_of_bounds` indicates whether the
        transformed shape's bounding box partially lies outside of the output
        shape (note `out_of_bounds` is always `False` if `completely_inside` is
        `True`), and `scale` indicates the scale that was chosen.

    """
    # The order of the random draws is kept stable so seeded runs reproduce.
    yaw = random.uniform(*yaw_range)
    pitch = random.uniform(*pitch_range)
    roll = random.uniform(*roll_range)
    scale = random.uniform(*scale_range)
    bounds = scale * numpy.array([to_shape[1], to_shape[0]])

    M, transformed_size = make_transform(yaw, pitch, roll, from_shape, bounds)

    # Set `t` to the translation which puts the centre of the plate at 0, 0.
    # NOTE(review): `M * column` is relied on as a matrix product here, i.e.
    # make_transform() is presumed to hand back a numpy.matrix -- confirm.
    t = M * numpy.matrix([[-from_shape[1], -from_shape[0]]]).T * 0.5

    half_w = transformed_size[0] / 2.
    half_h = transformed_size[1] / 2.

    # Determine the x and y coordinates of the transformed shape's centre.
    if completely_inside:
        x = random.uniform(half_w, to_shape[1] - half_w)
        y = random.uniform(half_h, to_shape[0] - half_h)
        out_of_bounds = False
    else:
        x = random.uniform(-half_w, to_shape[1] + half_w)
        y = random.uniform(-half_h, to_shape[0] + half_h)
        # bool() keeps numpy scalar types out of the returned flag.
        out_of_bounds = bool(x < half_w or
                             x > to_shape[1] - half_w or
                             y < half_h or
                             y > to_shape[0] - half_h)

    # Add the above to `t` to get the final translation.
    t += numpy.matrix([[x], [y]])

    return numpy.hstack([M, t]), out_of_bounds, scale


def generate_code():
return "{}{}{}{} {}{}{}".format(
Expand Down Expand Up @@ -213,49 +295,85 @@ def generate_plate(font_height, char_ims):
return plate, rounded_rect(out_shape, radius), code.replace(" ", "")


def generate_bg(num_bg_images, output_shape):
    """
    Return a random grayscale background crop of the requested shape.

    Files named `bgs/{:08d}.jpg` are picked at random until one is at least
    as large as `output_shape` in both dimensions; a random window of exactly
    `output_shape` is then cut from it.

    :param num_bg_images:
        Number of background images; indices `0 .. num_bg_images - 1` are
        candidates.

    :param output_shape:
        `(height, width)` of the crop to return.

    :return:
        Array of shape `output_shape` holding the pixel values divided by 255.

    :raises IOError:
        If a selected background file cannot be read.

    """
    out_h, out_w = output_shape[0], output_shape[1]

    while True:
        fname = "bgs/{:08d}.jpg".format(random.randint(0, num_bg_images - 1))
        raw = cv2.imread(fname, cv2.CV_LOAD_IMAGE_GRAYSCALE)
        if raw is None:
            # imread signals failure by returning None; fail with the file
            # name rather than an opaque "NoneType / float" TypeError.
            raise IOError("Could not read background image: {}".format(fname))
        bg = raw / 255.
        if bg.shape[1] >= out_w and bg.shape[0] >= out_h:
            break

    x = random.randint(0, bg.shape[1] - out_w)
    y = random.randint(0, bg.shape[0] - out_h)

    return bg[y:y + out_h, x:x + out_w]


# NOTE(review): looks like the removed side of the diff -- `generate_im` is
# defined again further down with an extra `output_shape` parameter, and the
# later `def` rebinds the name. Confirm before relying on this version.
def generate_im(char_ims, num_bg_images):
bg = generate_bg(num_bg_images)
def generate_detect_im(char_ims, num_bg_images, output_shape):
    """
    Generate one image for the detection data set.

    A plate is rendered, placed on a random background with a random affine
    transform (the plate may be partly outside the image), and Gaussian noise
    is added.

    :param char_ims:
        Character images handed to `generate_plate`.

    :param num_bg_images:
        Number of available background images.

    :param output_shape:
        `(height, width)` of the generated image.

    :return:
        A tuple `out`, `plate_centre`, `scale` where `out` is the image with
        values clipped to [0, 1], `plate_centre` is the transformed plate
        centre `(x, y)` divided by the output `(width, height)`, and `scale`
        is the larger of the two ratios between the plate's bounding box and
        the output size.

    """
    output_size = numpy.array([output_shape[1], output_shape[0]])

    bg = generate_bg(num_bg_images, output_shape)

    # The plate text is not needed for detection.
    plate, plate_mask, _ = generate_plate(FONT_HEIGHT, char_ims)
    plate_shape = plate.shape
    plate_size = numpy.array([[plate.shape[1], plate.shape[0]]]).T

    # Only the transform is used; scale is re-derived from it below.
    M, _, _ = make_affine_transform(plate_shape,
                                    output_shape,
                                    roll_range=(-0.3, 0.3),
                                    pitch_range=(-0.2, 0.2),
                                    yaw_range=(-1.2, 1.2),
                                    scale_range=(0.3, 1.0),
                                    completely_inside=False)

    dsize = (bg.shape[1], bg.shape[0])
    warped_plate = cv2.warpAffine(plate, M, dsize)
    warped_mask = cv2.warpAffine(plate_mask, M, dsize)

    out = warped_plate * warped_mask + bg * (1.0 - warped_mask)

    out = cv2.resize(out, (output_shape[1], output_shape[0]))

    out += numpy.random.normal(scale=0.05, size=out.shape)
    out = numpy.clip(out, 0., 1.)

    # Map the plate's centre (homogeneous coordinates) through the transform.
    plate_centre = numpy.array(M * numpy.concatenate([plate_size * 0.5,
                                                      [[1.]]])).T[0]
    plate_centre = plate_centre / output_size
    skewed_size = get_transformed_shape_size(M[:, :2], plate_shape)
    scale = numpy.max(skewed_size / output_size)

    return out, plate_centre, scale


def generate_im(char_ims, num_bg_images, output_shape):
    """
    Generate one number plate image together with its code.

    A plate is rendered, placed entirely inside a random background with a
    random affine transform, and Gaussian noise is added.

    :param char_ims:
        Character images handed to `generate_plate`.

    :param num_bg_images:
        Number of available background images.

    :param output_shape:
        `(height, width)` of the generated image.

    :return:
        A tuple `out`, `code`, `in_bounds` where `out` is the image with
        values clipped to [0, 1], `code` is the plate text as returned by
        `generate_plate`, and `in_bounds` is `True` when the plate lies
        entirely within the image (always the case here because the transform
        is requested with `completely_inside=True`).

    """
    bg = generate_bg(num_bg_images, output_shape)

    plate, plate_mask, code = generate_plate(FONT_HEIGHT, char_ims)

    M, out_of_bounds, _ = make_affine_transform(plate.shape,
                                                output_shape,
                                                roll_range=(-0.3, 0.3),
                                                pitch_range=(-0.2, 0.2),
                                                yaw_range=(-1.2, 1.2),
                                                scale_range=(0.9, 1.0),
                                                completely_inside=True)

    dsize = (bg.shape[1], bg.shape[0])
    plate = cv2.warpAffine(plate, M, dsize)
    plate_mask = cv2.warpAffine(plate_mask, M, dsize)

    out = plate * plate_mask + bg * (1.0 - plate_mask)

    out = cv2.resize(out, (output_shape[1], output_shape[0]))

    out += numpy.random.normal(scale=0.05, size=out.shape)
    out = numpy.clip(out, 0., 1.)

    return out, code, not out_of_bounds


def generate_ims(num_images, output_shape):
    """
    Generate a number of number plate images.

    :param num_images:
        Number of images to generate.

    :param output_shape:
        `(height, width)` passed on to `generate_im` for every image.

    :return:
        Iterable of number plate images; each item is the tuple returned by
        `generate_im`.

    """
    # Character images and the background count are prepared once and shared
    # by every generated image.
    char_ims = dict(make_char_ims(FONT_HEIGHT))
    num_bg_images = len(os.listdir("bgs"))
    for _ in range(num_images):
        yield generate_im(char_ims, num_bg_images, output_shape)


def generate_detect_ims(num_images, output_shape):
    """
    Generate a number of detection images.

    :param num_images:
        Number of images to generate.

    :param output_shape:
        Shape passed on to `generate_detect_im` for every image.

    :return:
        Iterable whose items are the tuples returned by `generate_detect_im`.

    """
    glyphs = dict(make_char_ims(FONT_HEIGHT))
    bg_count = len(os.listdir("bgs"))
    remaining = num_images
    while remaining > 0:
        yield generate_detect_im(glyphs, bg_count, output_shape)
        remaining -= 1


if __name__ == "__main__":
    # Usage: <script> NUM_IMAGES
    # Writes NUM_IMAGES images to test/read and NUM_IMAGES to test/detect.
    # The argument is parsed first so a bad value does not leave an empty
    # output directory behind.
    num_images = int(sys.argv[1])

    os.mkdir("test")

    # File name: <index>_<code>_<1 if the plate is fully in the image else 0>
    os.mkdir("test/read")
    im_gen = generate_ims(num_images, READ_OUTPUT_SHAPE)
    for img_idx, (im, c, p) in enumerate(im_gen):
        fname = "test/read/{:08d}_{}_{}.png".format(img_idx, c,
                                                    "1" if p else "0")
        # Single-argument print() behaves the same on Python 2 and 3.
        print(fname)
        cv2.imwrite(fname, im * 255.)

    # File name: <index>_<centre x>_<centre y>_<scale>
    os.mkdir("test/detect")
    im_gen = generate_detect_ims(num_images, DETECT_OUTPUT_SHAPE)
    for img_idx, (im, centre, scale) in enumerate(im_gen):
        centre_x, centre_y = centre.flatten()
        fname = "test/detect/{:08d}_{:.3f}_{:.3f}_{:.3f}.png".format(img_idx,
                                                                     centre_x,
                                                                     centre_y,
                                                                     scale)
        print(fname)
        cv2.imwrite(fname, im * 255.)