Source code for lightning_pose.data.augmentations
"""Functions to build augmentation pipeline."""
from typing import Any
import imgaug.augmenters as iaa
from omegaconf import DictConfig, ListConfig
from typeguard import typechecked
# Restrict the module's public names to those listed here so that the names imported
# above are ignored by sphinx-autoapidoc.
__all__ = [
    "imgaug_transform",
]
[docs]
@typechecked
def imgaug_transform(params_dict: dict | DictConfig) -> iaa.Sequential:
    """Create simple and flexible data transform pipeline that augments images and keypoints.

    The input mapping is only read, never modified, so the same ``params_dict`` can be passed
    repeatedly and will always produce an equivalent pipeline.

    Args:
        params_dict: each key must be the name of a transform importable from imgaug.augmenters,
            e.g. "Affine", "Fliplr", etc. The value must be a dict with several optional keys:
            - "p" (float): probability of applying transform (using imgaug.augmenters.Sometimes);
              defaults to 0.5. A value of 0 drops the transform from the pipeline, and a value
              of 1 or more applies it unconditionally (no Sometimes wrapper).
            - "args" (list): arguments for transform
            - "kwargs" (dict): keyword args for the transformation. List values are normalized
              before being passed on: a 1-element list is replaced by its single item, a
              2-element list becomes a tuple, and any other length is passed through as-is.

    Examples:
        Create a pipeline with
        - Affine transformation applied 50% of the time with rotation uniformly sampled from
          (-25, 25) degrees
        - MotionBlur transformation that is applied 25% of the time with a kernel size of 5 pixels
          and blur direction uniformly sampled from (-90, 90) degrees

        >>> params_dict = {
        >>>    'Affine': {'p': 0.5, 'kwargs': {'rotate': (-25, 25)}},
        >>>    'MotionBlur': {'p': 0.25, 'kwargs': {'k': 5, 'angle': (-90, 90)}},
        >>> }

        In a config file, this will look like:
        >>> training:
        >>>   imgaug:
        >>>     Affine:
        >>>       p: 0.5
        >>>       kwargs:
        >>>         rotate: [-25, 25]
        >>>     MotionBlur:
        >>>       p: 0.25
        >>>       kwargs:
        >>>         k: 5
        >>>         angle: [-90, 90]

        Create a pipeline with
        - Rot90 transformation applied 100% of the time with rotations of 0, 90, 180, 270 degrees.

        >>> params_dict = {
        >>>    'Rot90': {'p': 1.0, 'kwargs': {'k': [[0, 1, 2, 3]]}},  # note required nested list
        >>> }

        In a config file, this will look like:
        >>> training:
        >>>   imgaug:
        >>>     Rot90:
        >>>       p: 1.0
        >>>       kwargs:
        >>>         k: [0, 1, 2, 3]

        NOTE: if you pass a list of exactly 2 values to Rot90 it will be parsed as a tuple and all
        (discrete) rotations between the two values will be sampled uniformly.
        For example, `k: [0, 2]` is equivalent to `k: [0, 1, 2]`.
        If you need to _only_ sample two non-contiguous integers please raise an issue.

    Returns:
        imgaug pipeline

    """
    data_transform = []

    for transform_str, args in params_dict.items():
        transform = getattr(iaa, transform_str)
        apply_prob = args.get("p", 0.5)
        # `or` also covers entries that are present but explicitly null/None
        transform_args = args.get("args") or ()
        raw_kwargs = args.get("kwargs") or {}

        # DictConfig cannot load tuples from yaml files
        # make sure any lists are converted to tuples
        # unless the list contains a single item, then pass through the item (hack for Rot90)
        #
        # The normalized values are collected in a fresh plain dict instead of being written
        # back into `raw_kwargs`. Writing back would mutate the caller's config, which makes a
        # second call see already-unwrapped values (e.g. [[0, 2]] -> [0, 2] -> (0, 2)) and
        # build a different pipeline from the same input.
        transform_kwargs = {}
        for kw, arg in raw_kwargs.items():
            if isinstance(arg, (list, ListConfig)):
                if len(arg) == 1:
                    arg = arg[0]
                elif len(arg) == 2:
                    arg = tuple(arg)
            transform_kwargs[kw] = arg

        # add transform to pipeline
        if apply_prob == 0.0:
            # transform is disabled; do not even instantiate it
            continue

        augmenter = transform(*transform_args, **transform_kwargs)
        if apply_prob < 1.0:
            augmenter = iaa.Sometimes(apply_prob, augmenter)
        data_transform.append(augmenter)

    return iaa.Sequential(data_transform)
def expand_imgaug_str_to_dict(params: str) -> dict[str, Any]:
    """Expand a named augmentation preset into an explicit transform dictionary.

    Args:
        params: name of the preset:
            - "default" / "none": no augmentations (empty dict)
            - "dlc": Affine rotation, motion blur, coarse dropout, coarse salt, coarse pepper,
              elastic transformation, histogram equalization, CLAHE, emboss, crop-and-pad
            - "dlc-lr": same as "dlc", preceded by a Rot90 entry with `k` choices 0 and 2
            - "dlc-top-down": same as "dlc", preceded by a Rot90 entry with `k` choices
              0, 1, 2 and 3
            - "dlc-mv": same as "dlc" without the Affine, ElasticTransformation and
              CropAndPad entries

    Returns:
        dict mapping transform names to {"p": ..., "kwargs": ...} entries, in the order the
        transforms should be applied

    Raises:
        NotImplementedError: if `params` is not one of the preset names above

    """
    no_aug_presets = ["default", "none"]
    dlc_presets = ["dlc", "dlc-lr", "dlc-top-down", "dlc-mv"]
    allowed_presets = no_aug_presets + dlc_presets

    if params not in allowed_presets:
        raise NotImplementedError(
            f"cfg.training.imgaug string {params} must be in {allowed_presets}"
        )

    if params in no_aug_presets:
        return {}  # no augmentations

    # presets ending in "mv" leave out Affine, ElasticTransformation and CropAndPad
    include_geometric = not params.endswith("mv")

    pipeline: dict[str, Any] = {}

    if params == "dlc-lr":
        # rotate 0 or 180 degrees
        pipeline["Rot90"] = {"p": 1.0, "kwargs": {"k": [[0, 2]]}}
    elif params == "dlc-top-down":
        # rotate 0, 90, 180, or 270 degrees
        pipeline["Rot90"] = {"p": 1.0, "kwargs": {"k": [[0, 1, 2, 3]]}}

    if include_geometric:
        # rotation uniformly sampled from (-25, +25)
        pipeline["Affine"] = {"p": 0.4, "kwargs": {"rotate": (-25, 25)}}

    # motion blur: kernel size of 5, blur direction uniformly sampled from (-90, +90)
    pipeline["MotionBlur"] = {"p": 0.5, "kwargs": {"k": 5, "angle": (-90, 90)}}

    # coarse dropout
    pipeline["CoarseDropout"] = {
        "p": 0.5,
        "kwargs": {
            "p": 0.02,  # drop this fraction of all pixels by converting them to black pixels
            "size_percent": 0.3,  # drop pix on a low-res version of img, this fraction of og
            "per_channel": 0.5,  # per_channel transformations on this fraction of images
        },
    }

    # coarse salt and pepper
    # bright reflections can often confuse the model into thinking they are paws
    # (which can also just be bright blobs) - so include some additional transforms that
    # put bright blobs (and dark blobs) into the image
    # bigger chunks than coarse dropout
    for noise_name in ("CoarseSalt", "CoarsePepper"):
        pipeline[noise_name] = {
            "p": 0.5,
            "kwargs": {
                "p": 0.01,  # probability of changing a pixel to salt/pepper noise
                "size_percent": (0.05, 0.1),  # low-res noise mask size as fraction of og
            },
        }

    if include_geometric:
        # elastic transform
        pipeline["ElasticTransformation"] = {
            "p": 0.5,
            "kwargs": {
                "alpha": (0, 10),  # controls strength of displacement
                "sigma": 5,  # controls smoothness of displacement
            },
        }

    # hist eq
    pipeline["AllChannelsHistogramEqualization"] = {"p": 0.1, "kwargs": {}}

    # clahe (contrast limited adaptive histogram equalization) - hist eq over image patches
    pipeline["AllChannelsCLAHE"] = {"p": 0.1, "kwargs": {}}

    # emboss
    pipeline["Emboss"] = {
        "p": 0.1,
        "kwargs": {
            "alpha": (0, 0.5),  # overlay embossed image on original with alpha in this range
            "strength": (0.5, 1.5),  # strength of embossing lies in this range
        },
    }

    if include_geometric:
        # crop: 0.15 is the amount to crop on each side of img, given as a fraction
        pipeline["CropAndPad"] = {
            "p": 0.4,
            "kwargs": {"percent": (-0.15, 0.15), "keep_size": False},
        }

    return pipeline