DecodeImage: # load the image from an image file or an in-memory buffer
  img_mode: BGR
  channel_first: false
# Imports used by the operators in this section. Helper functions referenced
# below (AugmenterBuilder, crop_area, is_poly_outside_rect, rotate_bbox,
# get_rotate_crop_image) live in the neighbouring PaddleOCR imaug/utility
# modules and are assumed to be in scope.
import json
import random

import six
import cv2
import numpy as np
import imgaug
import pyclipper
from PIL import Image
from shapely.geometry import Polygon


class DecodeImage(object):
    """ decode image """

    def __init__(self,
                 img_mode='RGB',
                 channel_first=False,
                 ignore_orientation=False,
                 **kwargs):
        self.img_mode = img_mode
        self.channel_first = channel_first
        self.ignore_orientation = ignore_orientation

    def __call__(self, data):
        img = data['image']
        if six.PY2:
            assert type(img) is str and len(
                img) > 0, "invalid input 'img' in DecodeImage"
        else:
            assert type(img) is bytes and len(
                img) > 0, "invalid input 'img' in DecodeImage"
        img = np.frombuffer(img, dtype='uint8')
        if self.ignore_orientation:
            img = cv2.imdecode(img, cv2.IMREAD_IGNORE_ORIENTATION |
                               cv2.IMREAD_COLOR)
        else:
            img = cv2.imdecode(img, 1)
        if img is None:
            return None
        if self.img_mode == 'GRAY':
            # the decoded image is BGR, so convert BGR -> GRAY here
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        elif self.img_mode == 'RGB':
            assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape)
            img = img[:, :, ::-1]

        if self.channel_first:
            img = img.transpose((2, 0, 1))

        data['image'] = img
        return data
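A quick usage sketch (not part of the module): the operator is driven with raw image bytes; the file name below is hypothetical.

decode = DecodeImage(img_mode='BGR', channel_first=False)
with open('sample.jpg', 'rb') as f:                # hypothetical local image file
    data = decode({'image': f.read()})
print(data['image'].shape, data['image'].dtype)    # e.g. (H, W, 3) uint8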
DetLabelEncode: null # parse the JSON label into polygons, transcriptions and ignore tags

class DetLabelEncode(object):
    def __init__(self, **kwargs):
        pass

    def __call__(self, data):
        label = data['label']
        label = json.loads(label)
        nBox = len(label)
        boxes, txts, txt_tags = [], [], []
        for bno in range(0, nBox):
            box = label[bno]['points']
            txt = label[bno]['transcription']
            boxes.append(box)
            txts.append(txt)
            if txt in ['*', '###']:
                txt_tags.append(True)
            else:
                txt_tags.append(False)
        if len(boxes) == 0:
            return None
        boxes = self.expand_points_num(boxes)
        boxes = np.array(boxes, dtype=np.float32)
        txt_tags = np.array(txt_tags, dtype=np.bool_)

        data['polys'] = boxes
        data['texts'] = txts
        data['ignore_tags'] = txt_tags
        return data

    def order_points_clockwise(self, pts):
        rect = np.zeros((4, 2), dtype="float32")
        s = pts.sum(axis=1)
        rect[0] = pts[np.argmin(s)]
        rect[2] = pts[np.argmax(s)]
        tmp = np.delete(pts, (np.argmin(s), np.argmax(s)), axis=0)
        diff = np.diff(np.array(tmp), axis=1)
        rect[1] = tmp[np.argmin(diff)]
        rect[3] = tmp[np.argmax(diff)]
        return rect

    def expand_points_num(self, boxes):
        # pad every box to the same number of points by repeating its last point
        max_points_num = 0
        for box in boxes:
            if len(box) > max_points_num:
                max_points_num = len(box)
        ex_boxes = []
        for box in boxes:
            ex_box = box + [box[-1]] * (max_points_num - len(box))
            ex_boxes.append(ex_box)
        return ex_boxes
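A small illustration of the label format DetLabelEncode expects: an ICDAR-style JSON list of points plus transcription, where '*' or '###' marks an instance to ignore. The values below are made up.

label = json.dumps([
    {"points": [[10, 10], [100, 10], [100, 40], [10, 40]], "transcription": "hello"},
    {"points": [[20, 60], [90, 60], [90, 90], [20, 90]], "transcription": "###"},
])
sample = DetLabelEncode()({'label': label})
print(sample['polys'].shape)    # (2, 4, 2)
print(sample['texts'])          # ['hello', '###']
print(sample['ignore_tags'])    # [False  True] -> '###' is ignored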
class CopyPaste(object):
    """Paste text instances cropped from an extra sample ('ext_data') onto the
    current image, extending polys/texts/ignore_tags accordingly."""

    def __init__(self, objects_paste_ratio=0.2, limit_paste=True, **kwargs):
        self.ext_data_num = 1
        self.objects_paste_ratio = objects_paste_ratio
        self.limit_paste = limit_paste
        augmenter_args = [{'type': 'Resize', 'args': {'size': [0.5, 3]}}]
        self.aug = IaaAugment(augmenter_args)

    def __call__(self, data):
        point_num = data['polys'].shape[1]
        src_img = data['image']
        src_polys = data['polys'].tolist()
        src_texts = data['texts']
        src_ignores = data['ignore_tags'].tolist()
        ext_data = data['ext_data'][0]
        ext_image = ext_data['image']
        ext_polys = ext_data['polys']
        ext_texts = ext_data['texts']
        ext_ignores = ext_data['ignore_tags']
        indexs = [i for i in range(len(ext_ignores)) if not ext_ignores[i]]
        select_num = max(
            1, min(int(self.objects_paste_ratio * len(ext_polys)), 30))

        random.shuffle(indexs)
        select_idxs = indexs[:select_num]
        select_polys = ext_polys[select_idxs]
        select_ignores = ext_ignores[select_idxs]

        src_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB)
        ext_image = cv2.cvtColor(ext_image, cv2.COLOR_BGR2RGB)
        src_img = Image.fromarray(src_img).convert('RGBA')
        for idx, poly, tag in zip(select_idxs, select_polys, select_ignores):
            box_img = get_rotate_crop_image(ext_image, poly)

            src_img, box = self.paste_img(src_img, box_img, src_polys)
            if box is not None:
                box = box.tolist()
                for _ in range(len(box), point_num):
                    box.append(box[-1])
                src_polys.append(box)
                src_texts.append(ext_texts[idx])
                src_ignores.append(tag)
        src_img = cv2.cvtColor(np.array(src_img), cv2.COLOR_RGB2BGR)
        h, w = src_img.shape[:2]
        src_polys = np.array(src_polys)
        src_polys[:, :, 0] = np.clip(src_polys[:, :, 0], 0, w)
        src_polys[:, :, 1] = np.clip(src_polys[:, :, 1], 0, h)
        data['image'] = src_img
        data['polys'] = src_polys
        data['texts'] = src_texts
        data['ignore_tags'] = np.array(src_ignores)
        return data

    def paste_img(self, src_img, box_img, src_polys):
        box_img_pil = Image.fromarray(box_img).convert('RGBA')
        src_w, src_h = src_img.size
        box_w, box_h = box_img_pil.size

        angle = np.random.randint(0, 360)
        box = np.array([[[0, 0], [box_w, 0], [box_w, box_h], [0, box_h]]])
        box = rotate_bbox(box_img, box, angle)[0]
        box_img_pil = box_img_pil.rotate(angle, expand=1)
        box_w, box_h = box_img_pil.width, box_img_pil.height
        if src_w - box_w < 0 or src_h - box_h < 0:
            return src_img, None

        paste_x, paste_y = self.select_coord(src_polys, box, src_w - box_w,
                                             src_h - box_h)
        if paste_x is None:
            return src_img, None
        box[:, 0] += paste_x
        box[:, 1] += paste_y
        r, g, b, A = box_img_pil.split()
        src_img.paste(box_img_pil, (paste_x, paste_y), mask=A)

        return src_img, box

    def select_coord(self, src_polys, box, endx, endy):
        if self.limit_paste:
            # try up to 50 positions that do not overlap any existing polygon
            xmin, ymin = box[:, 0].min(), box[:, 1].min()
            xmax, ymax = box[:, 0].max(), box[:, 1].max()
            for _ in range(50):
                paste_x = random.randint(0, endx)
                paste_y = random.randint(0, endy)
                xmin1 = xmin + paste_x
                xmax1 = xmax + paste_x
                ymin1 = ymin + paste_y
                ymax1 = ymax + paste_y

                num_poly_in_rect = 0
                for poly in src_polys:
                    if not is_poly_outside_rect(poly, xmin1, ymin1,
                                                xmax1 - xmin1, ymax1 - ymin1):
                        num_poly_in_rect += 1
                        break
                if num_poly_in_rect == 0:
                    return paste_x, paste_y
            return None, None
        else:
            paste_x = random.randint(0, endx)
            paste_y = random.randint(0, endy)
            return paste_x, paste_y
class IaaAugment():
    def __init__(self, augmenter_args=None, **kwargs):
        if augmenter_args is None:
            augmenter_args = [
                {'type': 'Fliplr', 'args': {'p': 0.5}},
                {'type': 'Affine', 'args': {'rotate': [-10, 10]}},
                {'type': 'Resize', 'args': {'size': [0.5, 3]}},
            ]
        self.augmenter = AugmenterBuilder().build(augmenter_args)

    def __call__(self, data):
        image = data['image']
        shape = image.shape

        if self.augmenter:
            aug = self.augmenter.to_deterministic()
            data['image'] = aug.augment_image(image)
            data = self.may_augment_annotation(aug, data, shape)
        return data

    def may_augment_annotation(self, aug, data, shape):
        if aug is None:
            return data

        line_polys = []
        for poly in data['polys']:
            new_poly = self.may_augment_poly(aug, shape, poly)
            line_polys.append(new_poly)
        data['polys'] = np.array(line_polys)
        return data

    def may_augment_poly(self, aug, img_shape, poly):
        keypoints = [imgaug.Keypoint(p[0], p[1]) for p in poly]
        keypoints = aug.augment_keypoints(
            [imgaug.KeypointsOnImage(keypoints, shape=img_shape)])[0].keypoints
        poly = [(p.x, p.y) for p in keypoints]
        return poly
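The default Fliplr/Affine/Resize list above can be overridden from the config. A minimal sketch, assuming `data` already holds the 'image' and 'polys' produced by the operators above and that the AugmenterBuilder helper from the same module is in scope:

aug = IaaAugment(augmenter_args=[
    {'type': 'Fliplr', 'args': {'p': 0.3}},
    {'type': 'Affine', 'args': {'rotate': [-5, 5]}},
])
data = aug(data)   # image and polys are warped by the same deterministic augmenter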
class EastRandomCropData(object):
    """Randomly crop a region that keeps text instances, then resize/pad it to `size`."""

    def __init__(self,
                 size=(640, 640),
                 max_tries=10,
                 min_crop_side_ratio=0.1,
                 keep_ratio=True,
                 **kwargs):
        self.size = size
        self.max_tries = max_tries
        self.min_crop_side_ratio = min_crop_side_ratio
        self.keep_ratio = keep_ratio

    def __call__(self, data):
        img = data['image']
        text_polys = data['polys']
        ignore_tags = data['ignore_tags']
        texts = data['texts']
        all_care_polys = [
            text_polys[i] for i, tag in enumerate(ignore_tags) if not tag
        ]
        # compute the crop region
        crop_x, crop_y, crop_w, crop_h = crop_area(
            img, all_care_polys, self.min_crop_side_ratio, self.max_tries)
        # crop the image; pad to keep the aspect ratio
        scale_w = self.size[0] / crop_w
        scale_h = self.size[1] / crop_h
        scale = min(scale_w, scale_h)
        h = int(crop_h * scale)
        w = int(crop_w * scale)
        if self.keep_ratio:
            padimg = np.zeros((self.size[1], self.size[0], img.shape[2]),
                              img.dtype)
            padimg[:h, :w] = cv2.resize(
                img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h))
            img = padimg
        else:
            img = cv2.resize(
                img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w],
                tuple(self.size))
        # crop the text boxes
        text_polys_crop = []
        ignore_tags_crop = []
        texts_crop = []
        for poly, text, tag in zip(text_polys, texts, ignore_tags):
            poly = ((poly - (crop_x, crop_y)) * scale).tolist()
            if not is_poly_outside_rect(poly, 0, 0, w, h):
                text_polys_crop.append(poly)
                ignore_tags_crop.append(tag)
                texts_crop.append(text)
        data['image'] = img
        data['polys'] = np.array(text_polys_crop)
        data['ignore_tags'] = ignore_tags_crop
        data['texts'] = texts_crop
        return data
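A sketch of the output contract, assuming `data` was produced by the operators above (image, polys, texts, ignore_tags) and that the crop_area helper defined alongside this class is in scope: the returned image is always exactly `size`, and the surviving polygons are rescaled into that frame.

crop = EastRandomCropData(size=(640, 640), keep_ratio=True)
data = crop(data)
print(data['image'].shape)    # (640, 640, 3)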
class MakeBorderMap(object):
    """Generate the threshold (border) map 'threshold_map' and its mask 'threshold_mask'."""

    def __init__(self,
                 shrink_ratio=0.4,
                 thresh_min=0.3,
                 thresh_max=0.7,
                 **kwargs):
        self.shrink_ratio = shrink_ratio
        self.thresh_min = thresh_min
        self.thresh_max = thresh_max
        if 'total_epoch' in kwargs and 'epoch' in kwargs and kwargs[
                'epoch'] != "None":
            self.shrink_ratio = self.shrink_ratio + 0.2 * kwargs[
                'epoch'] / float(kwargs['total_epoch'])

    def __call__(self, data):
        img = data['image']
        text_polys = data['polys']
        ignore_tags = data['ignore_tags']

        canvas = np.zeros(img.shape[:2], dtype=np.float32)
        mask = np.zeros(img.shape[:2], dtype=np.float32)

        for i in range(len(text_polys)):
            if ignore_tags[i]:
                continue
            self.draw_border_map(text_polys[i], canvas, mask=mask)
        canvas = canvas * (self.thresh_max - self.thresh_min) + self.thresh_min

        data['threshold_map'] = canvas
        data['threshold_mask'] = mask
        return data

    def draw_border_map(self, polygon, canvas, mask):
        polygon = np.array(polygon)
        assert polygon.ndim == 2
        assert polygon.shape[1] == 2

        polygon_shape = Polygon(polygon)
        if polygon_shape.area <= 0:
            return
        distance = polygon_shape.area * (
            1 - np.power(self.shrink_ratio, 2)) / polygon_shape.length
        subject = [tuple(l) for l in polygon]
        padding = pyclipper.PyclipperOffset()
        padding.AddPath(subject, pyclipper.JT_ROUND,
                        pyclipper.ET_CLOSEDPOLYGON)

        padded_polygon = np.array(padding.Execute(distance)[0])
        cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)

        xmin = padded_polygon[:, 0].min()
        xmax = padded_polygon[:, 0].max()
        ymin = padded_polygon[:, 1].min()
        ymax = padded_polygon[:, 1].max()
        width = xmax - xmin + 1
        height = ymax - ymin + 1

        polygon[:, 0] = polygon[:, 0] - xmin
        polygon[:, 1] = polygon[:, 1] - ymin

        xs = np.broadcast_to(
            np.linspace(0, width - 1, num=width).reshape(1, width),
            (height, width))
        ys = np.broadcast_to(
            np.linspace(0, height - 1, num=height).reshape(height, 1),
            (height, width))

        distance_map = np.zeros(
            (polygon.shape[0], height, width), dtype=np.float32)
        for i in range(polygon.shape[0]):
            j = (i + 1) % polygon.shape[0]
            absolute_distance = self._distance(xs, ys, polygon[i], polygon[j])
            distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
        distance_map = distance_map.min(axis=0)

        xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
        xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
        ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
        ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
        canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
            1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
                             xmin_valid - xmin:xmax_valid - xmax + width],
            canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])

    def _distance(self, xs, ys, point_1, point_2):
        '''
        compute the distance from each grid point to a line segment
        ys: coordinates in the first axis
        xs: coordinates in the second axis
        point_1, point_2: (x, y), the two ends of the line segment
        '''
        height, width = xs.shape[:2]
        square_distance_1 = np.square(xs - point_1[0]) + np.square(
            ys - point_1[1])
        square_distance_2 = np.square(xs - point_2[0]) + np.square(
            ys - point_2[1])
        square_distance = np.square(point_1[0] - point_2[0]) + np.square(
            point_1[1] - point_2[1])

        cosin = (square_distance - square_distance_1 - square_distance_2) / (
            2 * np.sqrt(square_distance_1 * square_distance_2))
        square_sin = 1 - np.square(cosin)
        square_sin = np.nan_to_num(square_sin)
        result = np.sqrt(square_distance_1 * square_distance_2 * square_sin /
                         square_distance)

        result[cosin < 0] = np.sqrt(
            np.fmin(square_distance_1, square_distance_2))[cosin < 0]
        # self.extend_line(point_1, point_2, result)
        return result

    def extend_line(self, point_1, point_2, result, shrink_ratio):
        ex_point_1 = (
            int(round(point_1[0] + (point_1[0] - point_2[0]) * (1 + shrink_ratio))),
            int(round(point_1[1] + (point_1[1] - point_2[1]) * (1 + shrink_ratio))))
        cv2.line(
            result,
            tuple(ex_point_1),
            tuple(point_1),
            4096.0,
            1,
            lineType=cv2.LINE_AA,
            shift=0)
        ex_point_2 = (
            int(round(point_2[0] + (point_2[0] - point_1[0]) * (1 + shrink_ratio))),
            int(round(point_2[1] + (point_2[1] - point_1[1]) * (1 + shrink_ratio))))
        cv2.line(
            result,
            tuple(ex_point_2),
            tuple(point_2),
            4096.0,
            1,
            lineType=cv2.LINE_AA,
            shift=0)
        return ex_point_1, ex_point_2
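A rough sketch of what MakeBorderMap produces for a single box on a dummy image: threshold_map values lie in [thresh_min, thresh_max] and threshold_mask marks the dilated text region.

sample = {
    'image': np.zeros((160, 160, 3), dtype=np.uint8),
    'polys': np.array([[[20, 20], [120, 20], [120, 60], [20, 60]]], dtype=np.float32),
    'ignore_tags': [False],
}
sample = MakeBorderMap(shrink_ratio=0.4, thresh_min=0.3, thresh_max=0.7)(sample)
print(sample['threshold_map'].min(), sample['threshold_map'].max())  # ~0.3 ... ~0.7
print(sample['threshold_mask'].sum() > 0)                            # True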
class MakeShrinkMap(object):
    r'''
    Making binary mask from detection data with ICDAR format.
    Typically following the process of class `MakeICDARData`.
    '''

    def __init__(self, min_text_size=8, shrink_ratio=0.4, **kwargs):
        self.min_text_size = min_text_size
        self.shrink_ratio = shrink_ratio
        if 'total_epoch' in kwargs and 'epoch' in kwargs and kwargs[
                'epoch'] != "None":
            self.shrink_ratio = self.shrink_ratio + 0.2 * kwargs[
                'epoch'] / float(kwargs['total_epoch'])

    def __call__(self, data):
        image = data['image']
        text_polys = data['polys']
        ignore_tags = data['ignore_tags']

        h, w = image.shape[:2]
        text_polys, ignore_tags = self.validate_polygons(text_polys,
                                                         ignore_tags, h, w)
        gt = np.zeros((h, w), dtype=np.float32)
        mask = np.ones((h, w), dtype=np.float32)
        for i in range(len(text_polys)):
            polygon = text_polys[i]
            height = max(polygon[:, 1]) - min(polygon[:, 1])
            width = max(polygon[:, 0]) - min(polygon[:, 0])
            if ignore_tags[i] or min(height, width) < self.min_text_size:
                cv2.fillPoly(mask,
                             polygon.astype(np.int32)[np.newaxis, :, :], 0)
                ignore_tags[i] = True
            else:
                polygon_shape = Polygon(polygon)
                subject = [tuple(l) for l in polygon]
                padding = pyclipper.PyclipperOffset()
                padding.AddPath(subject, pyclipper.JT_ROUND,
                                pyclipper.ET_CLOSEDPOLYGON)
                shrinked = []

                # Increase the shrink ratio every time multiple polygons come back
                possible_ratios = np.arange(self.shrink_ratio, 1,
                                            self.shrink_ratio)
                # (np.append returns a new array, so this call has no effect as written)
                np.append(possible_ratios, 1)
                for ratio in possible_ratios:
                    distance = polygon_shape.area * (
                        1 - np.power(ratio, 2)) / polygon_shape.length
                    shrinked = padding.Execute(-distance)
                    if len(shrinked) == 1:
                        break

                if shrinked == []:
                    cv2.fillPoly(mask,
                                 polygon.astype(np.int32)[np.newaxis, :, :], 0)
                    ignore_tags[i] = True
                    continue

                for each_shrink in shrinked:
                    shrink = np.array(each_shrink).reshape(-1, 2)
                    cv2.fillPoly(gt, [shrink.astype(np.int32)], 1)

        data['shrink_map'] = gt
        data['shrink_mask'] = mask
        return data

    def validate_polygons(self, polygons, ignore_tags, h, w):
        '''
        polygons (numpy.array, required): of shape (num_instances, num_points, 2)
        '''
        if len(polygons) == 0:
            return polygons, ignore_tags
        assert len(polygons) == len(ignore_tags)
        for polygon in polygons:
            polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1)
            polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1)

        for i in range(len(polygons)):
            area = self.polygon_area(polygons[i])
            if abs(area) < 1:
                ignore_tags[i] = True
            if area > 0:
                polygons[i] = polygons[i][::-1, :]
        return polygons, ignore_tags

    def polygon_area(self, polygon):
        """
        compute polygon area
        """
        area = 0
        q = polygon[-1]
        for p in polygon:
            area += p[0] * q[1] - p[1] * q[0]
            q = p
        return area / 2.0
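Both MakeShrinkMap and MakeBorderMap derive their offset from the same DB-style quantity: for a polygon with area A, perimeter L and shrink ratio r, the offset distance is A * (1 - r^2) / L, applied with a negative sign to shrink and a positive sign to dilate. A small worked sketch:

box = [(0, 0), (100, 0), (100, 40), (0, 40)]        # 100x40 box: A = 4000, L = 280
ratio = 0.4
distance = Polygon(box).area * (1 - ratio ** 2) / Polygon(box).length   # 12.0
offset = pyclipper.PyclipperOffset()
offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
shrunk = np.array(offset.Execute(-distance)[0])     # inward offset -> shrink-map region
print(distance, shrunk.min(axis=0), shrunk.max(axis=0))   # 12.0, ~[12 12], ~[88 28]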
class NormalizeImage(object):
    """ normalize image, e.g. subtract mean and divide by std """

    def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
        if isinstance(scale, str):
            scale = eval(scale)
        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
        mean = mean if mean is not None else [0.485, 0.456, 0.406]
        std = std if std is not None else [0.229, 0.224, 0.225]

        shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
        self.mean = np.array(mean).reshape(shape).astype('float32')
        self.std = np.array(std).reshape(shape).astype('float32')

    def __call__(self, data):
        img = data['image']
        from PIL import Image
        if isinstance(img, Image.Image):
            img = np.array(img)
        assert isinstance(img,
                          np.ndarray), "invalid input 'img' in NormalizeImage"
        data['image'] = (
            img.astype('float32') * self.scale - self.mean) / self.std
        return data
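The scale argument may arrive from the YAML config as a string such as '1./255.' (hence the eval). A minimal sketch with the ImageNet mean/std defaults:

norm = NormalizeImage(scale='1./255.', order='hwc')
out = norm({'image': np.full((4, 4, 3), 128, dtype=np.uint8)})
print(out['image'].shape, out['image'].dtype)   # (4, 4, 3) float32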
class ToCHWImage(object):
    """ convert an HWC image to CHW """

    def __init__(self, **kwargs):
        pass

    def __call__(self, data):
        img = data['image']
        from PIL import Image
        if isinstance(img, Image.Image):
            img = np.array(img)
        data['image'] = img.transpose((2, 0, 1))
        return data
class KeepKeys(object):
    def __init__(self, keep_keys, **kwargs):
        self.keep_keys = keep_keys

    def __call__(self, data):
        data_list = []
        for key in self.keep_keys:
            data_list.append(data[key])
        return data_list
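Taken together, the operators above make up a typical DB detection training pipeline. A hedged sketch of chaining them in the order a detection config usually lists them; raw_image_bytes and label_string are hypothetical placeholders read from the annotation file.

ops = [
    DecodeImage(img_mode='BGR', channel_first=False),
    DetLabelEncode(),
    IaaAugment(),
    EastRandomCropData(size=(640, 640), keep_ratio=True),
    MakeBorderMap(shrink_ratio=0.4, thresh_min=0.3, thresh_max=0.7),
    MakeShrinkMap(shrink_ratio=0.4, min_text_size=8),
    NormalizeImage(scale='1./255.', order='hwc'),
    ToCHWImage(),
    KeepKeys(keep_keys=['image', 'threshold_map', 'threshold_mask',
                        'shrink_map', 'shrink_mask']),
]

data = {'image': raw_image_bytes, 'label': label_string}   # hypothetical placeholders
for op in ops:
    data = op(data)
    if data is None:    # an operator may reject a bad sample
        break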