-
Notifications
You must be signed in to change notification settings - Fork 2
/
visualize.py
112 lines (95 loc) · 3.54 KB
/
visualize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# coding: utf-8
import random
import sys
import argparse
# Third-party dependencies are imported under a guard so that a missing
# package produces an installation hint instead of a raw traceback.
try:
    import numpy as np
    from PIL import Image
    import cv2
except ImportError:
    # Only import failures are handled here: a bare ``except`` would also
    # swallow KeyboardInterrupt/SystemExit and unrelated errors, and would
    # misreport them as a missing dependency.
    print('please install numpy, pillow and opencv libs at first: `pip install numpy pillow opencv-python`')
    sys.exit(-1)
# Category names; a category's position in this list is its integer id.
MF_CATEGORIES = ['embedding', 'isolated']
# Reverse lookup from category name to its integer id.
TYPE_MAPPINGS = {name: idx for idx, name in enumerate(MF_CATEGORIES)}
def read_labels(label_fp, width, height):
    """Read a label file and return its boxes scaled to pixel coordinates.

    Every valid line consists of 9 whitespace-separated fields: an integer
    index into ``MF_CATEGORIES`` followed by 8 floats.  Only the 1st/2nd
    floats (used as ``xmin, ymin``) and the 5th/6th floats (used as
    ``xmax, ymax``) are read; they are multiplied by ``width`` / ``height``
    and truncated to ``int``.
    NOTE(review): the 8 floats are presumably four normalized ``x y`` corner
    points listed from the top-left corner -- confirm against the dataset
    format.

    Blank lines are skipped silently.  Any other line that cannot be parsed
    (wrong field count, non-numeric field, category index outside
    ``MF_CATEGORIES``) is reported with a warning and skipped.

    Args:
        label_fp: path of the label text file.
        width: image width used to scale the x coordinates.
        height: image height used to scale the y coordinates.

    Returns:
        A list of dicts ``{'type': <category name>, 'box': [xmin, ymin,
        xmax, ymax]}`` in file order.
    """
    outs = []
    # Pin the encoding so parsing does not depend on the platform locale.
    with open(label_fp, encoding='utf-8') as f:
        for line in f:
            # split() without an argument tolerates tabs and repeated spaces.
            eles = line.split()
            if not eles:
                continue
            try:
                outs.append(_parse_label_fields(eles, width, height))
            except (ValueError, OverflowError):
                print(f'Warning - bad line: {line}')
    return outs


def _parse_label_fields(eles, width, height):
    """Turn the fields of one label line into a box dict; raise ValueError if malformed."""
    if len(eles) != 9:
        raise ValueError('expected 9 fields')
    type_idx = int(eles[0])
    # Reject negative ids explicitly: Python would otherwise wrap them around
    # and silently pick a category from the end of the list.
    if not 0 <= type_idx < len(MF_CATEGORIES):
        raise ValueError('category index out of range')
    coords = [float(e) for e in eles[1:]]
    xmin, ymin = coords[:2]
    xmax, ymax = coords[4:6]
    box = [int(v) for v in (xmin * width, ymin * height, xmax * width, ymax * height)]
    return {'type': MF_CATEGORIES[type_idx], 'box': box}
def plot_one_box(x, img, color=None, label=None, line_thickness=3):
    """Draw one bounding box, optionally with a text tag, onto ``img`` in place.

    Args:
        x: indexable holding ``xmin, ymin, xmax, ymax`` in its first four
            positions; each value is converted with ``int``.
        img: image passed to the OpenCV drawing calls; its ``shape`` is only
            consulted when ``line_thickness`` is falsy.
        color: colour handed to OpenCV; when falsy, three random integers in
            ``[0, 255]`` are used.
        label: optional text drawn on a filled tag at the box's top-left
            corner.
        line_thickness: outline thickness; when falsy it is derived from the
            image size.
    """
    # Line thickness also drives the font scale and font thickness below.
    thickness = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    if not color:
        color = [random.randint(0, 255) for _ in range(3)]

    top_left = [int(x[0]), int(x[1])]
    bottom_right = [int(x[2]), int(x[3])]
    cv2.rectangle(img, top_left, bottom_right, color, thickness=thickness, lineType=cv2.LINE_AA)
    if not label:
        return

    font_thickness = max(thickness - 1, 1)
    font_scale = thickness / 3
    text_w, text_h = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=font_thickness)[0]

    if top_left[1] - text_h - 3 < 0:
        # The tag would stick out above the image: place it below the
        # box's top edge instead.
        tag_corner = [top_left[0] + text_w, top_left[1] + text_h + 3]
        text_origin = [top_left[0], top_left[1] + text_h + 1]
    else:
        # Default placement: the tag sits just above the top edge.
        tag_corner = [top_left[0] + text_w, top_left[1] - text_h - 3]
        text_origin = [top_left[0], top_left[1] - 2]

    # A thickness of -1 makes OpenCV fill the rectangle behind the text.
    cv2.rectangle(img, top_left, tag_corner, color, -1, cv2.LINE_AA)
    cv2.putText(
        img,
        label,
        text_origin,
        0,
        font_scale,
        [225, 255, 255],
        thickness=font_thickness,
        lineType=cv2.LINE_AA,
    )
def save_layout_img(img0, categories, boxes, save_path):
    """Visualize layout-analysis results: draw the boxes and save the image.

    Args:
        img0: the page image.  A ``PIL.Image.Image`` is converted to a new
            BGR array before drawing; any other value is passed to OpenCV
            as-is and is drawn on in place (presumably a BGR numpy array --
            confirm against callers).
        categories: sequence of category names; one random colour is
            generated per entry.
        boxes: iterable of dicts with a ``'type'`` key (a member of
            ``categories``) and a ``'box'`` key (``xmin, ymin, xmax, ymax``).
        save_path: output file path handed to ``cv2.imwrite``.

    Raises:
        ValueError: if a box's ``'type'`` is not in ``categories``.
        OSError: if OpenCV reports that the image was not written.
    """
    if isinstance(img0, Image.Image):
        # PIL images are RGB while the OpenCV drawing/writing calls expect BGR.
        img0 = cv2.cvtColor(np.asarray(img0.convert('RGB')), cv2.COLOR_RGB2BGR)

    colors = [[random.randint(0, 255) for _ in range(3)] for _ in categories]
    for one_box in boxes:
        _type = one_box['type']
        plot_one_box(
            one_box['box'],
            img0,
            label=f'{_type}',
            color=colors[categories.index(_type)],
            line_thickness=1,
        )

    # cv2.imwrite signals failure through its boolean return value, so check
    # it instead of announcing success unconditionally.
    if not cv2.imwrite(save_path, img0):
        raise OSError(f'failed to write the result image to: {save_path}')
    print(f" The image with the result is saved in: {save_path}")
def main():
    """Command-line entry point: draw the labelled formula boxes of one page.

    Parses the image path, label path and output path from the command line,
    reads the labels scaled to the image size, and saves the annotated image.
    """
    # (short flag, long flag, default value, help text) for each string option.
    option_specs = (
        (
            '-i',
            '--image-fp',
            'examples/CnMFD_Dataset/images/Adobe-SongTi-Std-L-2/syndoc-page7.jpg',
            'the file path of one page image',
        ),
        (
            '-l',
            '--label-fp',
            'examples/CnMFD_Dataset/labels/Adobe-SongTi-Std-L-2/syndoc-page7.txt',
            'the label file path corresponding to the page image',
        ),
        (
            '-o',
            '--output-fp',
            'output-vis7.jpg',
            'the file path of the output image after labelling math formulas',
        ),
    )

    parser = argparse.ArgumentParser()
    for short_flag, long_flag, default_value, help_text in option_specs:
        parser.add_argument(
            short_flag,
            long_flag,
            type=str,
            default=default_value,
            help=help_text,
        )
    args = parser.parse_args()

    page = Image.open(args.image_fp).convert('RGB')
    page_width, page_height = page.size
    page_boxes = read_labels(args.label_fp, page_width, page_height)
    save_layout_img(page, MF_CATEGORIES, page_boxes, args.output_fp)


# Run the command-line tool only when executed as a script.
if __name__ == '__main__':
    main()