-
Notifications
You must be signed in to change notification settings - Fork 9
/
ocr_table.py
129 lines (94 loc) · 3.89 KB
/
ocr_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from time import time
from io import BytesIO
import numpy as np
import cv2
from PIL import Image
import src.auxiliary as aux
class OcrTable:
    """Run the table-OCR pipeline on an image and expose the extracted text.

    The heavy lifting is delegated to the module-level ``process_image``
    dispatcher and to helpers in ``src.auxiliary``; this class only validates
    configuration, times the run, and optionally spell-corrects the output.

    Attributes set on instances:
      text            -- the OCR'd text (str).
      lang            -- OCR language code (e.g. 'por'); NOTE(review): stored
                         but not visibly used in this file — presumably read
                         by ``aux`` helpers; confirm.
      show_performace -- flag controlling ``__repr__`` verbosity.
                         NOTE(review): 'performace' is a typo for
                         'performance', but it is part of the public
                         parameter name, so it must stay for compatibility.
      spell_corrector -- whether to post-correct words with a SymSpell dict.
      execution_time  -- wall-clock seconds for the whole run (float).
    """

    def __init__(self,
                 image,
                 language: str = 'por',
                 spell_corrector: bool = False,
                 show_performace: bool = False):
        # Validate the config arguments first (raises TypeError on bad types).
        self.define_global_vars(language, show_performace, spell_corrector)
        started_time = time()
        # ``image`` may be a URL, a filesystem path, or an in-memory image;
        # aux.get_input_type maps it to an integer code consumed by the
        # module-level process_image dispatcher.
        input_type = aux.get_input_type(image)
        self.text = process_image(image, input_type)
        if self.spell_corrector:
            # Word-by-word correction: split on single spaces, replace each
            # token with its SymSpell suggestion, and rejoin.
            sym_spell = aux.load_dict_to_memory()
            self.text = [aux.get_word_suggestion(
                sym_spell, input_term) for input_term in self.text.split(' ')]
            self.text = ' '.join(self.text)
        self.execution_time = time() - started_time

    def __repr__(self):
        # NOTE(review): when show_performace is truthy this shows the flag
        # itself, not execution_time — looks like it may have been intended
        # to show the timing; confirm before changing.
        return repr(self.text) \
            if not self.show_performace \
            else repr([self.text, self.show_performace])

    def define_global_vars(self, language, show_performace, spell_corrector):
        """Type-check the constructor options and store them on the instance.

        Raises:
            TypeError: if ``language`` is not a str or either flag is not a bool.
        """
        if isinstance(language, str) and \
                isinstance(show_performace, bool) and \
                isinstance(spell_corrector, bool):
            self.lang = language
            self.show_performace = show_performace
            self.spell_corrector = spell_corrector
        else:
            raise TypeError(
                'language variable must be a string, show_perf. and spell_corrector bool!')
def process_image(image, _type):
    """Dispatch *image* to the OCR routine matching its input-type code.

    Args:
        image: the raw input — a URL (type 1), a path (type 2), or an
            already-loaded image object (type 3).
        _type: integer code produced by ``aux.get_input_type``.

    Returns:
        The extracted text for the image.

    Raises:
        NotImplementedError: for any unrecognised type code.
    """
    # Guard-clause dispatch: return as soon as a handler matches.
    if _type == 1:
        return run_online_img_ocr(image)
    if _type == 2:
        return run_path_img_ocr(image)
    if _type == 3:
        return run_img_ocr(image)
    raise NotImplementedError(
        "method to this specific processing isn't implemented yet!")
def run_online_img_ocr(image_url):
    """Fetch an image over HTTP and run the OCR pipeline on it.

    Args:
        image_url: URL of the image; downloaded via ``aux.get_image_from_url``.

    Returns:
        The text extracted by ``run_pipeline``.
    """
    response = aux.get_image_from_url(image_url)
    return run_pipeline(Image.open(BytesIO(response.content)))
def run_path_img_ocr(image):
    """Open an image from a filesystem path (or file object) and OCR it.

    Args:
        image: anything accepted by ``PIL.Image.open``.

    Returns:
        The text extracted by ``run_pipeline``.
    """
    return run_pipeline(Image.open(image))
def run_img_ocr(image):
    """Run the OCR pipeline directly on an already-loaded image object.

    Args:
        image: an in-memory image (PIL image or numpy array).

    Returns:
        The text extracted by ``run_pipeline``.
    """
    return run_pipeline(image)
def run_pipeline(image):
    """Preprocess *image* and OCR it, returning the detected words as one string.

    The steps are order-dependent: normalisation, table-line removal, upscale,
    morphological cleanup, sharpening, binarisation, then EAST-based text
    detection. Each ``aux`` helper is project-local; exact behaviour of each
    stage is defined in ``src.auxiliary``.

    Args:
        image: a PIL image or numpy (OpenCV) array.

    Returns:
        str: detected words joined with single spaces, in the order produced
        by ``aux.east_process`` (presumably reading order — confirm in aux).
    """
    # Ensure we are working with an OpenCV-style numpy array.
    if not isinstance(image, np.ndarray):
        image = aux.to_opencv_type(image)
    image = aux.remove_alpha_channel(image)
    image = aux.brightness_contrast_optimization(image, 1, 0.5)
    # Two-color k-means gives the dominant background/foreground colors,
    # used below to paint over detected table lines.
    colors = aux.run_kmeans(image, 2)
    image = remove_lines(image, colors)
    # Upscale 4x before morphology/sharpening — small glyphs survive better.
    image = aux.image_resize(image, height=image.shape[0]*4)
    image = aux.open_close_filter(image, cv2.MORPH_CLOSE)
    image = aux.brightness_contrast_optimization(image, 1, 0.5)
    image = aux.unsharp_mask(image, (3, 3), 0.5, 1.5, 0)
    image = aux.dilate_image(image, 1)
    image = aux.binarize_image(image)
    image = aux.open_close_filter(image, cv2.MORPH_CLOSE, 1)
    # east_process returns (position, word) pairs already sorted; keep only
    # the words and join them into a single space-separated string.
    sorted_results = aux.east_process(image)
    sorted_chars = ' '.join(
        map(lambda position_and_word: position_and_word[1], sorted_results))
    return sorted_chars
def remove_lines(image, colors):
    """Erase table ruling lines by overdrawing them with the dominant color.

    Horizontal and vertical line segments are located on an Otsu-binarised
    copy, then each detected contour is painted over on *image* in place
    using the first k-means cluster color.

    Args:
        image: BGR numpy array, modified in place.
        colors: k-means cluster colors; ``colors[0][0]`` is used as the fill.

    Returns:
        The same *image* array, with lines painted over.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    binary = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    fill_color = colors[0][0]
    # Wide-flat kernel picks up horizontal strokes, tall-thin the vertical
    # ones; horizontal contours are drawn first, matching the original order.
    for kernel_shape in ((25, 1), (1, 25)):
        for contour in get_contours(binary, kernel_shape):
            cv2.drawContours(image, [contour], -1, fill_color, 2)
    return image
def get_contours(bin_image, initial_kernel):
    """Find external contours of line-shaped regions in a binary image.

    A rectangular structuring element of ``initial_kernel`` size is opened
    against the image twice, so only strokes at least that long/tall in the
    kernel's orientation survive; their outer contours are returned.

    Args:
        bin_image: binarised (inverted) single-channel image.
        initial_kernel: (width, height) of the rectangular kernel.

    Returns:
        Sequence of contours as returned by ``cv2.findContours``.
    """
    struct_elem = cv2.getStructuringElement(cv2.MORPH_RECT, initial_kernel)
    opened = cv2.morphologyEx(
        bin_image, cv2.MORPH_OPEN, struct_elem, iterations=2)
    result = cv2.findContours(
        opened, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 2.x/4.x return (contours, hierarchy); 3.x returns
    # (image, contours, hierarchy) — select the contour list either way.
    if len(result) == 2:
        return result[0]
    return result[1]