-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathOCR.py
More file actions
165 lines (138 loc) · 9.31 KB
/
OCR.py
File metadata and controls
165 lines (138 loc) · 9.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import io
import time
import traceback
from PIL import Image
# Import CodeProject.AI SDK
from codeproject_ai_sdk import LogVerbosity, ModuleRunner, JSON
from options import Options
from paddleocr import PaddleOCR
ocr = None
no_text_found = 'Text Not Found'
def init_detect_ocr(opts: Options) -> None:
global ocr
# See notes at the end of this file for options.
ocr = PaddleOCR(lang = opts.language,
use_gpu = opts.use_gpu,
show_log = opts.log_verbosity == LogVerbosity.Loud,
det_db_unclip_ratio = opts.det_db_unclip_ratio,
det_db_box_thresh = opts.box_detect_threshold,
drop_score = opts.char_detect_threshold,
rec_algorithm = opts.algorithm,
cls_model_dir = opts.cls_model_dir,
det_model_dir = opts.det_model_dir,
rec_model_dir = opts.rec_model_dir)
def read_text(module_runner: ModuleRunner, image: Image, rotate_deg: int = 0) -> JSON:
outputs = []
# rotate image if needed
working_image = image
if rotate_deg != 0:
working_image = image.rotate(rotate_deg, expand=True, resample=Image.BICUBIC)
# Possibly run it through a super-resolution module to improve readability
# working_image = enhance_image(working_image)
# Read text
inferenceTimeMs = 0
try:
# Convert the image to a bytes array
with io.BytesIO() as image_buffer:
working_image.save(image_buffer, format='JPEG')
img_byte_arr = image_buffer.getvalue()
start_time = time.perf_counter()
ocr_response = ocr.ocr(img_byte_arr, cls=True)
inferenceTimeMs = int((time.perf_counter() - start_time) * 1000)
# Note that ocr_response[0][0][0][0] could be a float with value 0 ('false'), or in some
# other universe maybe it's a string. To be really careful we would have a test like
# if hasattr(ocr_response[0][0][0][0], '__len__') and (not isinstance(ocr_response[0][0][0][0], str))
if not ocr_response or not ocr_response[0] or not ocr_response[0][0] or not ocr_response[0][0][0]:
return { "success": False, "error": "No OCR response received", "inferenceMs" : inferenceTimeMs }
# Seems that different versions of paddle return different structures, OR
# paddle returns different structures depending on its mood. We're expecting
# ocr_response = array of single set of detections
# -> detections = array of detection
# -> detection = array of bounding box, classification
# -> bounding box = array of [x,y], classification = array of label, confidence.
# so ocr_response[0][0][0][0][0] = first 'x' of the bounding boxes, which is a float
# However, the first "array of single set of detections" isn't always there, so first
# check to see if ocr_response[0][0][0][0] is a float
detections = ocr_response if isinstance(ocr_response[0][0][0][0], float) else ocr_response[0]
for detection in detections:
bounding_box = detection[0] # [ topleft, topright, bottom right, bottom left ], each is [x,y]
classification = detection[1]
label = classification[0]
confidence = classification[1]
if label and confidence:
# Obviously some optimisation can be done here, but is it worth
# it? The trivial optimisation is to assume the order of the
# points, but that's dangerous.
detection = {
"confidence": confidence,
"label": label,
"x_min": int(min(point[0] for point in bounding_box)), # int(bounding_box[0][0]),
"y_min": int(min(point[1] for point in bounding_box)), # int(bounding_box[0][1]),
"x_max": int(max(point[0] for point in bounding_box)), # int(bounding_box[3][0]),
"y_max": int(max(point[1] for point in bounding_box)), # int(bounding_box[3][1]),
}
outputs.append(detection)
# The operation was successfully completed. There just wasn't any text
# if not outputs:
# return { "success": False, "predictions" : None, "inferenceMs" : inferenceTimeMs }
return { "success": True, "predictions" : outputs, "inferenceMs" : inferenceTimeMs }
except Exception as ex:
module_runner.report_error(ex, __file__)
message = "".join(traceback.TracebackException.from_exception(ex).format())
return { "success": False, "error": message, "inferenceMs" : inferenceTimeMs }
"""
Options for the PaddleOCR object:
Parameter Default Description
-------------------------------------------------------------------------------
use_gpu TRUE use GPU or not
gpu_mem 8000M GPU memory size used for initialization
image_dir The images path or folder path for predicting
when used by the command line
det_algorithm DB Type of detection algorithm selected (DB = Differentiable Binarization)
det_model_dir None the text detection inference model folder. There
are two ways to transfer parameters, 1. None: Automatically download
the built-in model to ~/.paddleocr/det;
2. The path of the inference model converted by yourself, the model
and params files must be included in the model path
det_max_side_len 960 The maximum size of the long side of the image. When the long side
exceeds this value, the long side will be resized to this size, and
the short side will be scaled proportionally
det_db_thresh 0.3 Binarization threshold value of DB output map
det_db_box_thresh 0.5 The threshold value of the DB output box. Boxes score lower than
this value will be discarded
det_db_unclip_ratio 2 The expanded ratio of DB output box
det_east_score_thresh 0.8 Binarization threshold value of EAST output map
det_east_cover_thresh 0.1 The threshold value of the EAST output box. Boxes score lower than
this value will be discarded
det_east_nms_thresh 0.2 The NMS threshold value of EAST model output box
rec_algorithm CRNN Type of recognition algorithm selected
rec_model_dir None the text recognition inference model folder. There are two ways to
transfer parameters, 1. None: Automatically download the built-in
model to ~/.paddleocr/rec; 2. The path of the inference model
converted by yourself, the model and params files must be included
in the model path
rec_image_shape "3,32,320" image shape of recognition algorithm
rec_char_type ch Character type of recognition algorithm, Chinese (ch) or English (en)
rec_batch_num 30 When performing recognition, the batchsize of forward images
max_text_length 25 The maximum text length that the recognition algorithm can recognize
rec_char_dict_path ./ppocr/utils/ppocr_keys_v1.txt the alphabet path which needs to be modified to
your own path when rec_model_Name use mode 2
use_space_char TRUE Whether to recognize spaces
use_angle_cls FALSE Whether to load classification model
cls_model_dir None the classification inference model folder. There are two ways to
transfer parameters, 1. None: Automatically download the built-in model
to ~/.paddleocr/cls; 2. The path of the inference model converted
by yourself, the model and params files must be included in the
model path
cls_image_shape "3,48,192" image shape of classification algorithm
label_list ['0','180'] label list of classification algorithm
cls_batch_num 30 When performing classification, the batchsize of forward images
enable_mkldnn FALSE Whether to enable mkldnn
use_zero_copy_run FALSE Whether to forward by zero_copy_run
lang ch The support language, Only Chinese (ch), English (en), French (french),
German (german), Korean (korean), Japanese (japan) are supported
det TRUE Enable detection when ppocr.ocr func exec
rec TRUE Enable recognition when ppocr.ocr func exec
cls FALSE Enable classification when ppocr.ocr func exec. This parameter only
exists in code usage mode
"""