-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrunner.py
More file actions
48 lines (37 loc) · 2.09 KB
/
runner.py
File metadata and controls
48 lines (37 loc) · 2.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from pathlib import Path
from ocrd_utils import EXT_TO_MIME
from .backends.dinglehopper import DinglehopperEvalBackend
from .backends.ocrevalUAtion import OcrevalUAtionEvalBackend
from .backends.PrimaTextEval import PrimaTextEvalBackend
from .backends.CorAsvAnnEval import CorAsvAnnEvalBackend
from .backends.CorAsvAnnCompare import CorAsvAnnCompareBackend
from .backends.OcrdSegmentEvaluate import OcrdSegmentEvaluateBackend
from .backends.IsriOcreval import IsriOcrevalBackend
# Registry of evaluation backends: maps the backend name used for lookup
# to the class that implements that backend.
BACKENDS = {
    'dinglehopper': DinglehopperEvalBackend,
    'ocrevalUAtion': OcrevalUAtionEvalBackend,
    'PrimaTextEval': PrimaTextEvalBackend,
    'CorAsvAnnEval': CorAsvAnnEvalBackend,
    'CorAsvAnnCompare': CorAsvAnnCompareBackend,
    'OcrdSegmentEvaluate': OcrdSegmentEvaluateBackend,
    'IsriOcreval': IsriOcrevalBackend,
}
def guess_mediatype(fname, option_):
    """Guess the media type of a file from its filename extension.

    The extension (``Path(fname).suffix``) is looked up in ``EXT_TO_MIME``.

    Args:
        fname: Path or filename whose extension is inspected.
        option_: Name of the option the user could set instead of relying on
            the guess (e.g. ``'--gt-mediatype'``); only used in the error
            message.

    Returns:
        The ``EXT_TO_MIME`` entry for the file's extension.

    Raises:
        ValueError: If the extension has no entry in ``EXT_TO_MIME``.
    """
    suffix = Path(fname).suffix
    try:
        return EXT_TO_MIME[suffix]
    except KeyError as err:
        # Chain explicitly so the traceback shows the failed lookup as the
        # direct cause instead of "another exception occurred during handling".
        raise ValueError(
            "Cannot guess mimetype from extension '%s' for '%s'. Set %s explicitly"
            % (suffix, fname, option_)
        ) from err
def _resolve_mediatype(mediatype, fname, option_, backend, evaluator):
    """Return a media type for *fname* that *evaluator* supports, or raise ValueError."""
    if not mediatype:
        # Nothing given explicitly: fall back to guessing from the extension.
        mediatype = guess_mediatype(fname, option_)
    if mediatype not in evaluator.supported_mediatypes:
        raise ValueError(
            '%s %s not supported by %s backend, must be one of %s'
            % (option_, mediatype, backend, evaluator.supported_mediatypes)
        )
    return mediatype


def run_eval_backend(config, backend, gt_mediatype, gt_file, ocr_mediatype, ocr_file, pageId):
    """Instantiate an evaluation backend and compare a GT file with an OCR file.

    Args:
        config: Mapping from backend name to the keyword arguments passed to
            that backend's constructor. A missing entry, or an entry that is
            ``None``, means "no options".
        backend: Key into ``BACKENDS`` selecting the backend class.
        gt_mediatype: Media type of ``gt_file``; if falsy it is guessed from
            the file extension.
        gt_file: Ground-truth file.
        ocr_mediatype: Media type of ``ocr_file``; if falsy it is guessed from
            the file extension.
        ocr_file: OCR result file.
        pageId: Passed through unchanged to the backend's ``compare_files``.

    Returns:
        Whatever the backend's ``compare_files`` returns, or ``None`` if the
        backend reports that it is not installed (in which case a hint and the
        docstring of its ``is_installed`` method are printed).

    Raises:
        KeyError: If ``backend`` is not a key of ``BACKENDS``.
        ValueError: If a media type cannot be guessed or is not among the
            backend's ``supported_mediatypes``.
    """
    evaluator_config = config.get(backend)
    if evaluator_config is None:
        # A section that exists but is empty would otherwise crash on `**None`.
        evaluator_config = {}
    evaluator = BACKENDS[backend](**evaluator_config)

    if not evaluator.is_installed():
        print('Backend %s requires installation:' % backend)
        print(evaluator.is_installed.__doc__)
        return None

    # GT is resolved and validated before OCR, so a bad GT type is reported first.
    gt_mediatype = _resolve_mediatype(gt_mediatype, gt_file, '--gt-mediatype', backend, evaluator)
    ocr_mediatype = _resolve_mediatype(ocr_mediatype, ocr_file, '--ocr-mediatype', backend, evaluator)
    return evaluator.compare_files(gt_mediatype, gt_file, ocr_mediatype, ocr_file, pageId)