From a1ca342a97163e7a73a10e21c6849ed6b7dbc78a Mon Sep 17 00:00:00 2001 From: Recolic Keghart <root@recolic.net> Date: Sun, 17 May 2020 20:30:12 +0800 Subject: [PATCH] Add hust_code OCR --- hust_code.py | 50 +++++++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/hust_code.py b/hust_code.py index 8f3ded4..34e6138 100644 --- a/hust_code.py +++ b/hust_code.py @@ -1,26 +1,38 @@ from PIL import Image -import sys +import sys, subprocess -i, o = sys.argv[1:] +tmp_fname = '/tmp/.recolic-hust.ocr-tmp.png' -im = Image.open(i) -# In pass.hust.edu.cn verification code, the second frame is COMPLETE! -# let's get the second frame. -im.seek(im.tell()+1) -im.save(o) +def prepare_ocr(input_fname, output_fname): + im = Image.open(input_fname) + # In pass.hust.edu.cn verification code, the second frame is COMPLETE! + # let's get the second frame. + im.seek(im.tell()+1) + + # To white-black + im = im.convert('L') + im = im.point(lambda x: 0 if x<254 else 255, '1') + # Try to crop. The Silly ocr doesn't like a single_line good image... + # Should use `tesseract output.png stdout --psm 8 -c tessedit_char_whitelist=0123456789` for cropped image. + #w, h = im.size + #if w == 90 and h == 58: + # im = im.crop((0, 18, w, 39)) + #else: + # print('WARNING: Expected image w=90, h=58, Got image w={}, h={}.'.format(w, h)) + # Save for OCR + im.save(output_fname) -im = Image.open(o) -# To white-black -im = im.convert('L') -im = im.point(lambda x: 0 if x<254 else 255, '1') -# Try to crop -w, h = im.size -if w == 90 and h == 58: - im = im.crop((0, 18, w, 39)) -else: - print('WARNING: Expected image w=90, h=58, Got image w={}, h={}.'.format(w, h)) -# Save for OCR -im.save(o) +def do_ocr(input_fname): + output = subprocess.check_output(['tesseract', input_fname, 'stdout']).decode('utf-8') + for line in output.split('\n'): + line = line.strip() + if len(line) == 4 and line.isdigit(): + return line + return None +#i = sys.argv[1] +#prepare_ocr(i, tmp_fname) +#print(do_ocr(tmp_fname)) +def recognize_hust_code() -- GitLab