Skip to content
Snippets Groups Projects
Commit a1ca342a authored by Recolic Keghart's avatar Recolic Keghart
Browse files

Add hust_code OCR

parent 0d228b85
No related branches found
No related tags found
No related merge requests found
from PIL import Image from PIL import Image
import sys import sys, subprocess
i, o = sys.argv[1:] tmp_fname = '/tmp/.recolic-hust.ocr-tmp.png'
def prepare_ocr(input_fname, output_fname):
    """Turn a pass.hust.edu.cn verification-code image into an OCR-ready bitmap.

    Opens ``input_fname``, advances to the frame after the first one,
    converts it to a 1-bit black/white image and writes the result to
    ``output_fname``.

    Args:
        input_fname: Path of the multi-frame verification-code image.
        output_fname: Path the black/white image is saved to; Pillow picks
            the output format from the file extension.

    Raises:
        EOFError: Raised by Pillow's ``seek`` when the image has no frame
            after the first one.
    """
    # Open in a ``with`` block so the underlying file handle is released
    # even if one of the processing steps raises.
    with Image.open(input_fname) as frames:
        # In pass.hust.edu.cn verification code, the second frame is COMPLETE!
        # Let's get the second frame.
        frames.seek(frames.tell() + 1)

        # To white-black: grayscale first, then everything darker than 254
        # becomes black and the rest becomes white.
        gray = frames.convert('L')
        bitmap = gray.point(lambda x: 0 if x < 254 else 255, '1')

        # Try to crop. The silly OCR doesn't like a single-line good image...
        # Should use
        #   tesseract output.png stdout --psm 8 -c tessedit_char_whitelist=0123456789
        # for the cropped image. Cropping is kept here, disabled, for reference:
        # w, h = bitmap.size
        # if w == 90 and h == 58:
        #     bitmap = bitmap.crop((0, 18, w, 39))
        # else:
        #     print('WARNING: Expected image w=90, h=58, Got image w={}, h={}.'.format(w, h))

        # Save for OCR
        bitmap.save(output_fname)
def do_ocr(input_fname):
    """Run tesseract on an image and return the 4-digit code it recognises.

    Invokes ``tesseract <input_fname> stdout`` and scans the recognised text
    line by line.

    Args:
        input_fname: Path of the image handed to tesseract.

    Returns:
        The first output line that, after stripping surrounding whitespace,
        is exactly four ASCII digits; ``None`` if no line qualifies.

    Raises:
        subprocess.CalledProcessError: If tesseract exits with a non-zero
            status.
        OSError: If the ``tesseract`` executable cannot be started.
    """
    ascii_digits = '0123456789'
    raw = subprocess.check_output(['tesseract', input_fname, 'stdout'])
    for line in raw.decode('utf-8').split('\n'):
        candidate = line.strip()
        # str.isdigit() is also true for non-ASCII digits (fullwidth,
        # superscript, ...), which can never be a valid verification code,
        # so compare against the ASCII digits explicitly.
        if len(candidate) == 4 and all(ch in ascii_digits for ch in candidate):
            return candidate
    return None
#i = sys.argv[1]
#prepare_ocr(i, tmp_fname)
#print(do_ocr(tmp_fname))
def recognize_hust_code()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment