import subprocess
import sys

from PIL import Image

# Scratch file used to hand the preprocessed image over to tesseract.
tmp_fname = '/tmp/.recolic-hust.ocr-tmp.png'


def prepare_ocr(input_fname, output_fname):
    """Preprocess a verification-code image so it can be fed to OCR.

    Selects the frame after the current one (the second frame of a freshly
    opened image), converts it to grayscale, thresholds it to pure
    black/white and saves the result.

    Args:
        input_fname: Path of the multi-frame image to read.
        output_fname: Path the black/white image is written to.

    Raises:
        EOFError: Propagated from ``seek`` when the image has no second frame.
    """
    # The context manager closes the underlying file once the frame is copied.
    with Image.open(input_fname) as im:
        # In pass.hust.edu.cn verification code, the second frame is COMPLETE!
        # let's get the second frame.
        im.seek(im.tell() + 1)
        # convert() returns a new image object, so the pixel data is still
        # usable after the source file has been closed.
        gray = im.convert('L')

    # To white-black: anything darker than 254 becomes black, the rest white.
    bw = gray.point(lambda x: 0 if x < 254 else 255, '1')

    # Try to crop. The Silly ocr doesn't like a single_line good image...
    # Should use `tesseract output.png stdout --psm 8 -c tessedit_char_whitelist=0123456789` for cropped image.
    #w, h = bw.size
    #if w == 90 and h == 58:
    #    bw = bw.crop((0, 18, w, 39))
    #else:
    #    print('WARNING: Expected image w=90, h=58, Got image w={}, h={}.'.format(w, h))

    # Save for OCR
    bw.save(output_fname)


def do_ocr(input_fname):
    """Run tesseract on an image and return the 4-digit code it finds.

    Args:
        input_fname: Path of the (preprocessed) image to recognize.

    Returns:
        The first output line that, after stripping whitespace, consists of
        exactly four digits; ``None`` when no such line exists.

    Raises:
        subprocess.CalledProcessError: If tesseract exits with a non-zero status.
    """
    raw = subprocess.check_output(['tesseract', input_fname, 'stdout'])
    for line in raw.decode('utf-8').split('\n'):
        candidate = line.strip()
        if len(candidate) == 4 and candidate.isdigit():
            return candidate
    return None


#i = sys.argv[1]
#prepare_ocr(i, tmp_fname)
#print(do_ocr(tmp_fname))

# NOTE(review): the source ends mid-definition at `def recognize_hust_code()`;
# its parameters and body are not visible, so it is intentionally not
# reconstructed here.