Add hust_code OCR

a1ca342a · Recolic Keghart · 0d228b85 · a1ca342a
Commit a1ca342a authored 5 years ago by Recolic Keghart
--- a/hust_code.py
+++ b/hust_code.py
 from PIL import Image
-import sys
+import sys, subprocess

-i, o = sys.argv[1:]
+tmp_fname = '/tmp/.recolic-hust.ocr-tmp.png'

-im = Image.open(i)
-# In pass.hust.edu.cn verification code, the second frame is COMPLETE!
-# let's get the second frame.
-im.seek(im.tell()+1)
-im.save(o)
+def prepare_ocr(input_fname, output_fname):
+    """Turn a verification-code image into a black/white image for OCR.
+
+    Steps one frame forward in the input image, converts that frame to
+    greyscale, thresholds it to pure black and white, and writes the
+    result to ``output_fname``.
+
+    Args:
+        input_fname: Path of the verification-code image. It must contain
+            at least two frames.
+        output_fname: Path the pre-processed image is saved to.
+
+    Raises:
+        EOFError: If the input image has no frame after the first one.
+    """
+    # Image.open() is lazy and keeps the file open for multi-frame images;
+    # the context manager closes it once convert() has copied the frame.
+    with Image.open(input_fname) as source:
+        # In pass.hust.edu.cn verification code, the second frame is COMPLETE!
+        # let's get the second frame.
+        source.seek(source.tell() + 1)
+        grey = source.convert('L')
+
+    # To white-black: values below 254 become black, the rest white.
+    binary = grey.point(lambda x: 0 if x < 254 else 255, '1')
+
+    # NOTE: cropping is deliberately disabled. A 90x58 image used to be
+    # cropped with im.crop((0, 18, w, 39)), but the OCR does not like such
+    # a single-line image. If cropping is ever re-enabled, run
+    # `tesseract output.png stdout --psm 8 -c tessedit_char_whitelist=0123456789`
+    # on the cropped image.
+
+    # Save for OCR
+    binary.save(output_fname)

-im = Image.open(o)
-# To white-black
-im = im.convert('L')
-im = im.point(lambda x: 0 if x<254 else 255, '1')
-# Try to crop
-w, h = im.size
-if w == 90 and h == 58:
-    im = im.crop((0, 18, w, 39))
-else:
-    print('WARNING: Expected image w=90, h=58, Got image w={}, h={}.'.format(w, h))
-# Save for OCR
-im.save(o)
+def do_ocr(input_fname):
+    """Run tesseract on ``input_fname`` and extract a 4-digit code.
+
+    tesseract is invoked with ``stdout`` as its output target and the
+    text it prints is scanned line by line.
+
+    Returns:
+        The first line that, after stripping surrounding whitespace, is
+        exactly four digits; ``None`` when no line qualifies.
+    """
+    raw = subprocess.check_output(['tesseract', input_fname, 'stdout'])
+    candidates = (text.strip() for text in raw.decode('utf-8').split('\n'))
+    return next(
+        (code for code in candidates if len(code) == 4 and code.isdigit()),
+        None,
+    )

+#i = sys.argv[1]
+#prepare_ocr(i, tmp_fname)
+#print(do_ocr(tmp_fname))

+def recognize_hust_code()