Skip to content

Commit

Permalink
Moved pyocr.get_available_tools() into a method
Browse files Browse the repository at this point in the history
  • Loading branch information
danielquinn committed Feb 21, 2016
1 parent 5f0962b commit 3a7923e
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 15 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ matrix:
env: TOXENV=py34
- python: 3.5
env: TOXENV=py35
- env: TOXENV=pep8
- python: 3.5
env: TOXENV=pep8

install:
- pip install --requirement requirements.txt
Expand Down
27 changes: 13 additions & 14 deletions src/documents/consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,6 @@
from .languages import ISO639


def image_to_string(args):
    """
    OCR a single PNG from the consumer's scratch directory.

    ``args`` is one ``(consumer, png, lang)`` tuple, packed that way so the
    function can be handed straight to ``Pool.map``.  The image is opened,
    rotated to the detected orientation when the OCR tool supports
    orientation detection, and then passed to the tool's own
    ``image_to_string``.  The recognised text is returned.
    """
    consumer, filename, language = args
    path = os.path.join(consumer.SCRATCH, filename)

    with Image.open(path) as image:
        engine = consumer.OCR

        if engine.can_detect_orientation():
            try:
                detected = engine.detect_orientation(image, lang=language)
                image = image.rotate(detected["angle"], expand=1)
            except TesseractError:
                # Orientation detection is best effort: if it fails, the
                # image is OCR'd exactly as it was opened.
                pass

        return engine.image_to_string(image, lang=language)


class OCRError(Exception):
pass

Expand All @@ -61,7 +49,6 @@ class Consumer(object):
CONSUME = settings.CONSUMPTION_DIR
THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None

OCR = pyocr.get_available_tools()[0]
DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE

REGEX_TITLE = re.compile(
Expand Down Expand Up @@ -239,12 +226,24 @@ def _ocr(self, pngs, lang):

with Pool(processes=self.THREADS) as pool:
r = pool.map(
image_to_string, itertools.product([self], pngs, [lang]))
self.image_to_string, itertools.product(pngs, [lang]))
r = " ".join(r)

# Strip out excess white space to allow matching to go smoother
return re.sub(r"\s+", " ", r)

def image_to_string(self, args):
    """
    OCR a single PNG from ``self.SCRATCH`` and return the recognised text.

    ``args`` is one ``(png, lang)`` tuple, as produced by the
    ``itertools.product`` call that feeds ``Pool.map``.  The OCR tool is
    looked up on every call (the first entry of
    ``pyocr.get_available_tools()``) rather than read from a class
    attribute.
    """
    filename, language = args
    engine = pyocr.get_available_tools()[0]
    path = os.path.join(self.SCRATCH, filename)

    with Image.open(path) as image:
        if engine.can_detect_orientation():
            try:
                detected = engine.detect_orientation(image, lang=language)
                image = image.rotate(detected["angle"], expand=1)
            except TesseractError:
                # Orientation detection is best effort: if it fails, the
                # image is OCR'd exactly as it was opened.
                pass

        return engine.image_to_string(image, lang=language)

def _guess_attributes_from_name(self, parseable):
"""
We use a crude naming convention to make handling the sender, title,
Expand Down

0 comments on commit 3a7923e

Please sign in to comment.