Python 3 fixes (#12)

nektor211 · emedvedev · commit d299c5078511 · 2017-10-05T22:51:59.000+02:00
* aocr.util.data_gen: fixed Python 3 StringIO import

* util.dataset.generate: fixed line parsing

* .gitignore: added *~

* util.dataset.generate: use enumerate

* util.dataset.generate: fix bytes handling

* model.model: use xrange, pass range as list

* util.data_gen: fixed Python 3 IO handling
diff --git a/.gitignore b/.gitignore
@@ -116,3 +116,5 @@ misc/
 data/evaluation_data
 .DS_Store
 .venv
+
+*~
diff --git a/aocr/model/model.py b/aocr/model/model.py
@@ -172,7 +172,7 @@ def __init__(self,
             )
 
             insert = table.insert(
-                tf.constant(range(len(DataGen.CHARMAP)), dtype=tf.int64),
+                tf.constant(list(range(len(DataGen.CHARMAP))), dtype=tf.int64),
                 tf.constant(DataGen.CHARMAP),
             )
 
@@ -425,17 +425,17 @@ def visualize_attention(self, filename, attentions, output, label, flag_incorrec
                         (mw, h),
                         Image.ANTIALIAS)
                 img_data = np.asarray(img, dtype=np.uint8)
-                for idx in range(len(output)):
+                for idx in xrange(len(output)):
                     output_filename = os.path.join(output_dir, 'image_%d.jpg' % (idx))
                     attention = attentions[idx][:(int(mw/4)-1)]
                     attention_orig = np.zeros(mw)
-                    for i in range(mw):
+                    for i in xrange(mw):
                         if i/4-1 > 0 and i/4-1 < len(attention):
                             attention_orig[i] = attention[int(i/4)-1]
                     attention_orig = np.convolve(attention_orig, [0.199547, 0.200226, 0.200454, 0.200226, 0.199547], mode='same')
                     attention_orig = np.maximum(attention_orig, 0.3)
                     attention_out = np.zeros((h, mw))
-                    for i in range(mw):
+                    for i in xrange(mw):
                         attention_out[:, i] = attention_orig[i]
                     if len(img_data.shape) == 3:
                         attention_out = attention_out[:, :, np.newaxis]
diff --git a/aocr/util/data_gen.py b/aocr/util/data_gen.py
@@ -3,8 +3,10 @@
 
 from .bucketdata import BucketData
 from PIL import Image
-from StringIO import StringIO
-
+try:
+    from StringIO import StringIO as IO
+except ImportError:
+    from io import BytesIO as IO # to handle py2 vs 3
 
 class DataGen(object):
     GO_ID = 1
@@ -54,7 +56,7 @@ def gen(self, batch_size):
                     raw_images, raw_labels = sess.run([images, labels])
                     for img, lex in zip(raw_images, raw_labels):
 
-                        if self.max_width and (Image.open(StringIO(img)).size[0] <= self.max_width):
+                        if self.max_width and (Image.open(IO(img)).size[0] <= self.max_width):
 
                             word = self.convert_lex(lex)
 
@@ -71,6 +73,8 @@ def gen(self, batch_size):
         self.clear()
 
     def convert_lex(self, lex):
+        if isinstance(lex, bytes):
+            lex = lex.decode()
         assert lex and len(lex) < self.bucket_specs[-1][1]
 
         return np.array(
diff --git a/aocr/util/dataset.py b/aocr/util/dataset.py
@@ -1,6 +1,12 @@
 import tensorflow as tf
 import logging
 
+import sys
+
+if sys.version_info[0] < 3:
+    text_type = unicode
+else:
+    text_type = str
 
 def _bytes_feature(value):
     return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
@@ -17,21 +23,21 @@ def generate(annotations_path, output_path, log_step=5000):
     writer = tf.python_io.TFRecordWriter(output_path)
     count = 0
 
-    with open(annotations_path, 'r') as file:
-        for (img_path, label) in file.readlines():
-            idx += 1
+    with open(annotations_path, 'r') as f:
+        for idx, line in enumerate(f):
+            (img_path, label) = line.rstrip('\n').split('\t', 1)
             with open(img_path, 'rb') as img_file:
                 img = img_file.read()
 
             example = tf.train.Example(features=tf.train.Features(feature={
                 'image': _bytes_feature(img),
-                'label': _bytes_feature(label)}))
+                'label': _bytes_feature(text_type.encode(label))}))
 
             writer.write(example.SerializeToString())
 
             if idx % log_step == 0:
-                logging.info('Processed %s pairs.', idx)
+                logging.info('Processed %s pairs.', idx+1)
 
-    logging.info('Dataset is ready: %i pairs.', idx)
+    logging.info('Dataset is ready: %i pairs.', idx+1)
 
     writer.close()