Merge pull request BVLC#3471 from beijbom/clean-datalayer-tutorial

shelhamer · shelhamer · commit 358b60c5018a · 2016-03-01T10:29:15.000-08:00
[example] tutorial on python data layers and multilabel classification
diff --git a/examples/pascal-multilabel-with-datalayer.ipynb b/examples/pascal-multilabel-with-datalayer.ipynb
diff --git a/examples/pycaffe/layers/pascal_multilabel_datalayers.py b/examples/pycaffe/layers/pascal_multilabel_datalayers.py
@@ -0,0 +1,216 @@
+# imports
+import json
+import time
+import pickle
+import scipy.misc
+import skimage.io
+import caffe
+
+import numpy as np
+import os.path as osp
+
+from xml.dom import minidom
+from random import shuffle
+from threading import Thread
+from PIL import Image
+
+from tools import SimpleTransformer
+
+
+class PascalMultilabelDataLayerSync(caffe.Layer):
+
+    """
+    Synchronous python data layer that feeds PASCAL images and their
+    multilabel ground truth to a network, one batch per forward pass.
+    """
+
+    def setup(self, bottom, top):
+        """
+        Parse the layer parameters, create the image loader and size the
+        two tops ('data' and 'label').
+        """
+        self.top_names = ['data', 'label']
+
+        # param_str is evaluated into a python dictionary of parameters.
+        # NOTE(review): eval() executes whatever the network definition
+        # contains, so only use this layer with trusted prototxt files.
+        params = eval(self.param_str)
+
+        # Fail early when a required parameter is missing.
+        check_params(params)
+
+        samples_per_batch = params['batch_size']
+        self.batch_size = samples_per_batch
+
+        # The loader hands out one (image, multilabel) pair per call.
+        self.batch_loader = BatchLoader(params, None)
+
+        # The input image size is fixed, so the tops are shaped once here
+        # instead of on every reshape() call.
+        height, width = params['im_shape'][0], params['im_shape'][1]
+        top[0].reshape(samples_per_batch, 3, height, width)
+        # 20 label channels, one per PASCAL class.
+        top[1].reshape(samples_per_batch, 20)
+
+        print_info("PascalMultilabelDataLayerSync", params)
+
+    def forward(self, bottom, top):
+        """
+        Fill both tops with the next batch_size samples from the loader.
+        """
+        for slot in range(self.batch_size):
+            image, labels = self.batch_loader.load_next_image()
+
+            # Write the sample straight into the caffe blobs.
+            top[0].data[slot, ...] = image
+            top[1].data[slot, ...] = labels
+
+    def reshape(self, bottom, top):
+        """
+        Nothing to do: the tops were given their fixed shape in setup().
+        """
+        pass
+
+    def backward(self, top, propagate_down, bottom):
+        """
+        Nothing to do: this layer does not back propagate.
+        """
+        pass
+
+
+class BatchLoader(object):
+
+    """
+    Loads PASCAL images and their multilabel ground truth one at a time.
+
+    The list of image indexes is read from the file
+    pascal_root/ImageSets/Main/SPLIT.txt and is reshuffled every time a
+    full pass over it has been completed.
+    """
+
+    def __init__(self, params, result):
+        """
+        params: dictionary with the entries 'batch_size', 'pascal_root',
+            'im_shape' and 'split'.
+        result: stored on the instance as-is; not used by this class.
+        """
+        self.result = result
+        self.batch_size = params['batch_size']
+        self.pascal_root = params['pascal_root']
+        self.im_shape = params['im_shape']
+
+        # Read the list of image indexes. The "with" block closes the file
+        # deterministically; stripping and skipping empty lines keeps '\r'
+        # and blank entries out of the list.
+        list_path = osp.join(
+            self.pascal_root, 'ImageSets/Main', params['split'] + '.txt')
+        with open(list_path) as list_file:
+            stripped = (line.strip() for line in list_file)
+            self.indexlist = [index for index in stripped if index]
+
+        self._cur = 0  # position of the next image in indexlist
+        # this class does some simple data-manipulations
+        self.transformer = SimpleTransformer()
+
+        # Single-argument print(...) behaves the same on python 2 and 3.
+        print("BatchLoader initialized with {} images".format(
+            len(self.indexlist)))
+
+    def load_next_image(self):
+        """
+        Load the next image and its multilabel vector.
+
+        Returns a tuple (im, multilabel): im is the resized, randomly
+        mirrored image after self.transformer.preprocess(), multilabel is
+        a float32 vector of length 20 holding 1 for every class present.
+        """
+        # Did we finish an epoch? Then start over in a new random order.
+        if self._cur == len(self.indexlist):
+            self._cur = 0
+            shuffle(self.indexlist)
+
+        # Load and resize the image.
+        index = self.indexlist[self._cur]
+        image_path = osp.join(self.pascal_root, 'JPEGImages', index + '.jpg')
+        im = np.asarray(Image.open(image_path))
+        im = scipy.misc.imresize(im, self.im_shape)
+
+        # Simple data augmentation: flip is -1 or +1 with equal chance;
+        # a column step of -1 mirrors the image horizontally.
+        flip = np.random.choice(2) * 2 - 1
+        im = im[:, ::flip, :]
+
+        # Load and prepare ground truth.
+        multilabel = np.zeros(20, dtype=np.float32)
+        anns = load_pascal_annotation(index, self.pascal_root)
+        for label in anns['gt_classes']:
+            # In the multilabel problem we don't care how MANY instances
+            # there are of each class, only whether it is present.
+            # The "-1" is because class index 0 is the background class,
+            # which is not part of the label vector.
+            multilabel[label - 1] = 1
+
+        self._cur += 1
+        return self.transformer.preprocess(im), multilabel
+
+
+def load_pascal_annotation(index, pascal_root):
+    """
+    Parse the PASCAL .xml annotation file of one image.
+
+    This code is borrowed from Ross Girshick's FAST-RCNN code
+    (https://github.com/rbgirshick/fast-rcnn).
+    See publication for further details: (http://arxiv.org/abs/1504.08083).
+
+    Thanks Ross!
+
+    index: image identifier, i.e. the annotation file name without '.xml'.
+    pascal_root: directory that contains the 'Annotations' folder.
+
+    Returns a dictionary with the entries
+        'boxes': (num_objs, 4) uint16 array of 0-based [x1, y1, x2, y2],
+        'gt_classes': (num_objs,) int32 array of class indexes,
+        'gt_overlaps': sparse (num_objs, 21) matrix holding 1.0 in the
+            column of each object's class,
+        'flipped': always False,
+        'index': the index that was passed in.
+    """
+    # The file only imports scipy.misc, which is not guaranteed to load
+    # the scipy.sparse submodule, so import it explicitly.
+    import scipy.sparse
+
+    classes = ('__background__',  # always index 0
+               'aeroplane', 'bicycle', 'bird', 'boat',
+               'bottle', 'bus', 'car', 'cat', 'chair',
+               'cow', 'diningtable', 'dog', 'horse',
+               'motorbike', 'person', 'pottedplant',
+               'sheep', 'sofa', 'train', 'tvmonitor')
+    num_classes = len(classes)
+    # range() instead of xrange() so this runs on python 2 and 3.
+    class_to_ind = dict(zip(classes, range(num_classes)))
+
+    filename = osp.join(pascal_root, 'Annotations', index + '.xml')
+
+    def get_data_from_tag(node, tag):
+        # Text content of the first <tag> element below node.
+        return node.getElementsByTagName(tag)[0].childNodes[0].data
+
+    with open(filename) as f:
+        data = minidom.parseString(f.read())
+
+    objs = data.getElementsByTagName('object')
+    num_objs = len(objs)
+
+    boxes = np.zeros((num_objs, 4), dtype=np.uint16)
+    gt_classes = np.zeros((num_objs), dtype=np.int32)
+    overlaps = np.zeros((num_objs, num_classes), dtype=np.float32)
+
+    # Load object bounding boxes and classes.
+    for ix, obj in enumerate(objs):
+        # Make pixel indexes 0-based
+        x1 = float(get_data_from_tag(obj, 'xmin')) - 1
+        y1 = float(get_data_from_tag(obj, 'ymin')) - 1
+        x2 = float(get_data_from_tag(obj, 'xmax')) - 1
+        y2 = float(get_data_from_tag(obj, 'ymax')) - 1
+        cls = class_to_ind[
+            str(get_data_from_tag(obj, "name")).lower().strip()]
+        boxes[ix, :] = [x1, y1, x2, y2]
+        gt_classes[ix] = cls
+        overlaps[ix, cls] = 1.0
+
+    overlaps = scipy.sparse.csr_matrix(overlaps)
+
+    return {'boxes': boxes,
+            'gt_classes': gt_classes,
+            'gt_overlaps': overlaps,
+            'flipped': False,
+            'index': index}
+
+
+def check_params(params):
+    """
+    Check that the data layer parameters contain every required entry.
+
+    params: dictionary of layer parameters.
+
+    Raises AssertionError naming the first missing entry.
+    """
+    # Raise explicitly rather than with an "assert" statement, so that the
+    # check is still performed when python runs with -O.
+    if 'split' not in params:
+        raise AssertionError(
+            'Params must include split (train, val, or test).')
+
+    for required in ('batch_size', 'pascal_root', 'im_shape'):
+        if required not in params:
+            raise AssertionError('Params must include {}'.format(required))
+
+
+def print_info(name, params):
+    """
+    Output a one-line summary of a data layer's configuration.
+
+    name: name of the layer class to report.
+    params: dictionary with the entries 'split', 'batch_size' and
+        'im_shape'.
+    """
+    # Single-argument print(...) behaves the same on python 2 and 3.
+    print("{} initialized for split: {}, with bs: {}, im_shape: {}.".format(
+        name,
+        params['split'],
+        params['batch_size'],
+        params['im_shape']))
diff --git a/examples/pycaffe/tools.py b/examples/pycaffe/tools.py
@@ -0,0 +1,121 @@
+import numpy as np
+
+
+class SimpleTransformer(object):
+
+    """
+    SimpleTransformer is a simple class for preprocessing and deprocessing
+    images for caffe.
+    """
+
+    def __init__(self, mean=(128, 128, 128)):
+        """
+        mean: per-channel value subtracted from the image in preprocess().
+        """
+        self.mean = np.array(mean, dtype=np.float32)
+        self.scale = 1.0
+
+    def set_mean(self, mean):
+        """
+        Set the mean to subtract for centering the data.
+        """
+        # Convert like __init__ does, so a plain list works as well.
+        self.mean = np.array(mean, dtype=np.float32)
+
+    def set_scale(self, scale):
+        """
+        Set the data scaling.
+        """
+        self.scale = scale
+
+    def preprocess(self, im):
+        """
+        Turn a (rows, columns, channels) image into caffe input.
+
+        The image is converted to float32, its channel order is reversed
+        (change to BGR), the mean is subtracted, the result is multiplied
+        by the scale and the axes are reordered to (channels, rows,
+        columns). The input array is left untouched.
+        """
+        im = np.float32(im)  # float copy; the steps below modify it
+        im = im[:, :, ::-1]  # change to BGR
+        im -= self.mean
+        im *= self.scale
+        return im.transpose((2, 0, 1))
+
+    def deprocess(self, im):
+        """
+        Inverse of preprocess(): returns a uint8 (rows, columns, channels)
+        image.
+
+        The steps build new arrays instead of updating in place, so the
+        array passed in (for example a network blob) is not modified.
+        """
+        im = np.asarray(im).transpose(1, 2, 0)
+        im = np.true_divide(im, self.scale)
+        im = im + self.mean
+        im = im[:, :, ::-1]  # change to RGB
+
+        return np.uint8(im)
+
+
+class CaffeSolver(object):
+
+    """
+    Caffesolver is a class for creating a solver.prototxt file. It sets
+    default values and can export a solver parameter file.
+    Note that all parameters are stored as strings. String variables are
+    stored as strings in strings, e.g. '"fixed"'.
+    """
+
+    def __init__(self, testnet_prototxt_path="testnet.prototxt",
+                 trainnet_prototxt_path="trainnet.prototxt", debug=False):
+        """
+        testnet_prototxt_path: path written as the 'test_net' parameter.
+        trainnet_prototxt_path: path written as the 'train_net' parameter.
+        debug: when true, use very small iteration counts and intervals.
+        """
+        self.sp = {}
+
+        # critical:
+        self.sp['base_lr'] = '0.001'
+        self.sp['momentum'] = '0.9'
+
+        # speed:
+        self.sp['test_iter'] = '100'
+        self.sp['test_interval'] = '250'
+
+        # looks:
+        self.sp['display'] = '25'
+        self.sp['snapshot'] = '2500'
+        self.sp['snapshot_prefix'] = '"snapshot"'  # string within a string!
+
+        # learning rate policy
+        self.sp['lr_policy'] = '"fixed"'
+
+        # important, but rare:
+        self.sp['gamma'] = '0.1'
+        self.sp['weight_decay'] = '0.0005'
+        self.sp['train_net'] = '"' + trainnet_prototxt_path + '"'
+        self.sp['test_net'] = '"' + testnet_prototxt_path + '"'
+
+        # pretty much never change these.
+        self.sp['max_iter'] = '100000'
+        self.sp['test_initialization'] = 'false'
+        self.sp['average_loss'] = '25'  # this has to do with the display.
+        self.sp['iter_size'] = '1'  # this is for accumulating gradients
+
+        if debug:
+            self.sp['max_iter'] = '12'
+            self.sp['test_iter'] = '1'
+            self.sp['test_interval'] = '4'
+            self.sp['display'] = '1'
+
+    def add_from_file(self, filepath):
+        """
+        Reads a caffe solver prototxt file and updates the Caffesolver
+        instance parameters.
+
+        Only flat "key: value" lines are understood. Blank lines and
+        lines whose first non-blank character is '#' are skipped.
+
+        Raises ValueError for any other line that contains no ':'.
+        """
+        with open(filepath, 'r') as f:
+            for line in f:
+                entry = line.strip()
+                if not entry or entry.startswith('#'):
+                    continue
+                # Split at the first ':' only, so that values which contain
+                # a colon themselves (e.g. "C:/snapshots") stay intact.
+                key, colon, value = entry.partition(':')
+                if not colon:
+                    raise ValueError(
+                        'Cannot parse solver line: {!r}'.format(entry))
+                self.sp[key.strip()] = value.strip()
+
+    def write(self, filepath):
+        """
+        Export solver parameters to INPUT "filepath". Sorted alphabetically.
+
+        Raises TypeError if any parameter value is not a string; nothing
+        is written in that case.
+        """
+        entries = sorted(self.sp.items())
+        # Validate before opening, so a bad value cannot leave a partially
+        # written file behind.
+        for key, value in entries:
+            if not isinstance(value, str):
+                raise TypeError('All solver parameters must be strings')
+
+        with open(filepath, 'w') as f:
+            f.writelines('%s: %s\n' % entry for entry in entries)