In [360]:
%matplotlib inline
In [212]:
from __future__ import print_function
import cPickle, gzip
import numpy as np
from matplotlib import pyplot as plt
from IPython.html.widgets import interact
from itertools import product
import parallel_coroutine
import threading
import theano
import theano.tensor as T
import unittest
import random
import math
from scipy.ndimage import filters
In [4]:
# the MNIST data set 
# from http://deeplearning.net/tutorial/gettingstarted.html#index-1
with gzip.open('../Data/mnist.pkl.gz', 'rb') as fobj:
    train_set, valid_set, test_set = cPickle.load(fobj)
In [205]:
# Helper routines
def shapes(arrays):
    return [np.shape(a) for a in arrays]

def means(arrays):
    return [np.mean(a) for a in arrays]

def imshow(W, **kwargs):
    """imshow with centered colors
    """
    m = np.max(np.abs(W))
    kwargs.setdefault("vmin", -m)
    kwargs.setdefault("vmax", +m)
    
    return plt.imshow(W, **kwargs)

def nonzero_part_x(img):
    """Clip the image to its nonzero part in x direction
    """
    col_max = np.max(img, axis=0) > 0.1
    img_nonzero, = np.nonzero(col_max)
    return img[:,img_nonzero[0]:img_nonzero[-1] + 1]

def get_random_image(digits, w, h):
    """Get a random image correctly rehaped to be shown with imshow
    """
    idx = random.randint(0, digits.shape[0] - 1)
    return digits[idx].reshape((w,h))
    
def multi_digit_example(digits, w, h):
    """Return several digits arranged in a row, randomly rolled and
    cropped to width 2*w, as a flattened array
    """
    parts = []
    margins = []
    total_width = 0
    
    while total_width < 3 * w:
        margin = random.randint(1, 4)
        img = get_random_image(digits, w, h)
        img = nonzero_part_x(img)
        
        parts.append(img)
        margins.append(margin)
        total_width += img.shape[1] + margin
    
    res = np.zeros((h,total_width))
    pos = 0
    for (part, margin) in zip(parts, margins):
        part_w = part.shape[1]
        res[:,pos:pos + part_w] = part
        pos += part_w + margin
    
    dx = random.randint(0, total_width - 1)
    res = np.roll(res, dx, axis=1)
    
    return res[:,:2*w].reshape((-1))

def execute(func):
    "make scoping sensible in Ipython"
    func()
    return func
In [6]:
# Parameters: each generated example is h pixels high and two digit-widths wide
w, h = 28, 28
v_size = h * (2 * w)  # 28 * 56 = 1568 visible units
In [7]:
def generate_transformed_data(source, size):
    result = np.zeros((size, v_size))
    
    for i in range(size):
        result[i, :] = multi_digit_example(source, w, h)
    
    return result
    
transformed_train = generate_transformed_data(train_set[0], int(1e6))
transformed_valid = generate_transformed_data(valid_set[0], int(1e2))
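In []:
# Quick sanity check (added; not part of the original run): every generated
# example should be a flattened h x (2*w) image -- plot one to verify the
# cropping, margins and random roll above.
assert transformed_train.shape[1] == v_size
plt.figure(figsize=(4, 2))
plt.imshow(transformed_train[0].reshape((h, 2 * w)),
           cmap="gray", interpolation="nearest")
plt.title("generated training example 0")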
In [238]:
class AutoEncoderModel(object):
    def __init__(self, layer_sizes, soft_relu=True, tied_weights=True):
        self.layer_sizes = list(layer_sizes)
        self.tied_weights = tied_weights
        
        self.soft_relu = soft_relu
        
        self.rng = T.shared_randomstreams.RandomStreams(np.random.randint(2 ** 30))
        
        self.weights_enc, self.weights_dec = self._init_weights(layer_sizes, tied_weights)
        self.factors = self._init_factors(layer_sizes)
        self.bias_enc = self._init_bias_enc(layer_sizes)
        self.bias_dec = self._init_bias_dec(layer_sizes)

        self.parameters = []
        self.parameters.extend(self.weights_enc)
        self.parameters.extend(self.weights_dec)
        self.parameters.extend(self.bias_enc)
        self.parameters.extend(self.bias_dec)
        
        if tied_weights:
            self.parameters.extend(self.factors)
        
        self.parameters = list(set(self.parameters))
        
        self.momentum = self._init_momentum(self.parameters)
        
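        # (soft) ReLU on every hidden layer; the innermost code layer (and,
        # via decode, the final reconstruction) uses a sigmoid instead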
        self.use_relu = [True for _ in layer_sizes[1:]]
        self.use_relu[-1] = False
        
    def _init_weights(self, layer_sizes, tied_weights):
        def get_weights(idx, shape):
            # TODO: change back to 4.0 * np.sqrt(6.0 / sum(shape))
            mag = 1.0 / np.sqrt(np.sum(shape))
            weight = np.random.uniform(low=-mag, high=+mag, size=shape)
            weight = np.asarray(weight, dtype=theano.config.floatX)
            weight_enc = theano.shared(weight, name="weight_enc{0}".format(idx), borrow=True)
            
            if tied_weights:
                return weight_enc, weight_enc
                
            weight_dec = theano.shared(np.array(weight), name="weight_dec{0}".format(idx), borrow=True)
            
            return weight_enc, weight_dec
        
        weights_enc, weights_dec = [], []
        
        for (idx, shape) in enumerate(self.weight_shapes(layer_sizes)):
            we, wd = get_weights(idx, shape)
            weights_enc.append(we)
            weights_dec.append(wd)
        
        return weights_enc, weights_dec
    
    def _init_factors(self, layer_sizes):
        def make(i, ls):
            factor = np.ones(ls, dtype=theano.config.floatX)
            return theano.shared(factor, name="factor{0}".format(i), borrow=True)
        
        return [make(i,l) for (i,l) in enumerate(layer_sizes[1:])]
    
    def _init_momentum(self, parameters):
        def asshared(x, name):
            x = np.asarray(x, dtype=theano.config.floatX)
            return theano.shared(x, name=name, borrow=True)
        
        return [asshared(np.zeros(p.get_value(borrow=True).shape), "momentum_" + p.name)
                for p in parameters]
    
    def _init_bias_enc(self, layer_sizes):
        return [self._get_bias("bias_enc{0}".format(i), s) 
                for (i,s) in enumerate(layer_sizes[1:])]
    
    def _init_bias_dec(self, layer_sizes):
        return [self._get_bias("bias_dec{0}".format(i), s) 
                for (i,s) in enumerate(layer_sizes[:-1])]
    
    @staticmethod
    def _get_bias(name, shape):
        bias = np.zeros(shape)
        bias = np.asarray(bias, dtype=theano.config.floatX)
        bias = theano.shared(bias, name=name, borrow=True)
        return bias
    
    def full_transform(self, x, as_value=False, dropout=0.0):
        """Return the activations of all layers, encoder first, then decoder;
        the last entry is the reconstruction.
        """
        dropout_masks = self.get_dropout_masks(dropout, as_value)
        encoded = self.encode(x, as_value=as_value, dropout_masks=dropout_masks)
        decoded = self.decode(encoded[-1], as_value=as_value, dropout_masks=dropout_masks)
        
        encoded.extend(decoded)
        return encoded
    
    def get_dropout_masks(self, dropout, as_value=False):
        rng = self.rng if not as_value else np.random
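        # "inverted" dropout: drop each unit with probability `dropout` and
        # scale the survivors by 1/(1 - p), so expected activations stay
        # unchanged and no rescaling is needed at prediction time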
        get_mask = lambda size, p: rng.binomial(n=1, p=(1.0 - p), size=size) / (1.0 - p)
        return [get_mask(ls if as_value else b.shape, dropout)
                for ls, b in zip(self.layer_sizes[1:], self.bias_enc)]
        
    def encode(self, x, as_value=False, dropout_masks=[]):
        encoded = []
        
        for (W,b,mask,use_relu) in zip(self.weights_enc, self.bias_enc, dropout_masks, self.use_relu):
            x = (self.transform(W, b, x, as_value=as_value) 
                 if not use_relu else 
                 self.transform_relu(W, b, x, as_value=as_value))
            x = mask * x
            encoded.append(x)
            
        return encoded
    
    def decode(self, x, as_value=False, dropout_masks=[]):
        decoded = []
        
        masks = [1.0]
        masks.extend(dropout_masks[:-1])
        
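        # walk the layers deepest-first; use_relu is reversed twice so that the
        # last decode step (the reconstruction) pairs with use_relu[-1] == False
        # and therefore uses a sigmoid, matching the cross-entropy cost below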
        for (W, b, f, mask, use_relu) in reversed(zip(self.weights_dec, self.bias_dec, self.factors, masks, 
                                                      reversed(self.use_relu))):
            x = self._multiply_factor(x, f, as_value=as_value)
            x = (self.transform(W, b, x, transpose=True, as_value=as_value) 
                 if not use_relu else
                 self.transform_relu(W, b, x, transpose=True, as_value=as_value))
            x = mask * x
            decoded.append(x)
            
        return decoded
    
    def _multiply_factor(self, x, f, as_value=False):
        return f.get_value() * x if as_value else f * x
    
    def transform_relu(self, W, b, x, transpose=False, as_value=False):
        if as_value:
            W = W.get_value(borrow=True)
            b = b.get_value(borrow=True)
                
        W = W.T if transpose else W
        
        y = (np.dot(x, W.T) + b) if as_value else (T.dot(x, W.T) + b)
        
        if self.soft_relu:
            return np.log(1 + np.exp(y)) if as_value else T.log(1 + T.exp(y))
        
        else:
            return np.maximum(0, y) if as_value else T.maximum(0, y)
    
    def transform(self, W, b, x, transpose=False, as_value=False):
        if as_value:
            W = W.get_value(borrow=True)
            b = b.get_value(borrow=True)
                
        W = W.T if transpose else W
        y = (np.dot(x, W.T) + b) if as_value else (T.dot(x, W.T) + b)
        return (1.0 / (1.0 + np.exp(-y))) if as_value else T.nnet.sigmoid(y)

    def corrupt(self, x, corruption_level):
        """Masking noise for the denoising autoencoder: zero each input
        unit independently with probability corruption_level.
        """
        mask = self.rng.binomial(size=x.shape, n=1, p=1 - corruption_level)
        return mask * x
    
    def get_training_function(self, data, corruption_level, learning_rate, dropout, regularization, momentum_rate=0.0):
        """Build the (cost, updates) pair from which a training step is compiled."""
        corrupted = self.corrupt(data, corruption_level)
        cost = self.get_cost(data, corrupted, dropout, regularization=regularization)
        updates = self.get_updates(cost, learning_rate, momentum_rate=momentum_rate)
        
        return (cost, updates)
    
    def get_cost(self, data, corrupted, dropout=0.0, regularization=0.0):
        activations = self.full_transform(corrupted, dropout=dropout)
        reconstructed = activations[-1]
        
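        # per-example binary cross-entropy (Bernoulli negative log-likelihood)
        # between the clean input and the reconstruction of its corrupted
        # version; note: the regularization argument is currently unused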
        loss = - T.sum(data * T.log(reconstructed) +
                       (1.0 - data) * T.log(1.0 - reconstructed),
                       axis=1)
        
        return T.mean(loss) 
    
    def get_updates(self, cost, learning_rate, momentum_rate=0.0):
        # classical momentum: each parameter moves by its momentum term, which
        # accumulates the decayed, learning-rate-scaled negative gradient
        updates = []
        
        updates.extend((p, p + g)
                       for (p, g) in zip(self.parameters, self.momentum))
        
        updates.extend((g, momentum_rate * g - learning_rate * T.grad(cost, p))
                       for (p, g) in zip(self.parameters, self.momentum))
        
        return updates
    
    @staticmethod
    def weight_shapes(layer_sizes):
        # e.g. [1568, 2500, 2500, 400] -> [(2500, 1568), (2500, 2500), (400, 2500)]
        return zip(layer_sizes[1:], layer_sizes[:-1])
    
def share_data(x):
    """Copy x into a theano shared variable of the configured float type."""
    x = np.asarray(x, dtype=theano.config.floatX)
    return theano.shared(x, borrow=True)

def run(self):
    # training coroutine driven by parallel_coroutine; `self` is the
    # ParallelCoroutine instance, so the model and hyperparameters set here
    # stay accessible from other cells (e.g. training.model below)
    training_data = share_data(transformed_train)
    shared_test_data = share_data(transformed_valid)
    
    self.model = AutoEncoderModel([v_size, 50 * 50, 50 * 50, 20 * 20], tied_weights=True)
    self.training_rate = 0.001
    self.corruption_level = 0.5
    self.dropout = 0.25
    self.momentum = 0.0
    
    batch_size = 20
    yield_size = 100
    training_epochs = 15
    regularization = 0.0
    
    n_batches = training_data.get_value(borrow=True).shape[0] // batch_size
    batch_index = T.lscalar()
    batch_data = training_data[batch_index * batch_size:(batch_index + 1) * batch_size]
    
    example_data = T.dmatrix(name="example_data")
    
    def build_functions():
        cost, updates = self.model.get_training_function(example_data, 
                                                         self.corruption_level, 
                                                         self.training_rate, 
                                                         dropout=self.dropout,
                                                         regularization=regularization,
                                                         momentum_rate=self.momentum)
    
        evaluate_and_update = theano.function([batch_index],
                                              cost,
                                              updates=updates,
                                              givens={example_data: batch_data})
        
        test_cost = self.model.get_cost(shared_test_data, shared_test_data)
    
        evaluate_test = theano.function([], test_cost)
        
        return evaluate_and_update, evaluate_test
        
    yield_index = 0
    for epoch in range(training_epochs):
        evaluate_and_update, evaluate_test = build_functions()
        
        for batch in range(n_batches):
            result = evaluate_and_update(batch)
        
            if (yield_index % yield_size) == 0:
                test_error = evaluate_test()
                yield epoch, batch, result, test_error

            else:
                yield None
            
            yield_index += 1
        
        yield "new_epoch"
In [239]:
try:
    training.stop()

except NameError:
    print("no prior training running")

training = parallel_coroutine.ParallelCoroutine()
training.execute(run)
training.start()
training.controls
In [323]:
training.is_running
Out[323]:
False
In [350]:
@execute
def plot_error():
    result = [t for t in training.result if isinstance(t, tuple)]
    if not len(result):
        result = [(0,0,0,0)]
        
    E = np.asarray([t[2] for t in result])
    TE = np.asarray([t[3] for t in result])
    final = result[-1]
    steps = np.arange(len(E))  # avoid shadowing theano.tensor's alias T
    
    plt.figure(figsize=(4,3))
    plt.plot(steps, E, label="training")
    plt.plot(steps, TE, "r-", label="test")
    plt.legend(loc="upper right", frameon=False)
    plt.title("reconstruction error (epoch: {0}, step: {1}"
              .format(final[0], final[1]) 
              + (", running)" if training.is_running else ")"))
    plt.xlabel("batch #")

    plt.ylim(0.0, 1.05 * np.max(E))
    plt.xlim(-0.05 * np.max(steps), np.max(steps))
In [359]:
@interact(idx=(0, max(transformed_valid.shape[0], transformed_train.shape[0]) - 1))
def plot(idx=0, training_example=True):
    data = transformed_train if training_example else transformed_valid
    idx = min(idx, data.shape[0] - 1)
    example = data[idx,:]
    transformed = training.model.full_transform(example, as_value=True)
    
    def show_example():
        plt.subplot(1, 1 + len(transformed), 1)
        plt.imshow(example.reshape((h, 2 * w)), vmin=-1, vmax=1, interpolation="nearest")
    
    def show_hidden():
        for (i, x) in enumerate(transformed[:-1]):
            plt.subplot(1, 1 + len(transformed), 2 + i)
            plt.imshow(reshape(x), vmin=-1, vmax=1, interpolation="nearest")
            plt.title("[{0:.1e}, {1:.1e}]".format(np.min(x), np.max(x)))
    
    def show_reconstruction():
        plt.subplot(1, 1 + len(transformed), 1 + len(transformed))
        x = transformed[-1]
        plt.imshow(x.reshape((h, 2 * w)), vmin=-1, vmax=1, interpolation="nearest")
    
    def reshape(x):
        side = int(math.sqrt(x.size))  # hidden layers are square, e.g. 50x50
        return x.reshape((side, side))
    
    plt.figure(figsize=(3 * (1 + len(transformed)), 2.75))
    show_example()
    show_hidden()
    show_reconstruction()   
    plt.suptitle("{type} example {example}"
                 .format(example=idx, type=("training" if training_example else "test")))
    
In [358]:
@interact(idx=(0, max(training.model.layer_sizes) - 1), layer=(0,len(training.model.layer_sizes) - 1))
def plot(idx=0, layer=0):
    weights_enc = training.model.weights_enc[layer].get_value(borrow=True)
    weights_dec = training.model.weights_dec[layer].get_value(borrow=True)
    
    assert weights_enc.shape == weights_dec.shape
    
    idx = min(idx, weights_enc.shape[0] - 1)
    
    if layer > 0:
        layer_w = int(np.sqrt(weights_enc.shape[1]))
        layer_h = layer_w
    
    else:
        layer_w = 2 * w
        layer_h = h
    
    plt.figure(figsize=(6,5))
    imshow(weights_enc[idx,:].reshape((layer_h, layer_w)), interpolation="nearest")
    plt.colorbar(orientation="horizontal")
    plt.title("feature {0} in layer {1}".format(idx, layer))
In [329]:
parameters = {"values": [p.get_value(borrow=True) for p in training.model.parameters],
              "layer_sizes": training.model.layer_sizes,
              "tied_weights": training.model.tied_weights,
              "relu": training.model.use_relu,
              "soft_relu": training.model.soft_relu,
              "result": training.result,
              "training_rate": training.training_rate,
              "dropout": training.dropout,
              "corruptions": training.corruption_level}
In [330]:
#with open("cache/parmeters-20150128.pck", "w") as fobj:
#    cPickle.dump(parameters, fobj)
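In []:
# Hypothetical save/restore sketch (added; not part of the original notebook).
# model.parameters is built via list(set(...)), so its order is not stable
# across sessions; keying the values by parameter name (names are unique for
# this model) is safer than relying on list order as the dict above does.
def save_parameters(path, model):
    named = {p.name: p.get_value(borrow=True) for p in model.parameters}
    with open(path, "wb") as fobj:
        cPickle.dump({"values": named,
                      "layer_sizes": model.layer_sizes,
                      "soft_relu": model.soft_relu,
                      "tied_weights": model.tied_weights}, fobj)

def load_parameters(path):
    with open(path, "rb") as fobj:
        saved = cPickle.load(fobj)
    model = AutoEncoderModel(saved["layer_sizes"],
                             soft_relu=saved["soft_relu"],
                             tied_weights=saved["tied_weights"])
    for p in model.parameters:
        p.set_value(saved["values"][p.name], borrow=True)
    return model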
In []: