In [360]:
%matplotlib inline

In [212]:
from __future__ import print_function
import cPickle, gzip
import numpy as np
from matplotlib import pyplot as plt
from IPython.html.widgets import interact
from itertools import product
import parallel_coroutine
import theano
import theano.tensor as T
import unittest
import random
import math
from scipy.ndimage import filters

In [4]:
# the MNIST data set
# from http://deeplearning.net/tutorial/gettingstarted.html#index-1
with gzip.open('../Data/mnist.pkl.gz', 'rb') as fobj:
    train_set, valid_set, test_set = cPickle.load(fobj)
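
The pickle contains three (images, labels) pairs. A quick sanity check of the shapes (assuming the standard deeplearning.net split, this should print 50000/10000/10000 rows of 784 pixels each):

for (name, (images, labels)) in zip(("train", "valid", "test"),
                                    (train_set, valid_set, test_set)):
    print(name, images.shape, labels.shape)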

In [205]:
# Helper routines
def shapes(arrays):
    return [np.shape(a) for a in arrays]

def means(arrays):
    return [np.mean(a) for a in arrays]

def imshow(W, **kwargs):
    """imshow with centered colors
    """
    m = np.max(np.abs(W))
    kwargs.setdefault("vmin", -m)
    kwargs.setdefault("vmax", +m)

    return plt.imshow(W, **kwargs)

def nonzero_part_x(img):
    """Clip the image to its nonzero part in x direction
    """
    col_max = np.max(img, axis=0) > 0.1
    img_nonzero, = np.nonzero(col_max)
    return img[:, img_nonzero[0]:img_nonzero[-1] + 1]

def get_random_image(digits, w, h):
    """Get a random image correctly reshaped to be shown with imshow
    """
    idx = random.randint(0, digits.shape[0] - 1)
    return digits[idx].reshape((w, h))

def multi_digit_example(digits, w, h):
    """Return multiple digits arranged in a row
    """
    parts = []
    margins = []
    total_width = 0

    while total_width < 3 * w:
        margin = random.randint(1, 4)
        img = get_random_image(digits, w, h)
        img = nonzero_part_x(img)

        parts.append(img)
        margins.append(margin)
        total_width += img.shape[1] + margin

    res = np.zeros((h, total_width))
    pos = 0
    for (part, margin) in zip(parts, margins):
        part_w = part.shape[1]
        res[:, pos:pos + part_w] = part
        pos += part_w + margin

    dx = random.randint(0, total_width - 1)
    res = np.roll(res, dx, axis=1)

    return res[:, :2 * w].reshape((-1))

def execute(func):
    "make scoping sensible in IPython"
    func()
    return func
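
As a quick visual check of the helpers above (assuming train_set from the loading cell), this clips one digit to its nonzero columns and compares the shapes before and after:

digit = get_random_image(train_set[0], 28, 28)
print(shapes([digit, nonzero_part_x(digit)]))  # e.g. [(28, 28), (28, 18)]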

In [6]:
# Parameters
w, h = 28, 28
v_size = h * (2 * w)  # each example is two digit widths wide: 28 * 56 = 1568 pixels

In [7]:
def generate_transformed_data(source, size):
    result = np.zeros((size, v_size))

    for i in range(size):
        result[i, :] = multi_digit_example(source, w, h).reshape((-1))

    return result

transformed_train = generate_transformed_data(train_set[0], int(1e6))
transformed_valid = generate_transformed_data(valid_set[0], int(1e2))
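
One generated example, reshaped back into its h x 2w image, makes the random margins and the rolling visible:

plt.figure(figsize=(4, 2))
plt.imshow(transformed_train[0].reshape((h, 2 * w)), interpolation="nearest")
plt.title("generated multi-digit example")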

In [238]:
class AutoEncoderModel(object):
    def __init__(self, layer_sizes, soft_relu=True, tied_weights=True):
        self.layer_sizes = list(layer_sizes)
        self.tied_weights = tied_weights

        self.soft_relu = soft_relu

        self.rng = T.shared_randomstreams.RandomStreams(np.random.randint(2 ** 30))

        self.weights_enc, self.weights_dec = self._init_weights(layer_sizes, tied_weights)
        self.factors = self._init_factors(layer_sizes)
        self.bias_enc = self._init_bias_enc(layer_sizes)
        self.bias_dec = self._init_bias_dec(layer_sizes)

        self.parameters = []
        self.parameters.extend(self.weights_enc)
        self.parameters.extend(self.weights_dec)
        self.parameters.extend(self.bias_enc)
        self.parameters.extend(self.bias_dec)

        if tied_weights:
            self.parameters.extend(self.factors)

        # with tied weights the encoder and decoder share objects; deduplicate
        self.parameters = list(set(self.parameters))

        self.momentum = self._init_momentum(self.parameters)

        # rectified-linear units everywhere except the top encoding layer
        self.use_relu = [True for _ in layer_sizes[1:]]
        self.use_relu[-1] = False

    def _init_weights(self, layer_sizes, tied_weights):
        def get_weights(idx, shape):
            # TODO: change back to 4.0 * np.sqrt(6.0 / sum(shape))
            mag = 1.0 / np.sqrt(np.sum(shape))
            weight = np.random.uniform(low=-mag, high=+mag, size=shape)
            weight = np.asarray(weight, dtype=theano.config.floatX)
            weight_enc = theano.shared(weight, name="weight_enc{0}".format(idx), borrow=True)

            if tied_weights:
                return weight_enc, weight_enc

            weight_dec = theano.shared(np.array(weight), name="weight_dec{0}".format(idx), borrow=True)

            return weight_enc, weight_dec

        weights_enc, weights_dec = [], []

        for (idx, shape) in enumerate(self.weight_shapes(layer_sizes)):
            we, wd = get_weights(idx, shape)
            weights_enc.append(we)
            weights_dec.append(wd)

        return weights_enc, weights_dec

    def _init_factors(self, layer_sizes):
        def make(i, ls):
            factor = np.ones(ls, dtype=theano.config.floatX)
            return theano.shared(factor, name="factor{0}".format(i), borrow=True)

        return [make(i, l) for (i, l) in enumerate(layer_sizes[1:])]

    def _init_momentum(self, parameters):
        def asshared(x, name):
            x = np.asarray(x, dtype=theano.config.floatX)
            return theano.shared(x, name=name, borrow=True)

        return [asshared(np.zeros(p.get_value(borrow=True).shape), "momentum_" + p.name)
                for p in parameters]

    def _init_bias_enc(self, layer_sizes):
        return [self._get_bias("bias_enc{0}".format(i), s)
                for (i, s) in enumerate(layer_sizes[1:])]

    def _init_bias_dec(self, layer_sizes):
        return [self._get_bias("bias_dec{0}".format(i), s)
                for (i, s) in enumerate(layer_sizes[:-1])]

    @staticmethod
    def _get_bias(name, shape):
        bias = np.zeros(shape)
        bias = np.asarray(bias, dtype=theano.config.floatX)
        bias = theano.shared(bias, name=name, borrow=True)
        return bias

    def full_transform(self, x, as_value=False, dropout=0.0):
        encoded = self.encode(x, as_value=as_value, dropout=dropout)
        decoded = self.decode(encoded[-1], as_value=as_value, dropout=dropout)
        encoded.extend(decoded)
        return encoded

    def dropout_masks(self, dropout, as_value=False):
        # inverted dropout: scale kept units by 1/(1-p) so expectations are unchanged
        rng = self.rng if not as_value else np.random
        get_mask = lambda size, p: rng.binomial(n=1, p=(1.0 - p), size=size) / (1.0 - p)
        return [get_mask(ls if as_value else b.shape, dropout)
                for (ls, b) in zip(self.layer_sizes[1:], self.bias_enc)]

    def encode(self, x, as_value=False, dropout=0.0):
        encoded = []

        for (W, b, use_relu) in zip(self.weights_enc, self.bias_enc, self.use_relu):
            x = (self.transform(W, b, x, as_value=as_value)
                 if not use_relu else
                 self.transform_relu(W, b, x, as_value=as_value))
            encoded.append(x)

        return encoded

    def decode(self, x, as_value=False, dropout=0.0):
        decoded = []

        for (W, b, f, use_relu) in zip(reversed(self.weights_dec),
                                       reversed(self.bias_dec),
                                       reversed(self.factors),
                                       reversed(self.use_relu)):
            #x = self.dropout(x, dropout, as_value=as_value)
            x = self._multiply_factor(x, f, as_value=as_value)
            x = (self.transform(W, b, x, transpose=True, as_value=as_value)
                 if not use_relu else
                 self.transform_relu(W, b, x, transpose=True, as_value=as_value))
            decoded.append(x)

        return decoded

    def _multiply_factor(self, x, f, as_value=False):
        return f.get_value() * x if as_value else f * x

    def transform_relu(self, W, b, x, transpose=False, as_value=False):
        if as_value:
            W = W.get_value(borrow=True)
            b = b.get_value(borrow=True)

        W = W.T if transpose else W

        y = (np.dot(x, W.T) + b) if as_value else (T.dot(x, W.T) + b)

        if self.soft_relu:
            return np.log(1 + np.exp(y)) if as_value else T.log(1 + T.exp(y))

        else:
            return np.maximum(0, y) if as_value else T.maximum(0, y)

    def transform(self, W, b, x, transpose=False, as_value=False):
        if as_value:
            W = W.get_value(borrow=True)
            b = b.get_value(borrow=True)

        W = W.T if transpose else W
        y = (np.dot(x, W.T) + b) if as_value else (T.dot(x, W.T) + b)
        return (1.0 / (1.0 + np.exp(-y))) if as_value else T.nnet.sigmoid(y)

    def corrupt(self, x, corruption_level):
        # randomly zero out inputs for denoising training
        mask = self.rng.binomial(size=x.shape, n=1, p=1 - corruption_level)
        return mask * x

    def get_training_function(self, data, corruption_level, learning_rate, dropout, regularization, momentum_rate=0.0):
        corrupted = self.corrupt(data, corruption_level)
        cost = self.get_cost(data, corrupted, dropout, regularization=regularization)

        if False: #abs(momentum_rate) < 1e-5:
            # plain gradient descent updates
            updates = list((p, p - learning_rate * T.grad(cost, p))
                           for p in self.parameters)

        else:
            # classical momentum: all update expressions see the old values,
            # so the parameter step uses the previous velocity
            updates = list((g, momentum_rate * g - learning_rate * T.grad(cost, p))
                           for (p, g) in zip(self.parameters, self.momentum))
            updates.extend((p, p + g)
                           for (p, g) in zip(self.parameters, self.momentum))

        return cost, updates

    def get_cost(self, data, corrupted, dropout=0.0, regularization=0.0):
        activations = self.full_transform(corrupted, dropout=dropout)
        reconstructed = activations[-1]

        loss = - T.sum(data * T.log(reconstructed) +
                       (1.0 - data) * T.log(1.0 - reconstructed),
                       axis=1)

        return T.mean(loss)

    @staticmethod
    def weight_shapes(layer_sizes):
        return zip(layer_sizes[1:], layer_sizes[:-1])

def share_data(x):
    x = np.asarray(x, dtype=theano.config.floatX)
    return theano.shared(x, borrow=True)

def run(self):
    training_data = share_data(transformed_train)
    shared_test_data = share_data(transformed_valid)

    self.model = AutoEncoderModel([v_size, 50 * 50, 50 * 50, 20 * 20], tied_weights=True)
    self.training_rate = 0.001
    self.corruption_level = 0.5
    self.dropout = 0.25
    self.momentum = 0.0

    batch_size = 20
    yield_size = 100
    training_epochs = 15
    corruption_level = 0.1
    regularization = 0.0

    n_batches = training_data.get_value(borrow=True).shape[0] // batch_size
    batch_index = T.lscalar()
    batch_data = training_data[batch_index * batch_size:(batch_index + 1) * batch_size]

    example_data = T.dmatrix(name="example_data")

    def build_functions():
        cost, updates = self.model.get_training_function(example_data,
                                                         self.corruption_level,
                                                         self.training_rate,
                                                         dropout=self.dropout,
                                                         regularization=regularization,
                                                         momentum_rate=self.momentum)

        evaluate_and_update = theano.function([batch_index],
                                              cost,
                                              updates=updates,
                                              givens={example_data: batch_data})

        test_cost = self.model.get_cost(shared_test_data, shared_test_data)

        evaluate_test = theano.function([], test_cost)

        return evaluate_and_update, evaluate_test

    yield_index = 0
    for epoch in range(training_epochs):
        evaluate_and_update, evaluate_test = build_functions()

        for batch in range(n_batches):
            result = evaluate_and_update(batch)

            if (yield_index % yield_size) == 0:
                test_error = evaluate_test()
                yield epoch, batch, result, test_error

            else:
                yield None

            yield_index += 1

        yield "new_epoch"
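
Before starting the real run it is cheap to smoke-test the numpy (as_value=True) code path on a toy model; the layer sizes below are arbitrary and only serve to check that encoding and decoding round-trip the shapes:

tiny = AutoEncoderModel([16, 8, 4], tied_weights=True)
activations = tiny.full_transform(np.random.rand(3, 16), as_value=True)
print(shapes(activations))  # two encoder + two decoder outputs: [(3, 8), (3, 4), (3, 8), (3, 16)]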

In [239]:
try:
    training.stop()

except NameError:
    print("no prior training running")

training = parallel_coroutine.ParallelCoroutine()
training.execute(run)
training.start()
training.controls
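
The parallel_coroutine module is not shown in this notebook. A minimal, purely hypothetical stand-in for the interface the cells above rely on (a threaded driver that passes itself as self into run, so attributes like training.model become visible to other cells, and that collects the yielded tuples in .result):

import threading

class ParallelCoroutine(object):
    """Hypothetical sketch of the interface used above, not the real module."""
    def __init__(self):
        self.result = []
        self.is_running = False
        self.controls = None  # the real module presumably exposes widgets here
        self._stop = False

    def execute(self, generator_func):
        # the generator receives this object as `self`
        self._gen = generator_func(self)

    def start(self):
        def loop():
            self.is_running = True
            for item in self._gen:
                if item is not None:
                    self.result.append(item)
                if self._stop:
                    break
            self.is_running = False
        self._thread = threading.Thread(target=loop)
        self._thread.start()

    def stop(self):
        self._stop = True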

In [323]:
training.is_running

Out[323]:
False

In [350]:
@execute
def plot_error():
    result = [t for t in training.result if isinstance(t, tuple)]
    if not len(result):
        result = [(0, 0, 0, 0)]

    E = np.asarray([t[2] for t in result])
    TE = np.asarray([t[3] for t in result])
    final = result[-1]
    steps = np.arange(len(E))

    plt.figure(figsize=(4, 3))
    plt.plot(steps, E, label="training")
    plt.plot(steps, TE, "r-", label="test")
    plt.legend(loc="upper right", frameon=False)
    plt.title("reconstruction error (run: {0}, step: {1}"
              .format(final[0], final[1])
              + (", running)" if training.is_running else ")"))
    plt.xlabel("batch #")

    plt.ylim(0.0, 1.05 * np.max(E))
    plt.xlim(-0.05 * np.max(steps), np.max(steps))

In [359]:
@interact(idx=(0, max(transformed_valid.shape[0], transformed_train.shape[0]) - 1))
def plot(idx=0, training_example=True):
    data = transformed_train if training_example else transformed_valid
    idx = min(idx, data.shape[0] - 1)
    example = data[idx, :]
    transformed = training.model.full_transform(example, True)

    def show_example():
        plt.subplot(1, 1 + len(transformed), 1)
        plt.imshow(example.reshape((h, 2 * w)), vmin=-1, vmax=1, interpolation="nearest")

    def show_hidden():
        for (i, x) in enumerate(transformed[:-1]):
            plt.subplot(1, 1 + len(transformed), 2 + i)
            plt.imshow(reshape(x), vmin=-1, vmax=1, interpolation="nearest")
            plt.title("[{0:.1e}, {1:.1e}]".format(np.min(x), np.max(x)))

    def show_reconstruction():
        plt.subplot(1, 1 + len(transformed), 1 + len(transformed))
        x = transformed[-1]
        plt.imshow(x.reshape((h, 2 * w)), vmin=-1, vmax=1, interpolation="nearest")

    def reshape(x):
        w = int(math.sqrt(x.size))
        return x.reshape((w, w))

    plt.figure(figsize=(3 * (1 + len(transformed)), 2.75))
    show_example()
    show_hidden()
    show_reconstruction()
    plt.suptitle("{type} example {example}"
                 .format(example=idx, type=("training" if training_example else "test")))


In [358]:
@interact(idx=(0, max(training.model.layer_sizes) - 1), layer=(0, len(training.model.layer_sizes) - 2))
def plot(idx=0, layer=0):
    weights_enc = training.model.weights_enc[layer].get_value(borrow=True)
    weights_dec = training.model.weights_dec[layer].get_value(borrow=True)

    assert weights_enc.shape == weights_dec.shape

    idx = min(idx, weights_enc.shape[0] - 1)

    if layer > 0:
        layer_w = int(np.sqrt(weights_enc.shape[1]))
        layer_h = layer_w

    else:
        layer_w = 2 * w
        layer_h = h

    plt.figure(figsize=(6, 5))
    imshow(weights_enc[idx, :].reshape((layer_h, layer_w)), interpolation="nearest")
    plt.colorbar(orientation="horizontal")
    plt.title("feature {0} in layer {1}".format(idx, layer))

In [329]:
parameters = {"values": [p.get_value(borrow=True) for p in training.model.parameters],
              "layer_sizes": training.model.layer_sizes,
              "tied_weights": training.model.tied_weights,
              "relu": training.model.use_relu,
              "soft_relu": training.model.soft_relu,
              "result": training.result,
              "training_rate": training.training_rate,
              "dropout": training.dropout,
              "corruptions": training.corruption_level}

In [330]:
#with open("cache/parmeters-20150128.pck", "w") as fobj:
#    cPickle.dump(parameters, fobj)
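
To restore a model from such a pickle later, the saved arrays can be pushed back into a freshly built model's shared variables. A minimal sketch, assuming the parameter ordering is the same as at save time (note that list(set(...)) in __init__ does not guarantee a stable order across runs):

#with open("cache/parmeters-20150128.pck") as fobj:
#    restored = cPickle.load(fobj)
#
#model = AutoEncoderModel(restored["layer_sizes"],
#                         soft_relu=restored["soft_relu"],
#                         tied_weights=restored["tied_weights"])
#for (p, value) in zip(model.parameters, restored["values"]):
#    p.set_value(value)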

In []: