Initial commit

Laurent El Shafey 2024-12-10 08:56:11 -08:00
commit 9fdd561586
246 changed files with 58283 additions and 0 deletions

1
CONTRIBUTING.md Normal file

@@ -0,0 +1 @@
External contributions are not accepted, sorry!

9
LICENSE Normal file

@@ -0,0 +1,9 @@
Copyright 2023 Google LLC.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

28
Makefile-distrib Executable file

@@ -0,0 +1,28 @@
MODELNAME := _ConvNet
INCLUDES := -I$(PYTHON_INCLUDE_PATH) -I$(NUMPY_INCLUDE_PATH) -I./include -I./include/common -I./include/cudaconv2 -I./include/nvmatrix
LIB := -lpthread -L$(ATLAS_LIB_PATH) -L$(CUDA_INSTALL_PATH)/lib64 -lcblas
USECUBLAS := 1
PYTHON_VERSION=$(shell python -V 2>&1 | cut -d ' ' -f 2 | cut -d '.' -f 1,2)
LIB += -lpython$(PYTHON_VERSION)
GENCODE_ARCH := -gencode=arch=compute_20,code=\"sm_20,compute_20\"
COMMONFLAGS := -DNUMPY_INTERFACE -DMODELNAME=$(MODELNAME) -DINITNAME=init$(MODELNAME)
EXECUTABLE := $(MODELNAME).so
CUFILES := $(shell echo src/*.cu src/cudaconv2/*.cu src/nvmatrix/*.cu)
CU_DEPS := $(shell echo include/*.cuh include/cudaconv2/*.cuh include/nvmatrix/*.cuh)
CCFILES := $(shell echo src/common/*.cpp)
C_DEPS := $(shell echo include/common/*.h)
include common-gcc-cuda-4.0.mk
makedirectories:
$(VERBOSE)mkdir -p $(LIBDIR)
$(VERBOSE)mkdir -p $(OBJDIR)/src/cudaconv2
$(VERBOSE)mkdir -p $(OBJDIR)/src/nvmatrix
$(VERBOSE)mkdir -p $(OBJDIR)/src/common
$(VERBOSE)mkdir -p $(TARGETDIR)
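
The Makefile above pulls every path from the environment: PYTHON_INCLUDE_PATH, NUMPY_INCLUDE_PATH, ATLAS_LIB_PATH and CUDA_INSTALL_PATH, plus the common-gcc-cuda-4.0.mk rules it includes. A minimal invocation sketch, assuming a stock Linux layout (the paths below are placeholders, not part of the repository):

export PYTHON_INCLUDE_PATH=/usr/include/python2.7
export NUMPY_INCLUDE_PATH=/usr/lib/python2.7/dist-packages/numpy/core/include
export ATLAS_LIB_PATH=/usr/lib/atlas
export CUDA_INSTALL_PATH=/usr/local/cuda
make -f Makefile-distrib   # produces the _ConvNet.so Python extension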

9
README.md Normal file

@@ -0,0 +1,9 @@
# AlexNet
This package contains the original AlexNet code.
Krizhevsky, A., Sutskever, I. & Hinton, G. E. (2012).
ImageNet Classification with Deep Convolutional Neural Networks.
In F. Pereira, C. J. C. Burges, L. Bottou & K. Q. Weinberger (eds.),
Advances in Neural Information Processing Systems 25 (pp. 1097--1105).
Curran Associates, Inc.

2
SdkMasterLog.csv Normal file

@@ -0,0 +1,2 @@
deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 4.2, CUDA Runtime Version = 4.2, NumDevs = 4, Device = Tesla S2050, Device = Tesla S2050
deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 4.2, CUDA Runtime Version = 4.2, NumDevs = 4, Device = Tesla S2050, Device = Tesla S2050

23
avg-test.py Executable file

@@ -0,0 +1,23 @@
from util import *
import os
import sys
import re
import random as r
def do_avg(paths, tgtpath, coeffs):
for i,f in enumerate(sorted(os.listdir(paths[0]))):
b = int(re.match('test_preds_(\d+)', f).group(1))
dics = [unpickle(os.path.join(p, f)) for p in paths]
preds = sum(c * d['data'] for c,d in zip(coeffs, dics))
pickle(os.path.join(tgtpath, 'test_preds_%d' % b), {'data': preds})
print "Wrote batch %d" % b
if __name__ == "__main__":
paths = sys.argv[1].split(',')
tgtpath = sys.argv[2]
if not os.path.exists(tgtpath):
os.makedirs(tgtpath)
coeffs = [float(x) for x in sys.argv[3].split(',')]
do_avg(paths, tgtpath, coeffs)
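
avg-test.py averages the per-batch test predictions of several models into a new directory, weighting each model by a coefficient. Judging from the __main__ block it takes comma-separated prediction directories, an output directory and one weight per input; a usage sketch with made-up paths:

# Hypothetical prediction directories produced by two trained models.
python avg-test.py /preds/model-a,/preds/model-b /preds/avg 0.6,0.4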

108
avg-valid.py Executable file

@@ -0,0 +1,108 @@
from util import *
import os
import sys
import re
import random as r
import numpy.random as nr
from math import sqrt
#VALID_PATH = '/ais/gobi3/u/kriz/lsvrc-2012'
VALID_PATH = '/storage/lsvrc-2012'
def compute_top5(preds, labels):
errs = 0
for c in xrange(preds.shape[0]):
err = True
for i in xrange(5):
top = preds[c,:].argmax()
if top == labels[c]:
err = False
break
preds[c, top] = -1
errs += err
return errs
#top5 = [[k[0] for k in sorted(zip(xrange(preds.shape[1]), preds[c,:]), key=lambda x:x[1], reverse=True)[:5]] for c in xrange(preds.shape[0])]
#return sum(l not in t for l,t in zip(labels, top5))
def do_avg(paths, coeffs, top5=False):
#coeffs = [float(x) for x in sys.argv[2].split(',')]
off = unpickle(os.path.join(VALID_PATH, 'batches.meta'))['label_offset']
errs1, errs5, cases = 0, 0, 0
for i,f in enumerate(sorted(os.listdir(paths[0]))):
b = int(re.match('test_preds_(\d+)', f).group(1))
dics = [unpickle(os.path.join(p, f)) for p in paths]
dicv = unpickle(os.path.join(VALID_PATH, 'data_batch_%d' % b))
labels = n.array([d[1]+off for d in dicv[2]])
        assert labels.min() >= 0 and labels.max() < 1000
preds = sum(c * d['data'] for c,d in zip(coeffs, dics))
assert preds.shape[1] == 1000
err1 = sum(preds.argmax(1) != labels)
err5 = compute_top5(preds, labels) if top5 else 0
errs1 += err1
errs5 += err5
cases += preds.shape[0]
#print "%.4f %.4f" % (float(err1) / preds.shape[0], float(err5) / preds.shape[0])
return errs1 / float(cases), errs5 / float(cases)
#print "Average error rate with coeffs %s: %.4f %.4f" % (", ".join("%.2f" % f for f in coeffs), errs1 / float(cases), errs5 / float(cases))
def find_coeffs(paths, passes=5, cmin=0.0, cmax=1.0, step=0.05):
coeffs = [(cmax-cmin)/2 for i in xrange(len(paths))]
#coeffs = [cmin + (r.random() * (cmax-cmin)) for i in xrange(len(paths))]
best1 = do_avg(paths, coeffs, top5=True)[1]
changed = -1
for p in xrange(passes):
print "Pass %d" % p
for i in xrange(len(coeffs)):
if changed == i:
changed = -2
break
for c in [cmin + c * step for c in xrange(1+int((cmax-cmin)/step))]:
oldc = coeffs[i]
coeffs[i] = c
err = do_avg(paths, coeffs, top5=True)[1]
if err < best1:
best1 = err
changed = i
else:
coeffs[i] = oldc
print "Best error rate: %.4f, coeffs: [%s]" % (best1, ",".join("%.2f" % f for f in coeffs))
if changed == -2:
break
def find_coeffs2(paths, passes=50):
#coeffs = n.array([r.random() for i in xrange(len(paths))])
coeffs = n.array([0.5 for i in xrange(len(paths))])
    coeffs /= coeffs.sum()
    best1 = do_avg(paths, coeffs, top5=True)[1] # best top-5 error so far, compared against below
#crange = [[cmin + c * step for c in xrange(1+int((cmax-cmin)/step))] for i in xrange(len(paths))]
for p in xrange(passes):
print "Pass %d" % p
for i in nr.permutation(range(coeffs.shape[0])):
#bigger = r.randint(0,2) == 0
#c = coeffs[i] + r.random() * (1 - coeffs[i]) if bigger else r.random() * coeffs[i]
c = min(1, max(0, coeffs[i] + nr.randn() / (2*sqrt(1+p))))
oldc = coeffs[i]
coeffs[i] = c
err = do_avg(paths, coeffs, top5=True)[1]
changed = ""
if err < best1:
best1 = err
changed = "*"
#crange = [[cmin + x * step for x in xrange(1+int((cmax-cmin)/step))] for i in xrange(len(paths))]
else:
coeffs[i] = oldc
coeffs /= coeffs.sum()
#crange[i].remove(c)
print "Best error rate: %.4f, coeffs: [%s]%s" % (best1, ",".join("%.4f" % f for f in coeffs), changed)
if __name__ == "__main__":
paths = sys.argv[1].split(',')
if len(sys.argv) == 2:
find_coeffs(paths)
else:
coeffs = n.array([float(x) for x in sys.argv[2].split(',')])
errs = do_avg(paths, coeffs, top5=True)
print "Average error rate with coeffs %s: %.4f %.4f" % (", ".join("%.2f" % f for f in coeffs), errs[0], errs[1])

19
build.sh Executable file

@@ -0,0 +1,19 @@
#!/bin/sh
NVMATRIX=$NVMATRIX_K20X_INCLUDE/..
CUDACONV=$NVCONV2_K20X_INCLUDE/..
rm -rf build
rm *.so
mkdir -p build
cp -r src build/
cp -r include build/
cp $NVMATRIX/src/nvmatrix.cu $NVMATRIX/src/nvmatrix_kernels.cu $NVMATRIX/src/gpu_locking.cpp build/src
cp $NVMATRIX/include/nvmatrix.cuh $NVMATRIX/include/nvmatrix_kernels.cuh $NVMATRIX/include/nvmatrix_operators.cuh $NVMATRIX/include/gpu_locking.h build/include
cp $CUDACONV/src/conv_util.cu $CUDACONV/src/filter_acts.cu $CUDACONV/src/weight_acts.cu $CUDACONV/src/img_acts.cu build/src
cp $CUDACONV/include/conv_util.cuh $CUDACONV/include/cudaconv2.cuh build/include
cp Makefile-all build/Makefile
cd build && make -j kepler=1 $* && cd ..
ln -fs build/*.so ./
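
build.sh assembles a build tree from this package plus the external nvmatrix and cudaconv2 sources, whose locations it reads from NVMATRIX_K20X_INCLUDE and NVCONV2_K20X_INCLUDE, then compiles with Makefile-all. A sketch of a typical invocation, with hypothetical checkout locations:

export NVMATRIX_K20X_INCLUDE=$HOME/src/nvmatrix/include
export NVCONV2_K20X_INCLUDE=$HOME/src/cudaconv2/include
./build.sh   # builds with kepler=1 and symlinks the resulting .so into the current directory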

336
convdata.py Executable file

@@ -0,0 +1,336 @@
from data import *
import numpy.random as nr
import numpy as n
import random as r
from time import time
from threading import Thread
from math import sqrt
import sys
from pylab import *
class FlatMemoryDataProvider(LabeledMemoryDataProvider):
def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
LabeledMemoryDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
self.data_mean = self.batch_meta['data_mean'].reshape((self.batch_meta['data_mean'].shape[0], 1))
# Subtract the mean from the data and make sure that both data and
# labels are in single-precision floating point.
for d in self.data_dic:
# This converts the data matrix to single precision and makes sure that it is C-ordered
d['data'] = n.require((d['data'] - self.data_mean), dtype=n.single, requirements='C')
d['labels'] = d['labels'].astype(n.int)
d['labelprobs'] = n.zeros((self.get_num_classes(), d['data'].shape[1]), dtype=n.single)
for c in xrange(d['data'].shape[1]):
d['labelprobs'][d['labels'][c],c] = 1.0
def get_next_batch(self):
epoch, batchnum, datadic = LabeledMemoryDataProvider.get_next_batch(self)
return epoch, batchnum, [datadic['data'], datadic['labelprobs']]
def get_data_dims(self, idx=0):
return self.batch_meta['num_vis'] if idx == 0 else self.get_num_classes()
class ImageNetDP(LabeledDataProvider):
MAX_PCA_COMPONENTS = 1024 # Use this many components for noise generation
def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
LabeledDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
self.init_commons(data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
def init_commons(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
self.data_mean = self.batch_meta['data_mean'].astype(n.single)
self.color_eig = self.batch_meta['color_pca'][1].astype(n.single)
self.color_stdevs = n.c_[self.batch_meta['color_pca'][0].astype(n.single)]
self.color_noise_coeff = dp_params['color_noise']
self.pca_noise_coeff = dp_params['pca_noise']
self.num_colors = 3
self.img_size = int(sqrt(self.batch_meta['num_vis'] / self.num_colors))
def get_labels(self, datadic):
pass
def showimg(self, img):
pixels = img.shape[0] / 3
size = int(sqrt(pixels))
img = img.reshape((3,size,size)).swapaxes(0,2).swapaxes(0,1)
imshow(img, interpolation='nearest')
show()
def get_next_batch(self):
epoch, batchnum, datadic = LabeledDataProvider.get_next_batch(self)
# This takes about 1 sec per batch :(
# If I don't convert both to single ahead of time, it takes even longer.
data = n.require(datadic['data'] - self.data_mean, dtype=n.single, requirements='C')
labels = self.get_labels(datadic)
# wordvecs = datadic['wordvecs']
wordpres = datadic['wordpres']
# Labels have to be in the range 0-(number of classes - 1)
assert labels.max() < self.get_num_classes(), "Invalid labels!"
assert labels.min() == 0, "Invalid labels!"
return epoch, batchnum, [data, labels, wordpres]
# Takes as input an array returned by get_next_batch
# Returns a (numCases, imgSize, imgSize, 3) array which can be
# fed to pylab for plotting.
# This is used by shownet.py to plot test case predictions.
def get_plottable_data(self, data, add_mean=True):
return n.require((data + (self.data_mean if add_mean else 0)).T.reshape(data.shape[1], 3, self.img_size, self.img_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single)
class ImageNetLogRegDP(ImageNetDP):
def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
ImageNetDP.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
def get_labels(self, datadic):
return n.array(datadic['labels'], dtype=n.single).reshape((1, datadic['data'].shape[1]))
def get_data_dims(self, idx=0):
if idx == 0:
return self.img_size**2 * self.num_colors
if idx == 2:
return 100
return 1
class BatchLoaderThread(Thread):
def __init__(self, data_dir, path, list_out):
Thread.__init__(self)
self.data_dir = data_dir
self.path = path
self.list_out = list_out
#print "loading %d" % self.bnum
def run(self):
self.list_out.append(unpickle(self.path))
class ColorNoiseMakerThread(Thread):
def __init__(self, pca_stdevs, pca_vecs, num_noise, list_out):
Thread.__init__(self)
self.pca_stdevs, self.pca_vecs = pca_stdevs, pca_vecs
self.num_noise = num_noise
self.list_out = list_out
def run(self):
noise = n.dot(self.pca_vecs, nr.randn(3, self.num_noise).astype(n.single) * self.pca_stdevs)
self.list_out.append(noise)
class CroppedImageNetDP(ImageNetDP):
def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
ImageNetDP.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
self.border_size = dp_params['crop_border']
self.inner_size = self.img_size - self.border_size*2
self.multiview = dp_params['multiview_test'] and test
self.num_views = 5*2
self.data_mult = self.num_views if self.multiview else 1
self.crop_chunk = 32 # This many images will be cropped in the same way
        # Maintain pointers to previously-returned data matrices so they don't get garbage collected.
# I've never seen this happen but it's a safety measure.
self.data = [None, None]
self.cropped_data = [n.zeros((self.get_data_dims(), 0*self.data_mult), dtype=n.single) for x in xrange(2)]
self.loader_thread, self.color_noise_thread = None, None
self.convnet = dp_params['convnet']
self.num_noise = 1024
self.batches_generated = 0
self.data_mean_crop = self.data_mean.reshape((3,self.img_size,self.img_size))[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size].reshape((3*self.inner_size**2, 1))
def get_data_dims(self, idx=0):
if idx == 0:
return self.inner_size**2 * 3
return 1
def start_color_noise_maker(self):
color_noise_list = []
self.color_noise_thread = ColorNoiseMakerThread(self.color_stdevs, self.color_eig, self.num_noise, color_noise_list)
self.color_noise_thread.start()
return color_noise_list
def get_labels(self, datadic):
pass
def start_loader(self, batch_idx):
self.load_data = []
self.loader_thread = BatchLoaderThread(self.data_dir, self.get_data_file_name(self.batch_range[batch_idx]), self.load_data)
self.loader_thread.start()
def get_next_batch(self):
self.d_idx = self.batches_generated % 2
if self.test:
epoch, batchnum, self.data[self.d_idx] = LabeledDataProvider.get_next_batch(self)
else:
epoch, batchnum = self.curr_epoch, self.curr_batchnum
if self.loader_thread is None:
self.start_loader(self.batch_idx)
self.loader_thread.join()
self.data[self.d_idx] = self.load_data[0]
self.start_loader(self.get_next_batch_idx())
else:
# Set the argument to join to 0 to re-enable batch reuse
self.loader_thread.join()
if not self.loader_thread.is_alive():
self.data[self.d_idx] = self.load_data[0]
self.start_loader(self.get_next_batch_idx())
self.advance_batch()
cropped = self.get_cropped_data(self.data[self.d_idx])
if self.color_noise_coeff > 0 and not self.test:
# At this point the data already has 0 mean.
# So I'm going to add noise to it, but I'm also going to scale down
# the original data. This is so that the overall scale of the training
# data doesn't become too different from the test data.
s = cropped.shape
cropped_size = self.get_data_dims(0) / 3
ncases = s[1]
if self.color_noise_thread is None:
self.color_noise_list = self.start_color_noise_maker()
self.color_noise_thread.join()
self.color_noise = self.color_noise_list[0]
self.color_noise_list = self.start_color_noise_maker()
else:
self.color_noise_thread.join(0)
if not self.color_noise_thread.is_alive():
self.color_noise = self.color_noise_list[0]
self.color_noise_list = self.start_color_noise_maker()
# print "Generated new noise"
# else:
# print "Reusing old noise"
# If the noise thread IS alive, then we'll just re-use the noise from the last run
cropped = self.cropped_data[self.d_idx] = cropped.reshape((3, cropped_size, ncases)).swapaxes(0,1).reshape((cropped_size, ncases*3))
self.color_noise = self.color_noise[:,:ncases].reshape((1, 3*ncases))
cropped += self.color_noise * self.color_noise_coeff
cropped = self.cropped_data[self.d_idx] = cropped.reshape((cropped_size, 3, ncases)).swapaxes(0,1).reshape(s)
cropped /= 1.0 + self.color_noise_coeff
# cropped -= cropped.min()
# cropped /= cropped.max()
# self.showimg(cropped[:,0])
self.data[self.d_idx]['labels'] = self.get_labels(self.data[self.d_idx])
self.data[self.d_idx]['data'] = cropped
self.batches_generated += 1
return epoch, batchnum, [self.data[self.d_idx]['data'], self.data[self.d_idx]['labels']]
def get_cropped_data(self, data):
cropped = self.cropped_data[self.d_idx]
if cropped.shape[1] != data['data'].shape[1] * self.data_mult:
cropped = self.cropped_data[self.d_idx] = n.zeros((cropped.shape[0], data['data'].shape[1] * self.data_mult), dtype=n.single)
self.__trim_borders(data['data'], cropped)
return self.subtract_mean(cropped)
def subtract_mean(self,data):
data -= self.data_mean_crop
return data
# Takes as input an array returned by get_next_batch
# Returns a (numCases, imgSize, imgSize, 3) array which can be
# fed to pylab for plotting.
# This is used by shownet.py to plot test case predictions.
def get_plottable_data(self, data, add_mean=True):
return n.require((data + (self.data_mean_crop if add_mean else 0)).T.reshape(data.shape[1], 3, self.inner_size, self.inner_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single)
def __trim_borders(self, x, target):
y = x.reshape(3, self.img_size, self.img_size, x.shape[1])
if self.test: # don't need to loop over cases
if self.multiview:
start_positions = [(0,0), (0, self.border_size*2),
(self.border_size, self.border_size),
(self.border_size*2, 0), (self.border_size*2, self.border_size*2)]
end_positions = [(sy+self.inner_size, sx+self.inner_size) for (sy,sx) in start_positions]
for i in xrange(self.num_views/2):
pic = y[:,start_positions[i][0]:end_positions[i][0],start_positions[i][1]:end_positions[i][1],:]
target[:,i * x.shape[1]:(i+1)* x.shape[1]] = pic.reshape((self.get_data_dims(),x.shape[1]))
target[:,(self.num_views/2 + i) * x.shape[1]:(self.num_views/2 +i+1)* x.shape[1]] = pic[:,:,::-1,:].reshape((self.get_data_dims(),x.shape[1]))
else:
pic = y[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size, :] # just take the center for now
target[:,:] = pic.reshape((self.get_data_dims(), x.shape[1]))
else:
for c in xrange(0, x.shape[1], self.crop_chunk): # loop over cases in chunks
startY, startX = nr.randint(0,self.border_size*2 + 1), nr.randint(0,self.border_size*2 + 1)
endY, endX = startY + self.inner_size, startX + self.inner_size
c_end = min(c + self.crop_chunk, x.shape[1])
pic = y[:,startY:endY,startX:endX, c:c_end]
if nr.randint(2) == 0: # also flip the images with 50% probability
pic = pic[:,:,::-1,:]
target[:,c:c_end] = pic.reshape((self.get_data_dims(),c_end-c))
#target[:] = n.require(target[:,nr.permutation(x.shape[1])], requirements='C')
class CroppedImageNetLogRegDP(CroppedImageNetDP):
def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
CroppedImageNetDP.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
def get_labels(self, datadic):
return n.require(n.tile(n.array(datadic['labels'], dtype=n.single).reshape((1, datadic['data'].shape[1])), (1, self.data_mult)), requirements='C')
class RandomScaleImageNetLogRegDP(CroppedImageNetLogRegDP):
def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
CroppedImageNetLogRegDP.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
del self.cropped_data
self.data_mean_mean = self.data_mean.mean()
def get_cropped_data(self):
if self.test and self.multiview:
x = self.data['data']
y = x.reshape(3, self.img_size, self.img_size, x.shape[1])
target = n.zeros((self.inner_size**2*3, self.data['data'].shape[1]*self.num_views), dtype=n.uint8)
start_positions = [(0,0), (0, self.border_size), (0, self.border_size*2),
(self.border_size, 0), (self.border_size, self.border_size), (self.border_size, self.border_size*2),
(self.border_size*2, 0), (self.border_size*2, self.border_size), (self.border_size*2, self.border_size*2)]
end_positions = [(sy+self.inner_size, sx+self.inner_size) for (sy,sx) in start_positions]
for i in xrange(self.num_views):
target[:,i * x.shape[1]:(i+1)* x.shape[1]] = y[:,start_positions[i][0]:end_positions[i][0],start_positions[i][1]:end_positions[i][1],:].reshape((self.inner_size**2*3,x.shape[1]))
return self.subtract_mean(target)
elif not self.test:
# it should be ok to flip it into the same matrix
# since if it ends up being reused, flips are invertible.
self.reflect_data(self.data['data'], self.data['data'])
return self.subtract_mean(self.data['data'])
def reflect_data(self, x, target):
y = x.reshape(3, self.img_size, self.img_size, x.shape[1])
for c in xrange(0, x.shape[1], self.crop_chunk): # loop over cases in chunks
c_end = min(c + self.crop_chunk, x.shape[1])
pic = y[:,:,:, c:c_end]
if nr.randint(2) == 0: # flip the images with 50% probability
pic = pic[:,:,::-1,:]
target[:,c:c_end] = pic.reshape((self.get_data_dims(),c_end-c))
# Note that this variant subtracts the same scalar from each pixel
def subtract_mean(self, data):
return n.require(data - self.data_mean_mean, dtype=n.single, requirements='C')
def get_data_dims(self, idx=0):
return self.img_size**2 * 3 if idx == 0 else 1
class DummyConvNetLogRegDP(LabeledDummyDataProvider):
def __init__(self, data_dim):
LabeledDummyDataProvider.__init__(self, data_dim)
self.batch_meta['tree'] = dict([(i, []) for i in xrange(self.num_classes)])
self.batch_meta['tree'][10] = [0, 1, 2]
self.batch_meta['tree'][11] = [3, 4, 5]
self.batch_meta['tree'][12] = [6, 7]
self.batch_meta['tree'][13] = [8, 9]
self.batch_meta['tree'][14] = [10, 11]
self.batch_meta['tree'][15] = [12, 13]
self.batch_meta['tree'][16] = [14, 15]
self.batch_meta['all_wnids'] = {'gproot': 16}
self.img_size = int(sqrt(data_dim/3))
def get_next_batch(self):
epoch, batchnum, dic = LabeledDummyDataProvider.get_next_batch(self)
dic['data'] = n.require(dic['data'].T, requirements='C')
dic['labels'] = n.require(dic['labels'].T, requirements='C')
dic['gates'] = nr.rand(1, dic['data'].shape[1]).astype(n.single)
return epoch, batchnum, [dic['data'], dic['labels'], dic['gates']]
# Returns the dimensionality of the two data matrices returned by get_next_batch
def get_data_dims(self, idx=0):
return self.batch_meta['num_vis'] if idx == 0 else 1
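
The ColorNoiseMakerThread above generates the PCA color-noise augmentation: for each image it draws three Gaussian coefficients, scales them by the color-PCA standard deviations stored in batches.meta, and projects them through the eigenvector matrix to get a per-channel RGB offset. A minimal NumPy sketch of that computation, assuming pca_stdevs has shape (3, 1) and pca_vecs shape (3, 3) as set up in init_commons:

import numpy as np

def make_color_noise(pca_stdevs, pca_vecs, num_images, rng=np.random):
    # One coefficient per principal component and image, scaled by that
    # component's standard deviation (broadcast over the image axis).
    alphas = rng.randn(3, num_images).astype(np.float32) * pca_stdevs
    # Project back to RGB: a (3, num_images) array of per-channel offsets.
    return np.dot(pca_vecs, alphas)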

115
convdata_cifar.py Executable file

@@ -0,0 +1,115 @@
from data import *
import numpy.random as nr
import numpy as n
import random as r
class CIFARDataProvider(LabeledDataProvider):
def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
LabeledDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
self.data_mean = self.batch_meta['data_mean']
self.num_colors = 3
self.img_size = 32
self.data_dims = [self.img_size**2 * self.num_colors, 1, self.get_num_classes()]
def get_next_batch(self):
epoch, batchnum, datadic = LabeledDataProvider.get_next_batch(self)
if 'processed' not in datadic:
datadic['data'] = n.require((datadic['data'] - self.data_mean), dtype=n.single, requirements='C')
datadic['labelsVec'] = n.require(n.array(datadic['labels']).reshape((1, datadic['data'].shape[1])), requirements='C', dtype=n.single)
datadic['labelsMat'] = n.zeros((self.get_num_classes(), datadic['data'].shape[1]), dtype=n.single)
datadic['labelsMat'][datadic['labels'],n.c_[0:datadic['data'].shape[1]]] = 1
datadic['processed'] = True
return epoch, batchnum, [datadic['data'], datadic['labelsVec'], datadic['labelsMat']]
# Returns the dimensionality of the two data matrices returned by get_next_batch
# idx is the index of the matrix.
def get_data_dims(self, idx=0):
return self.data_dims[idx]
# Takes as input an array returned by get_next_batch
# Returns a (numCases, imgSize, imgSize, 3) array which can be
# fed to pylab for plotting.
# This is used by shownet.py to plot test case predictions.
def get_plottable_data(self, data):
return n.require((data + self.data_mean).T.reshape(data.shape[1], 3, self.img_size, self.img_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single)
class CroppedCIFARDataProvider(LabeledMemoryDataProvider):
def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params=None, test=False):
LabeledMemoryDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
self.border_size = dp_params['crop_border']
self.inner_size = 32 - self.border_size*2
self.multiview = dp_params['multiview_test'] and test
self.num_views = 9
self.data_mult = self.num_views if self.multiview else 1
self.num_colors = 3
for d in self.data_dic:
d['data'] = n.require(d['data'], requirements='C')
d['labels'] = n.require(n.tile(d['labels'].reshape((1, d['data'].shape[1])), (1, self.data_mult)), requirements='C')
self.cropped_data = [n.zeros((self.get_data_dims(), self.data_dic[0]['data'].shape[1]*self.data_mult), dtype=n.single) for x in xrange(2)]
self.batches_generated = 0
self.data_mean = self.batch_meta['data_mean'].reshape((3,32,32))[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size].reshape((self.get_data_dims(), 1))
def get_next_batch(self):
epoch, batchnum, datadic = LabeledMemoryDataProvider.get_next_batch(self)
cropped = self.cropped_data[self.batches_generated % 2]
self.__trim_borders(datadic['data'], cropped)
cropped -= self.data_mean
self.batches_generated += 1
return epoch, batchnum, [cropped, datadic['labels']]
def get_data_dims(self, idx=0):
return self.inner_size**2 * 3 if idx == 0 else 1
# Takes as input an array returned by get_next_batch
# Returns a (numCases, imgSize, imgSize, 3) array which can be
# fed to pylab for plotting.
# This is used by shownet.py to plot test case predictions.
def get_plottable_data(self, data):
return n.require((data + self.data_mean).T.reshape(data.shape[1], 3, self.inner_size, self.inner_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single)
def __trim_borders(self, x, target):
y = x.reshape(3, 32, 32, x.shape[1])
if self.test: # don't need to loop over cases
if self.multiview:
start_positions = [(0,0), (0, self.border_size), (0, self.border_size*2),
(self.border_size, 0), (self.border_size, self.border_size), (self.border_size, self.border_size*2),
(self.border_size*2, 0), (self.border_size*2, self.border_size), (self.border_size*2, self.border_size*2)]
end_positions = [(sy+self.inner_size, sx+self.inner_size) for (sy,sx) in start_positions]
for i in xrange(self.num_views):
target[:,i * x.shape[1]:(i+1)* x.shape[1]] = y[:,start_positions[i][0]:end_positions[i][0],start_positions[i][1]:end_positions[i][1],:].reshape((self.get_data_dims(),x.shape[1]))
else:
pic = y[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size, :] # just take the center for now
target[:,:] = pic.reshape((self.get_data_dims(), x.shape[1]))
else:
for c in xrange(x.shape[1]): # loop over cases
startY, startX = nr.randint(0,self.border_size*2 + 1), nr.randint(0,self.border_size*2 + 1)
endY, endX = startY + self.inner_size, startX + self.inner_size
pic = y[:,startY:endY,startX:endX, c]
if nr.randint(2) == 0: # also flip the image with 50% probability
pic = pic[:,:,::-1]
target[:,c] = pic.reshape((self.get_data_dims(),))
class DummyConvNetDataProvider(LabeledDummyDataProvider):
def __init__(self, data_dim):
LabeledDummyDataProvider.__init__(self, data_dim)
def get_next_batch(self):
epoch, batchnum, dic = LabeledDummyDataProvider.get_next_batch(self)
dic['data'] = n.require(dic['data'].T, requirements='C')
dic['labels'] = n.require(dic['labels'].T, requirements='C')
return epoch, batchnum, [dic['data'], dic['labels']]
# Returns the dimensionality of the two data matrices returned by get_next_batch
def get_data_dims(self, idx=0):
return self.batch_meta['num_vis'] if idx == 0 else 1

297
convdata_flickr.py Executable file

@@ -0,0 +1,297 @@
from data import *
import numpy.random as nr
import numpy as n
import random as r
from time import time
from threading import Thread
from math import sqrt
import sys
from pylab import *
from PIL import Image
from StringIO import StringIO
class JPEGBatchLoaderThread(Thread):
def __init__(self, data_dir, path, freq_to_id, tgt, tgt_labels, list_out):
Thread.__init__(self)
self.data_dir = data_dir
self.path = path
self.tgt = tgt
self.tgt_labels = tgt_labels
self.list_out = list_out
self.freq_to_id = freq_to_id
#print "loading %d" % self.bnum
@staticmethod
def raw_to_freq_id(raw_tags, freq_to_id):
raw_tags = [''.join(t.lower().strip().split()) for t in raw_tags]
return [freq_to_id[t] for t in raw_tags if t in freq_to_id]
@staticmethod
def load_jpeg_batch((strings, sizes, labels), freq_to_id, tgt, tgt_labels):
tgt_labels[:] = 0
for k,s in enumerate(strings):
ima = n.asarray(Image.open(StringIO(s)).convert('RGB'))
tgt[k,:] = ima.swapaxes(0,2).swapaxes(1,2).flatten()
tgt_labels[k, JPEGBatchLoaderThread.raw_to_freq_id(labels[k], freq_to_id)] = 1
return {'data': tgt[:len(strings),:],
'labels': tgt_labels[:len(strings),:]}
def run(self):
p = self.load_jpeg_batch(unpickle(self.path),
self.freq_to_id,
self.tgt,
self.tgt_labels)
self.list_out.append(p)
class ColorNoiseMakerThread(Thread):
def __init__(self, pca_stdevs, pca_vecs, num_noise, list_out):
Thread.__init__(self)
self.pca_stdevs, self.pca_vecs = pca_stdevs, pca_vecs
self.num_noise = num_noise
self.list_out = list_out
def run(self):
noise = n.dot(nr.randn(self.num_noise, 3).astype(n.single) * self.pca_stdevs.T, self.pca_vecs.T)
self.list_out.append(noise)
class FlickrDP(LabeledDataProvider):
MAX_PCA_COMPONENTS = 1024 # Use this many components for noise generation
def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
LabeledDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
self.init_commons(data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
def init_commons(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
self.data_mean = self.batch_meta['data_mean'].astype(n.single)
self.color_eig = self.batch_meta['color_pca'][1].astype(n.single)
self.color_stdevs = n.c_[self.batch_meta['color_pca'][0].astype(n.single)]
self.color_noise_coeff = dp_params['color_noise']
self.pca_noise_coeff = dp_params['pca_noise']
self.num_colors = 3
self.img_size = int(sqrt(self.batch_meta['num_vis'] / self.num_colors))
self.freq_to_id = self.batch_meta['freq_to_id']
def get_labels(self, datadic):
pass
def showimg(self, img):
pixels = img.shape[0] / 3
size = int(sqrt(pixels))
img = img.reshape((3,size,size)).swapaxes(0,2).swapaxes(0,1)
imshow(img, interpolation='nearest')
show()
def get_next_batch(self):
epoch, batchnum, datadic = LabeledDataProvider.get_next_batch(self)
# This takes about 1 sec per batch :(
# If I don't convert both to single ahead of time, it takes even longer.
data = n.require(datadic['data'] - self.data_mean, dtype=n.single, requirements='C')
labels = self.get_labels(datadic)
# Labels have to be in the range 0-(number of classes - 1)
assert labels.max() < self.get_num_classes(), "Invalid labels!"
assert labels.min() >= 0, "Invalid labels!"
return epoch, batchnum, [data, labels]
# Takes as input an array returned by get_next_batch
# Returns a (numCases, imgSize, imgSize, 3) array which can be
# fed to pylab for plotting.
# This is used by shownet.py to plot test case predictions.
def get_plottable_data(self, data, add_mean=True):
return n.require((data + (self.data_mean if add_mean else 0)).reshape(data.shape[0], 3, self.img_size, self.img_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single)
class JPEGCroppedFlickrDP(FlickrDP):
def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params=None, test=False):
LabeledDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
self.init_commons(data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
self.img_size = int(sqrt(self.batch_meta['num_vis'] / self.num_colors))
self.border_size = dp_params['crop_border']
self.inner_size = self.img_size - self.border_size*2
self.multiview = dp_params['multiview_test'] and test
self.num_views = 5*2
self.data_mult = self.num_views if self.multiview else 1
self.crop_chunk = 32 # This many images will be cropped in the same way
self.batch_size = self.batch_meta['batch_size']
        # Maintain pointers to previously-returned data matrices so they don't get garbage collected.
# I've never seen this happen but it's a safety measure.
self.data = [None, None]
self.cropped_data = [n.zeros((0*self.data_mult, self.get_data_dims()), dtype=n.float32) for x in xrange(2)]
if self.test:
self.orig_data = [n.zeros((self.batch_size, self.img_size**2*3), dtype=n.uint8) for x in xrange(1)]
self.orig_labels = [n.zeros((self.batch_size, self.get_num_classes()), dtype=n.float32) for x in xrange(2)]
else:
self.orig_data = [n.zeros((self.batch_size, self.img_size**2*3), dtype=n.uint8) for x in xrange(2)]
# There have to be 3 copies of labels because this matrix actually gets used by the training code
self.orig_labels = [n.zeros((self.batch_size, self.get_num_classes()), dtype=n.float32) for x in xrange(3)]
self.loader_thread, self.color_noise_thread = None, None
self.convnet = dp_params['convnet']
self.num_noise = self.batch_size
self.batches_generated, self.loaders_started = 0, 0
self.data_mean_crop = self.data_mean.reshape((3,self.img_size,self.img_size))[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size].reshape((1,3*self.inner_size**2))
def get_data_dims(self, idx=0):
assert idx in (0,1), "Invalid index: %d" % idx
if idx == 0:
return self.inner_size**2 * 3
return self.get_num_classes()
def start_loader(self, batch_idx):
self.load_data = []
#print "loading %d" % self.batch_range_perm[self.batch_idx]
self.loader_thread = JPEGBatchLoaderThread(self.data_dir, self.get_data_file_name(self.batch_range[batch_idx]), self.freq_to_id,
self.orig_data[self.loaders_started % 2], self.orig_labels[self.loaders_started % 3],
self.load_data)
self.loader_thread.start()
self.loaders_started += 1
def start_color_noise_maker(self):
color_noise_list = []
self.color_noise_thread = ColorNoiseMakerThread(self.color_stdevs, self.color_eig, self.num_noise, color_noise_list)
self.color_noise_thread.start()
return color_noise_list
def get_labels(self, datadic):
pass
def get_next_batch(self):
self.d_idx = self.batches_generated % 2
if self.test:
epoch, batchnum, self.data[self.d_idx] = LabeledDataProvider.get_next_batch(self)
self.data[self.d_idx] = JPEGBatchLoaderThread.load_jpeg_batch(self.data[self.d_idx], self.freq_to_id, self.orig_data[0], self.orig_labels[self.d_idx])
else:
epoch, batchnum = self.curr_epoch, self.curr_batchnum
if self.loader_thread is None:
self.start_loader(self.batch_idx)
self.loader_thread.join()
self.data[self.d_idx] = self.load_data[0]
self.start_loader(self.get_next_batch_idx())
else:
# Set the argument to join to 0 to re-enable batch reuse
self.loader_thread.join()
if not self.loader_thread.is_alive():
self.data[self.d_idx] = self.load_data[0]
self.start_loader(self.get_next_batch_idx())
# else:
# print "Re-using batch"
self.advance_batch()
cropped = self.get_cropped_data(self.data[self.d_idx])
if self.color_noise_coeff > 0 and not self.test:
# At this point the data already has 0 mean.
# So I'm going to add noise to it, but I'm also going to scale down
# the original data. This is so that the overall scale of the training
# data doesn't become too different from the test data.
s = cropped.shape
cropped_size = self.get_data_dims(0) / 3
ncases = s[0]
if self.color_noise_thread is None:
self.color_noise_list = self.start_color_noise_maker()
self.color_noise_thread.join()
self.color_noise = self.color_noise_list[0]
self.color_noise_list = self.start_color_noise_maker()
else:
self.color_noise_thread.join(0)
if not self.color_noise_thread.is_alive():
self.color_noise = self.color_noise_list[0]
self.color_noise_list = self.start_color_noise_maker()
cropped = self.cropped_data[self.d_idx] = cropped.reshape((ncases*3, cropped_size))
self.color_noise = self.color_noise[:ncases,:].reshape((3*ncases, 1))
cropped += self.color_noise * self.color_noise_coeff
cropped = self.cropped_data[self.d_idx] = cropped.reshape((ncases, 3* cropped_size))
cropped /= (1.0 + self.color_noise_coeff)
self.data[self.d_idx]['labels'] = self.get_labels(self.data[self.d_idx])
self.data[self.d_idx]['data'] = cropped
self.batches_generated += 1
# idx = 1000
# cropped -= cropped.min()
# cropped /= cropped.max()
#
# print [self.batch_meta['label_names'][i] for i in n.where(self.data['labels'][idx,:]==1)[0]]
# self.showimg(cropped[idx,:])
#print cropped.shape
return epoch, batchnum, [self.data[self.d_idx]['data'].T, self.data[self.d_idx]['labels'].T]
def get_cropped_data(self, data):
cropped = self.cropped_data[self.d_idx]
if cropped.shape[0] != data['data'].shape[0] * self.data_mult:
cropped = self.cropped_data[self.d_idx] = n.zeros((data['data'].shape[0] * self.data_mult, cropped.shape[1]), dtype=n.float32)
self.__trim_borders(data['data'], cropped)
return self.subtract_mean(cropped)
def subtract_mean(self,data):
data -= self.data_mean_crop
return data
# Takes as input an array returned by get_next_batch
# Returns a (numCases, imgSize, imgSize, 3) array which can be
# fed to pylab for plotting.
# This is used by shownet.py to plot test case predictions.
def get_plottable_data(self, data, add_mean=True):
return n.require((data.T + (self.data_mean_crop if add_mean else 0)).reshape(data.shape[1], 3, self.inner_size, self.inner_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single)
def __trim_borders(self, x, target):
y = x.reshape(x.shape[0], 3, self.img_size, self.img_size)
if self.test: # don't need to loop over cases
if self.multiview:
start_positions = [(0,0), (0, self.border_size*2),
(self.border_size, self.border_size),
(self.border_size*2, 0), (self.border_size*2, self.border_size*2)]
end_positions = [(sy+self.inner_size, sx+self.inner_size) for (sy,sx) in start_positions]
for i in xrange(self.num_views/2):
pic = y[:,:,start_positions[i][0]:end_positions[i][0],start_positions[i][1]:end_positions[i][1]]
target[i * x.shape[0]:(i+1)* x.shape[0],:] = pic.reshape((x.shape[0], self.get_data_dims()))
target[(self.num_views/2 + i) * x.shape[0]:(self.num_views/2 +i+1)* x.shape[0],:] = pic[:,:,:,::-1].reshape((x.shape[0],self.get_data_dims()))
else:
pic = y[:,:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size] # just take the center for now
target[:,:] = pic.reshape((x.shape[0], self.get_data_dims()))
else:
for c in xrange(0, x.shape[0], self.crop_chunk): # loop over cases in chunks
startY, startX = nr.randint(0,self.border_size*2 + 1), nr.randint(0,self.border_size*2 + 1)
endY, endX = startY + self.inner_size, startX + self.inner_size
c_end = min(c + self.crop_chunk, x.shape[0])
pic = y[c:c_end,:,startY:endY,startX:endX]
if nr.randint(2) == 0: # also flip the images with 50% probability
pic = pic[:,:,:,::-1]
target[c:c_end,:] = pic.reshape((c_end-c, self.get_data_dims()))
#target[:] = n.require(target[:,nr.permutation(x.shape[1])], requirements='C')
class JPEGCroppedFlickrCEDP(JPEGCroppedFlickrDP):
def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
JPEGCroppedFlickrDP.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
def get_labels(self, data):
return n.require(n.tile(data['labels'], (self.data_mult, 1)), requirements='C')
class DummyConvNetCEDP(LabeledDummyDataProvider):
def __init__(self, data_dim):
LabeledDummyDataProvider.__init__(self, data_dim, num_classes=16, num_cases=16)
def get_next_batch(self):
epoch, batchnum, dic = LabeledDummyDataProvider.get_next_batch(self)
dic['data'] = n.require(dic['data'].T, requirements='F')
dic['labels'] = n.zeros((self.get_data_dims(idx=1), dic['data'].shape[1]), dtype=n.float32, order='F')
for c in xrange(dic['labels'].shape[1]): # loop over cases
r = nr.randint(0, dic['labels'].shape[0])
dic['labels'][r,c] = 1
return epoch, batchnum, [dic['data'], dic['labels']]
# Returns the dimensionality of the two data matrices returned by get_next_batch
def get_data_dims(self, idx=0):
return self.batch_meta['num_vis'] if idx == 0 else 16

270
convdata_jpeg.py Executable file

@@ -0,0 +1,270 @@
from data import *
import numpy.random as nr
import numpy as n
import random as r
from time import time
from threading import Thread
from math import sqrt
import sys
from pylab import *
from PIL import Image
from StringIO import StringIO
from convdata import ImageNetDP
class JPEGBatchLoaderThread(Thread):
def __init__(self, data_dir, path, data_mean, no_crop, label_offset, tgt, list_out):
Thread.__init__(self)
self.data_dir = data_dir
self.path = path
self.tgt = tgt
self.list_out = list_out
self.label_offset = label_offset
self.data_mean = data_mean
self.no_crop = no_crop
#print "loading %d" % self.bnum
@staticmethod
def load_jpeg_batch((strings, orig_sizes, labels), data_mean, no_crop, label_offset, tgt):
lab_arr = n.zeros((len(strings), 1), dtype=n.single)
failed = 0
img256 = n.zeros((256, 256, 3), dtype=n.uint8) if no_crop else None
for k,(s,l) in enumerate(zip(strings, labels)):
try:
ima = n.asarray(Image.open(StringIO(s)).convert('RGB'))
if no_crop:
off_y, off_x = (256 - ima.shape[0]) / 2, (256 - ima.shape[1]) / 2
img256[:] = data_mean
img256[off_y:ima.shape[0]+off_y,off_x:ima.shape[1]+off_x,:] = ima
tgt[k - failed,:] = img256.swapaxes(0,2).swapaxes(1,2).flatten()
else:
tgt[k - failed,:] = ima.swapaxes(0,2).swapaxes(1,2).flatten()
# For the 2012 test set, the labels will be None
lab_arr[k - failed,0] = 0 if l[1] is None else l[1] + label_offset
except IOError:
failed += 1
return {'data': tgt[:len(strings) - failed,:],
'labels': lab_arr[:len(strings) - failed,:]}
def run(self):
p = JPEGBatchLoaderThread.load_jpeg_batch(unpickle(self.path),
self.data_mean,
self.no_crop,
self.label_offset,
self.tgt)
self.list_out.append(p)
class ColorNoiseMakerThread(Thread):
def __init__(self, pca_stdevs, pca_vecs, num_noise, list_out):
Thread.__init__(self)
self.pca_stdevs, self.pca_vecs = pca_stdevs, pca_vecs
self.num_noise = num_noise
self.list_out = list_out
def run(self):
noise = n.dot(nr.randn(self.num_noise, 3).astype(n.single) * self.pca_stdevs.T, self.pca_vecs.T)
self.list_out.append(noise)
class JPEGCroppedImageNetDP(ImageNetDP):
def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params=None, test=False):
ImageNetDP.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
self.mini = dp_params['minibatch_size']
self.border_size = dp_params['crop_border']
self.inner_size = self.img_size - self.border_size*2
self.multiview = dp_params['multiview_test'] and test
self.num_views = 5*2
self.data_mult = self.num_views if self.multiview else 1
self.crop_chunk = 32 # This many images will be cropped in the same way
self.batch_size = self.batch_meta['batch_size']
self.label_offset = 0 if 'label_offset' not in self.batch_meta else self.batch_meta['label_offset']
self.no_crop = False if 'no_crop' not in self.batch_meta else self.batch_meta['no_crop']
self.scalar_mean = 'scalar_mean' in dp_params and dp_params['scalar_mean']
        # Maintain pointers to previously-returned data matrices so they don't get garbage collected.
# I've never seen this happen but it's a safety measure.
self.data = [None, None] # These are pointers to previously-returned data matrices
# This is where I crop data into
self.cropped_data = [n.zeros((0*self.data_mult, self.get_data_dims()), dtype=n.float32) for x in xrange(2)]
# This is where I load data into (jpeg --> uint8)
self.orig_data = [n.zeros((self.batch_size, self.img_size**2*3), dtype=n.uint8) for x in xrange(1 if test else 2)]
self.loader_thread, self.color_noise_thread = None, None
self.convnet = dp_params['convnet']
self.num_noise = self.batch_size
self.batches_generated, self.loaders_started = 0, 0
self.data_mean_crop = self.data_mean.reshape((3,self.img_size,self.img_size))[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size].reshape((1,3*self.inner_size**2))
if self.no_crop or self.scalar_mean:
self.data_mean_crop = self.data_mean.mean()
def get_data_dims(self, idx=0):
if idx == 0:
return self.inner_size**2 * 3
return 1
def start_loader(self, batch_idx):
self.load_data = []
#print "loading %d" % self.batch_range_perm[self.batch_idx]
self.loader_thread = JPEGBatchLoaderThread(self.data_dir,
self.get_data_file_name(self.batch_range[batch_idx]),
self.data_mean_crop,
self.no_crop,
self.label_offset,
self.orig_data[self.loaders_started],
self.load_data)
self.loader_thread.start()
self.loaders_started = (self.loaders_started + 1) % 2
def start_color_noise_maker(self):
color_noise_list = []
self.color_noise_thread = ColorNoiseMakerThread(self.color_stdevs, self.color_eig, self.num_noise, color_noise_list)
self.color_noise_thread.start()
return color_noise_list
def get_labels(self, datadic):
pass
def get_next_batch(self):
self.d_idx = self.batches_generated % 2
if self.test:
epoch, batchnum, self.data[self.d_idx] = LabeledDataProvider.get_next_batch(self)
self.data[self.d_idx] = JPEGBatchLoaderThread.load_jpeg_batch(self.data[self.d_idx],
self.data_mean_crop,
self.no_crop,
self.label_offset,
self.orig_data[0])
else:
epoch, batchnum = self.curr_epoch, self.curr_batchnum
if self.loader_thread is None:
self.start_loader(self.batch_idx)
self.loader_thread.join()
self.data[self.d_idx] = self.load_data[0]
self.start_loader(self.get_next_batch_idx())
else:
# Set the argument to join to 0 to re-enable batch reuse
self.loader_thread.join()
if not self.loader_thread.is_alive():
self.data[self.d_idx] = self.load_data[0]
self.start_loader(self.get_next_batch_idx())
#else:
# print "Re-using batch"
self.advance_batch()
cropped = self.get_cropped_data(self.data[self.d_idx])
if self.color_noise_coeff > 0 and not self.test:
# At this point the data already has 0 mean.
# So I'm going to add noise to it, but I'm also going to scale down
# the original data. This is so that the overall scale of the training
# data doesn't become too different from the test data.
s = cropped.shape
cropped_size = self.get_data_dims(0) / 3
ncases = s[0]
if self.color_noise_thread is None:
self.color_noise_list = self.start_color_noise_maker()
self.color_noise_thread.join()
self.color_noise = self.color_noise_list[0]
self.color_noise_list = self.start_color_noise_maker()
else:
self.color_noise_thread.join(0)
if not self.color_noise_thread.is_alive():
self.color_noise = self.color_noise_list[0]
self.color_noise_list = self.start_color_noise_maker()
cropped = self.cropped_data[self.d_idx] = cropped.reshape((ncases*3, cropped_size))
self.color_noise = self.color_noise[:ncases,:].reshape((3*ncases, 1))
cropped += self.color_noise * self.color_noise_coeff
cropped = self.cropped_data[self.d_idx] = cropped.reshape((ncases, 3* cropped_size))
cropped /= (1.0 + self.color_noise_coeff)
self.data[self.d_idx]['labels'] = self.get_labels(self.data[self.d_idx])
self.data[self.d_idx]['data'] = cropped
self.batches_generated += 1
if False and not self.test:
idx = 111
cropped -= cropped.min()
cropped /= cropped.max()
label = int(self.data[self.d_idx]['labels'][idx,0])
print label
print self.batch_meta['label_names'][label]
print cropped.max(), cropped.min()
print self.data[self.d_idx]['labels']
self.showimg(cropped[idx,:])
# NOTE: It would be good to add some logic here to pad irregularly-sized
# batches by duplicating training cases.
return epoch, batchnum, [self.data[self.d_idx]['data'].T, self.data[self.d_idx]['labels'].T]
def get_cropped_data(self, data):
cropped = self.cropped_data[self.d_idx]
if cropped.shape[0] != data['data'].shape[0] * self.data_mult:
cropped = self.cropped_data[self.d_idx] = n.zeros((data['data'].shape[0] * self.data_mult, cropped.shape[1]), dtype=n.float32)
self.__trim_borders(data['data'], cropped)
return self.subtract_mean(cropped)
def subtract_mean(self,data):
data -= self.data_mean_crop
return data
# Takes as input an array returned by get_next_batch
# Returns a (numCases, imgSize, imgSize, 3) array which can be
# fed to pylab for plotting.
# This is used by shownet.py to plot test case predictions.
def get_plottable_data(self, data, add_mean=True):
mean = self.data_mean_crop if data.flags.f_contiguous or self.scalar_mean else self.data_mean_crop.T
return n.require((data + (mean if add_mean else 0)).T.reshape(data.shape[1], 3, self.inner_size, self.inner_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single)
def __trim_borders(self, x, target):
y = x.reshape(x.shape[0], 3, self.img_size, self.img_size)
if self.test: # don't need to loop over cases
if self.multiview:
start_positions = [(0,0), (0, self.border_size*2),
(self.border_size, self.border_size),
(self.border_size*2, 0), (self.border_size*2, self.border_size*2)]
end_positions = [(sy+self.inner_size, sx+self.inner_size) for (sy,sx) in start_positions]
for i in xrange(self.num_views/2):
pic = y[:,:,start_positions[i][0]:end_positions[i][0],start_positions[i][1]:end_positions[i][1]]
target[i * x.shape[0]:(i+1)* x.shape[0],:] = pic.reshape((x.shape[0], self.get_data_dims()))
target[(self.num_views/2 + i) * x.shape[0]:(self.num_views/2 +i+1)* x.shape[0],:] = pic[:,:,:,::-1].reshape((x.shape[0],self.get_data_dims()))
else:
pic = y[:,:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size] # just take the center for now
target[:,:] = pic.reshape((x.shape[0], self.get_data_dims()))
else:
for c in xrange(0, x.shape[0], self.crop_chunk): # loop over cases in chunks
startY, startX = nr.randint(0,self.border_size*2 + 1), nr.randint(0,self.border_size*2 + 1)
endY, endX = startY + self.inner_size, startX + self.inner_size
c_end = min(c + self.crop_chunk, x.shape[0])
pic = y[c:c_end,:,startY:endY,startX:endX]
if nr.randint(2) == 0: # also flip the images with 50% probability
pic = pic[:,:,:,::-1]
target[c:c_end,:] = pic.reshape((c_end-c, self.get_data_dims()))
# With 5% chance, replace this chunk with the average of this chunk and some future chunk
#if c >= self.crop_chunk and nr.rand() < 0.05:
#r = nr.randint(0, c - self.crop_chunk + 1)
#r_end = r + self.crop_chunk
#target[c:c_end,:] = 0.75 * target[c:c_end,:] + 0.25 * target[r:r_end,:]
#print "faded in past batch (%d,%d) to batch (%d,%d)" % (r, r_end, c, c_end)
#for c in xrange(0, x.shape[0]-self.crop_chunk, self.crop_chunk): # loop over cases in chunks
# if nr.rand() < 0.05:
# c_end = min(c + self.crop_chunk, x.shape[0])
# r = nr.randint(c, x.shape[0] - self.crop_chunk+1)
# r_end = r + self.crop_chunk
# target[c:c_end,:] = 0.75 * target[c:c_end,:] + 0.25 * target[r:r_end,:]
#print "faded in past batch (%d,%d) to batch (%d,%d)" % (r, r_end, c, c_end)
#target[:] = n.require(target[:,nr.permutation(x.shape[1])], requirements='C')
class JPEGCroppedImageNetLogRegDP(JPEGCroppedImageNetDP):
def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
JPEGCroppedImageNetDP.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
def get_labels(self, data):
return n.require(n.tile(n.array(data['labels'], dtype=n.single).reshape((data['data'].shape[0], 1)), (self.data_mult, 1)), requirements='C')

218
convnet.py Executable file

@@ -0,0 +1,218 @@
import numpy as n
import numpy.random as nr
import random as r
from util import *
from data import *
from options import *
from gpumodel import *
import sys
import math as m
import layer as lay
from convdata import *
from convdata_jpeg import JPEGCroppedImageNetLogRegDP
from convdata_flickr import JPEGCroppedFlickrCEDP, DummyConvNetCEDP
from convdata_cifar import CIFARDataProvider, CroppedCIFARDataProvider
from os import linesep as NL
import pylab as pl
import copy as cp
class ConvNet(IGPUModel):
def __init__(self, op, load_dic, dp_params={}):
filename_options = []
for v in ('color_noise', 'pca_noise', 'multiview_test', 'crop_border', 'scalar_mean', 'minibatch_size'):
dp_params[v] = op.get_value(v)
IGPUModel.__init__(self, "ConvNet", op, load_dic, filename_options, dp_params=dp_params)
self.writing_test = False
def import_model(self):
lib_name = "_ConvNet_k20x" if is_kepler_machine() else "_ConvNet"
print "========================="
print "Importing %s C++ module" % lib_name
self.libmodel = __import__(lib_name)
def init_model_lib(self):
self.libmodel.initModel(self.layers, self.device_ids, self.device_cpus, self.minibatch_size, self.wupdate_freq)
def init_model_state(self):
ms = self.model_state
if self.load_file:
ms['layers'] = lay.LayerParser.parse_layers(self.layer_def, self.layer_params, self, ms['layers'])
else:
ms['layers'] = lay.LayerParser.parse_layers(self.layer_def, self.layer_params, self)
# Convert convolutional layers to local
if len(self.op.get_value('conv_to_local')) > 0:
for i, layer in enumerate(ms['layers']):
if layer['type'] == 'conv' and layer['name'] in self.op.get_value('conv_to_local'):
lay.LocalLayerParser.conv_to_local(ms['layers'], i)
# Decouple weight matrices
if len(self.op.get_value('unshare_weights')) > 0:
for name_str in self.op.get_value('unshare_weights'):
if name_str:
name = lay.WeightLayerParser.get_layer_name(name_str)
if name is not None:
name, idx = name[0], name[1]
if name not in ms['layers']:
raise ModelStateException("Layer '%s' does not exist; unable to unshare" % name)
layer = ms['layers'][name]
lay.WeightLayerParser.unshare_weights(layer, ms['layers'], matrix_idx=idx)
else:
raise ModelStateException("Invalid layer name '%s'; unable to unshare." % name_str)
self.op.set_value('conv_to_local', [], parse=False)
self.op.set_value('unshare_weights', [], parse=False)
self.writing_test = False
def get_layer_idx(self, layer_name, check_type=[]):
try:
layer_idx = [l['name'] for l in self.model_state['layers']].index(layer_name)
if check_type:
layer_type = self.model_state['layers'][layer_idx]['type']
if layer_type not in check_type:
raise ModelStateException("Layer with name '%s' has type '%s'; should be one of %s." % (layer_name, layer_type, ",".join("'%s'" %s for s in check_type)))
return layer_idx
except ValueError:
raise ModelStateException("Layer with name '%s' not defined." % layer_name)
def fill_excused_options(self):
if self.op.get_value('check_grads'):
self.op.set_value('save_path', '')
self.op.set_value('train_batch_range', '0')
self.op.set_value('test_batch_range', '0')
self.op.set_value('data_path', '')
# Make sure the data provider returned data in proper format
def parse_batch_data(self, batch_data, train=True):
if max(d.dtype != n.single for d in batch_data[2]):
raise DataProviderException("All matrices returned by data provider must consist of single-precision floats.")
return batch_data
def start_batch(self, batch_data, train=True):
data = batch_data[2]
self.writing_test = False
if self.check_grads:
self.libmodel.checkGradients(data)
elif not train and self.multiview_test:
num_views = self.test_data_provider.num_views
if self.test_out != "" and self.logreg_name != "":
self.writing_test = True
self.test_file_name = os.path.join(self.test_out, 'test_preds_%d' % batch_data[1])
self.probs = n.zeros((data[0].shape[1]/num_views, self.test_data_provider.get_num_classes()), dtype=n.single)
self.libmodel.startMultiviewTest(data, num_views, self.probs, self.logreg_name)
else:
self.libmodel.startMultiviewTest(data, num_views)
else:
num_batches_total = self.num_epochs * len(self.train_batch_range)
progress = min(1.0, max(0.0, float(self.get_num_batches_done()-1) / num_batches_total))
self.libmodel.startBatch(data, progress, not train)
def finish_batch(self):
ret = IGPUModel.finish_batch(self)
if self.writing_test:
if not os.path.exists(self.test_out):
os.makedirs(self.test_out)
pickle(self.test_file_name, {'data': self.probs,
'note': 'generated from %s' % self.save_file})
return ret
def print_iteration(self):
print "%d.%d..." % (self.epoch, self.batchnum),
def print_train_time(self, compute_time_py):
print "(%.3f sec)" % (compute_time_py)
def print_costs(self, cost_outputs):
costs, num_cases = cost_outputs[0], cost_outputs[1]
for errname in costs.keys():
costs[errname] = [(v/num_cases) for v in costs[errname]]
print "%s: " % errname,
print ", ".join("%.6f" % v for v in costs[errname]),
if sum(m.isnan(v) for v in costs[errname]) > 0 or sum(m.isinf(v) for v in costs[errname]) > 0:
print "^ got nan or inf!"
sys.exit(1)
def print_train_results(self):
self.print_costs(self.train_outputs[-1])
def print_test_status(self):
pass
def print_test_results(self):
print NL + "======================Test output======================"
self.print_costs(self.test_outputs[-1])
print NL + "----------------------Averages-------------------------"
self.print_costs((self.aggregate_test_outputs(self.test_outputs[-len(self.test_batch_range):])[0], min(len(self.test_outputs), len(self.test_batch_range))))
print NL + "-------------------------------------------------------",
for name in sorted(self.layers.keys()): # This is kind of hacky but will do for now.
l = self.layers[name]
if 'weights' in l:
if type(l['weights']) == n.ndarray:
print "%sLayer '%s' weights: %e [%e]" % (NL, l['name'], n.mean(n.abs(l['weights'])), n.mean(n.abs(l['weightsInc']))),
elif type(l['weights']) == list:
print ""
print NL.join("Layer '%s' weights[%d]: %e [%e]" % (l['name'], i, n.mean(n.abs(w)), n.mean(n.abs(wi))) for i,(w,wi) in enumerate(zip(l['weights'],l['weightsInc']))),
print "%sLayer '%s' biases: %e [%e]" % (NL, l['name'], n.mean(n.abs(l['biases'])), n.mean(n.abs(l['biasesInc']))),
print ""
def conditional_save(self):
self.save_state()
print "-------------------------------------------------------"
print "Saved checkpoint to %s" % os.path.join(self.save_path, self.save_file)
print "=======================================================",
def aggregate_test_outputs(self, test_outputs):
test_outputs = cp.deepcopy(test_outputs)
num_cases = sum(t[1] for t in test_outputs)
for i in xrange(1, len(test_outputs)):
for k,v in test_outputs[i][0].items():
for j in xrange(len(v)):
test_outputs[0][0][k][j] += test_outputs[i][0][k][j]
return (test_outputs[0][0], num_cases)
@classmethod
def get_options_parser(cls):
op = IGPUModel.get_options_parser()
op.add_option("mini", "minibatch_size", IntegerOptionParser, "Minibatch size", default=128)
op.add_option("layer-def", "layer_def", StringOptionParser, "Layer definition file", set_once=True)
op.add_option("layer-params", "layer_params", StringOptionParser, "Layer parameter file")
op.add_option("check-grads", "check_grads", BooleanOptionParser, "Check gradients and quit?", default=0, excuses=['data_path','save_path','train_batch_range','test_batch_range'])
op.add_option("multiview-test", "multiview_test", BooleanOptionParser, "Cropped DP: test on multiple patches?", default=0)
op.add_option("crop-border", "crop_border", IntegerOptionParser, "Cropped DP: crop border size", default=4, set_once=True)
op.add_option("conv-to-local", "conv_to_local", ListOptionParser(StringOptionParser), "Convert given conv layers to unshared local", default=[])
op.add_option("unshare-weights", "unshare_weights", ListOptionParser(StringOptionParser), "Unshare weight matrices in given layers", default=[])
op.add_option("conserve-mem", "conserve_mem", BooleanOptionParser, "Conserve GPU memory (slower)?", default=0)
op.add_option("color-noise", "color_noise", FloatOptionParser, "Add PCA noise to color channels with given scale", default=0.0)
op.add_option("test-out", "test_out", StringOptionParser, "Output test case predictions to given path", default="", requires=['logreg_name', 'multiview_test'])
op.add_option("logreg-name", "logreg_name", StringOptionParser, "Logreg cost layer name (for --test-out)", default="")
op.add_option("pca-noise", "pca_noise", FloatOptionParser, "Add PCA noise to pixels with given scale", default=0.0)
op.add_option("scalar-mean", "scalar_mean", FloatOptionParser, "Subtract scalar pixel mean (as opposed to vector)?", default=False)
op.add_option("wupdate-freq", "wupdate_freq", IntegerOptionParser, "Weight update (inverse) frequency, in minibatches (1 = every minibatch)", default=1)
op.delete_option('max_test_err')
op.options["max_filesize_mb"].default = 0
op.options["testing_freq"].default = 50
op.options["num_epochs"].default = 50000
op.options['dp_type'].default = None
DataProvider.register_data_provider('dummy-lr-n', 'Dummy ConvNet logistic regression', DummyConvNetLogRegDP)
DataProvider.register_data_provider('inet-lr', 'ImageNet logistic regression', ImageNetLogRegDP)
DataProvider.register_data_provider('inet-lr-cropped', 'ImageNet logistic regression cropped', CroppedImageNetLogRegDP)
DataProvider.register_data_provider('inet-lr-cropped-jpeg', 'ImageNet logistic regression cropped JPEG', JPEGCroppedImageNetLogRegDP)
DataProvider.register_data_provider('inet-rs-lr-cropped', 'Random scale cropped ImageNet logistic regression', RandomScaleImageNetLogRegDP)
DataProvider.register_data_provider('flickr-ce-cropped', 'Flickr cross-entropy cropped', JPEGCroppedFlickrCEDP)
DataProvider.register_data_provider('dummy-ce-n', 'Dummy cross-entropy', DummyConvNetCEDP)
DataProvider.register_data_provider('flatmem', 'Flat memory', FlatMemoryDataProvider)
DataProvider.register_data_provider('cifar', 'CIFAR', CIFARDataProvider)
DataProvider.register_data_provider('cifar-cropped', 'Cropped CIFAR', CroppedCIFARDataProvider)
return op
if __name__ == "__main__":
#nr.seed(5)
op = ConvNet.get_options_parser()
op, load_dic = IGPUModel.parse_options(op)
model = ConvNet(op, load_dic)
model.start()
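
For orientation, here is a minimal sketch of how a training run might be launched with the options defined above. Only --layer-def, --layer-params and --mini are declared in this file; the remaining flags (--data-provider, --data-path, --save-path, --train-range, --test-range) are assumed to come from the base IGPUModel/DataProvider parsers, and every path and file name below is hypothetical.

import subprocess

# Hypothetical invocation of the trainer; adjust paths and batch ranges to the local setup.
cmd = ["python", "convnet.py",
       "--data-provider=cifar",                   # registered above under the name 'cifar'
       "--data-path=/path/to/cifar-batches",      # hypothetical batch directory
       "--train-range=1-5",                       # assumed flag from the base parser
       "--test-range=6",                          # assumed flag from the base parser
       "--layer-def=./layers-19pct.cfg",          # hypothetical cfg file names
       "--layer-params=./layer-params-19pct.cfg",
       "--mini=128",                              # minibatch size (128 is already the default)
       "--save-path=/tmp/convnet-checkpoints"]    # assumed flag from the base parser
subprocess.call(cmd)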

143
deviceQuery.txt Normal file
View file

@ -0,0 +1,143 @@
/u/kriz/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/deviceQuery Starting...
CUDA Device Query (Runtime API) version (CUDART static linking)
Found 4 CUDA Capable device(s)
Device 0: "Tesla S2050"
CUDA Driver Version / Runtime Version 4.2 / 4.2
CUDA Capability Major/Minor version number: 2.0
Total amount of global memory: 3072 MBytes (3220897792 bytes)
(14) Multiprocessors x ( 32) CUDA Cores/MP: 448 CUDA Cores
GPU Clock rate: 1147 MHz (1.15 GHz)
Memory Clock rate: 1546 Mhz
Memory Bus Width: 384-bit
L2 Cache Size: 786432 bytes
Max Texture Dimension Size (x,y,z) 1D=(65536), 2D=(65536,65535), 3D=(2048,2048,2048)
Max Layered Texture Size (dim) x layers 1D=(16384) x 2048, 2D=(16384,16384) x 2048
Total amount of constant memory: 65536 bytes
Total amount of shared memory per block: 49152 bytes
Total number of registers available per block: 32768
Warp size: 32
Maximum number of threads per multiprocessor: 1536
Maximum number of threads per block: 1024
Maximum sizes of each dimension of a block: 1024 x 1024 x 64
Maximum sizes of each dimension of a grid: 65535 x 65535 x 65535
Maximum memory pitch: 2147483647 bytes
Texture alignment: 512 bytes
Concurrent copy and execution: Yes with 2 copy engine(s)
Run time limit on kernels: No
Integrated GPU sharing Host Memory: No
Support host page-locked memory mapping: Yes
Concurrent kernel execution: Yes
Alignment requirement for Surfaces: Yes
Device has ECC support enabled: No
Device is using TCC driver mode: No
Device supports Unified Addressing (UVA): Yes
Device PCI Bus ID / PCI location ID: 7 / 0
Compute Mode:
< Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) >
Device 1: "Tesla S2050"
CUDA Driver Version / Runtime Version 4.2 / 4.2
CUDA Capability Major/Minor version number: 2.0
Total amount of global memory: 3072 MBytes (3220897792 bytes)
(14) Multiprocessors x ( 32) CUDA Cores/MP: 448 CUDA Cores
GPU Clock rate: 1147 MHz (1.15 GHz)
Memory Clock rate: 1546 Mhz
Memory Bus Width: 384-bit
L2 Cache Size: 786432 bytes
Max Texture Dimension Size (x,y,z) 1D=(65536), 2D=(65536,65535), 3D=(2048,2048,2048)
Max Layered Texture Size (dim) x layers 1D=(16384) x 2048, 2D=(16384,16384) x 2048
Total amount of constant memory: 65536 bytes
Total amount of shared memory per block: 49152 bytes
Total number of registers available per block: 32768
Warp size: 32
Maximum number of threads per multiprocessor: 1536
Maximum number of threads per block: 1024
Maximum sizes of each dimension of a block: 1024 x 1024 x 64
Maximum sizes of each dimension of a grid: 65535 x 65535 x 65535
Maximum memory pitch: 2147483647 bytes
Texture alignment: 512 bytes
Concurrent copy and execution: Yes with 2 copy engine(s)
Run time limit on kernels: No
Integrated GPU sharing Host Memory: No
Support host page-locked memory mapping: Yes
Concurrent kernel execution: Yes
Alignment requirement for Surfaces: Yes
Device has ECC support enabled: No
Device is using TCC driver mode: No
Device supports Unified Addressing (UVA): Yes
Device PCI Bus ID / PCI location ID: 8 / 0
Compute Mode:
< Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) >
Device 2: "Tesla S2050"
CUDA Driver Version / Runtime Version 4.2 / 4.2
CUDA Capability Major/Minor version number: 2.0
Total amount of global memory: 3072 MBytes (3220897792 bytes)
(14) Multiprocessors x ( 32) CUDA Cores/MP: 448 CUDA Cores
GPU Clock rate: 1147 MHz (1.15 GHz)
Memory Clock rate: 1546 Mhz
Memory Bus Width: 384-bit
L2 Cache Size: 786432 bytes
Max Texture Dimension Size (x,y,z) 1D=(65536), 2D=(65536,65535), 3D=(2048,2048,2048)
Max Layered Texture Size (dim) x layers 1D=(16384) x 2048, 2D=(16384,16384) x 2048
Total amount of constant memory: 65536 bytes
Total amount of shared memory per block: 49152 bytes
Total number of registers available per block: 32768
Warp size: 32
Maximum number of threads per multiprocessor: 1536
Maximum number of threads per block: 1024
Maximum sizes of each dimension of a block: 1024 x 1024 x 64
Maximum sizes of each dimension of a grid: 65535 x 65535 x 65535
Maximum memory pitch: 2147483647 bytes
Texture alignment: 512 bytes
Concurrent copy and execution: Yes with 2 copy engine(s)
Run time limit on kernels: No
Integrated GPU sharing Host Memory: No
Support host page-locked memory mapping: Yes
Concurrent kernel execution: Yes
Alignment requirement for Surfaces: Yes
Device has ECC support enabled: No
Device is using TCC driver mode: No
Device supports Unified Addressing (UVA): Yes
Device PCI Bus ID / PCI location ID: 16 / 0
Compute Mode:
< Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) >
Device 3: "Tesla S2050"
CUDA Driver Version / Runtime Version 4.2 / 4.2
CUDA Capability Major/Minor version number: 2.0
Total amount of global memory: 3072 MBytes (3220897792 bytes)
(14) Multiprocessors x ( 32) CUDA Cores/MP: 448 CUDA Cores
GPU Clock rate: 1147 MHz (1.15 GHz)
Memory Clock rate: 1546 Mhz
Memory Bus Width: 384-bit
L2 Cache Size: 786432 bytes
Max Texture Dimension Size (x,y,z) 1D=(65536), 2D=(65536,65535), 3D=(2048,2048,2048)
Max Layered Texture Size (dim) x layers 1D=(16384) x 2048, 2D=(16384,16384) x 2048
Total amount of constant memory: 65536 bytes
Total amount of shared memory per block: 49152 bytes
Total number of registers available per block: 32768
Warp size: 32
Maximum number of threads per multiprocessor: 1536
Maximum number of threads per block: 1024
Maximum sizes of each dimension of a block: 1024 x 1024 x 64
Maximum sizes of each dimension of a grid: 65535 x 65535 x 65535
Maximum memory pitch: 2147483647 bytes
Texture alignment: 512 bytes
Concurrent copy and execution: Yes with 2 copy engine(s)
Run time limit on kernels: No
Integrated GPU sharing Host Memory: No
Support host page-locked memory mapping: Yes
Concurrent kernel execution: Yes
Alignment requirement for Surfaces: Yes
Device has ECC support enabled: No
Device is using TCC driver mode: No
Device supports Unified Addressing (UVA): Yes
Device PCI Bus ID / PCI location ID: 17 / 0
Compute Mode:
< Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) >
deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 4.2, CUDA Runtime Version = 4.2, NumDevs = 4, Device = Tesla S2050, Device = Tesla S2050

View file

@ -0,0 +1,35 @@
# 18% error on CIFAR-10 in 20 minutes - layer parameter file
# Reduce all learning rates by factor of 10 after 120 epochs.
# Then another factor of 10 after 10 more epochs.
[conv1]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
[conv2]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
[conv3]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
[fc10]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=1
[logprob]
coeff=1
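
The comment at the top of this file calls for dividing all learning rates by 10 after 120 epochs, and by 10 again after 10 more. A minimal sketch of one way to apply that edit to the parameter file between runs follows; the file name is hypothetical and the use of ConfigParser is an assumption -- the trainer only needs the rewritten file to keep the same [section]/key layout (note that ConfigParser drops the leading '#' comment lines when it rewrites the file).

from ConfigParser import RawConfigParser

def scale_learning_rates(path, factor=0.1):
    cp = RawConfigParser()
    cp.optionxform = str                      # keep the case of keys like epsW/epsB
    cp.read(path)
    for section in cp.sections():
        for key in ('epsW', 'epsB'):
            if cp.has_option(section, key):
                # values may be comma-separated lists (one entry per input)
                scaled = ','.join(str(float(v) * factor)
                                  for v in cp.get(section, key).split(','))
                cp.set(section, key, scaled)
    with open(path, 'w') as f:
        cp.write(f)

# e.g. after epoch 120 (hypothetical file name):
# scale_learning_rates('layer-params-18pct.cfg')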

View file

@ -0,0 +1,33 @@
# 19% error on CIFAR-10 in 20 minutes - layer parameter file
# Set wc to 0 for translations -- 14.2%
[conv1]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
[conv2]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
[conv3]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
[fc10]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=3
[logprob]
coeff=1

View file

@ -0,0 +1,39 @@
# 26% error on CIFAR-10 in 80 seconds - layer parameter file
[conv1]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
[conv2]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
[conv3]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
[fc64]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=.03
[fc10]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=.03
[logprob]
coeff=1

View file

@ -0,0 +1,40 @@
# 12% error on CIFAR-10 - layer parameter file
# See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology
[conv1]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.00
[conv2]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.00
[local3]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
[local4]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
[fc10]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
[logprob]
coeff=1

View file

@ -0,0 +1,40 @@
# 13% error on CIFAR-10 - layer parameter file
# See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology
[conv1]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.00
[conv2]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.00
[local3]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
[local4]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
[fc10]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
[logprob]
coeff=1

View file

@ -0,0 +1,44 @@
[conv32]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0
[local32]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0
[fc1024]
momW=0.9
momB=0.9
epsW=0.00001
epsB=0.00002
wc=0
[conv32-2]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0
[conv32-3]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0
[fc10]
epsW=0.0001,0.001
epsB=0.002
momW=0.5,0.9
momB=0.9
wc=0,0
[logprob]
coeff=1

View file

@ -0,0 +1,66 @@
[conv32a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0
[conv32b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0
[conv32c]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0
[fc10]
wc=0
momB=0
momW=0
epsW=0.00001
epsB=0.00002
[fc16a]
wc=0,0,0
momB=0
momW=0,0,0
epsW=0.00001,0.1,0.1
epsB=0.00002
[fc16b]
wc=0,0,0
momB=0
momW=0,0,0
epsW=0.00001,0.1,0.1
epsB=0.00002
[fc16c]
wc=0,0,0
momB=0
momW=0,0,0
epsW=0.00001,0.1,0.1
epsB=0.00002
[logreg]
coeff=1
[rnorm1a]
scale=0.0001
pow=0.75
minDiv=0.25
[rnorm1b]
scale=0.0001
pow=0.75
minDiv=0.25
[rnorm1c]
scale=0.0001
pow=0.75
minDiv=0.25

View file

@ -0,0 +1,109 @@
# 18% error on CIFAR-10 in 20 minutes - layer definition file
[data]
type=data
dataIdx=0
[labels]
type=data
dataIdx=1
[conv1]
type=conv
inputs=data
channels=3
filters=32
padding=2
stride=1
filterSize=5
neuron=relu
initW=0.0001
partialSum=4
sharedBiases=1
[pool1]
type=pool
pool=max
inputs=conv1
start=0
sizeX=3
stride=2
outputsX=0
channels=32
[rnorm1]
type=rnorm
inputs=pool1
channels=32
sizeX=3
scale=0.00005
pow=.75
[conv2]
type=conv
inputs=rnorm1
filters=32
padding=2
stride=1
filterSize=5
channels=32
neuron=relu
initW=0.01
partialSum=4
sharedBiases=1
[pool2]
type=pool
pool=avg
inputs=conv2
start=0
sizeX=3
stride=2
outputsX=0
channels=32
[rnorm2]
type=rnorm
inputs=pool2
channels=32
sizeX=3
scale=0.00005
pow=.75
[conv3]
type=conv
inputs=rnorm2
filters=64
padding=2
stride=1
filterSize=5
channels=32
neuron=relu
initW=0.01
partialSum=4
sharedBiases=1
[pool3]
type=pool
pool=avg
inputs=conv3
start=0
sizeX=3
stride=2
outputsX=0
channels=64
[fc10]
type=fc
outputs=10
inputs=pool3
initW=0.01
neuron=ident
[probs]
type=softmax
inputs=fc10
[logprob]
type=cost.logreg
inputs=labels,probs
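
As a sanity check on the stack above, a small sketch that traces CIFAR-10's 32x32 input through the conv/pool stages. The convolution arithmetic follows directly from the padding/stride/filterSize values in this file; the pooling formula used for outputsX=0 (ceil((size - sizeX)/stride) + 1 with start=0) is an assumption about how the code derives the output size.

import math

def conv_out(size, filter_size, padding, stride):
    # standard convolution output size with symmetric padding
    return (size + 2 * padding - filter_size) / stride + 1

def pool_out(size, sizeX, stride):
    # assumed convention for outputsX=0, start=0
    return int(math.ceil((size - sizeX) / float(stride))) + 1

s = 32                       # CIFAR-10 images are 32x32
s = conv_out(s, 5, 2, 1)     # conv1  -> 32
s = pool_out(s, 3, 2)        # pool1  -> 16
s = conv_out(s, 5, 2, 1)     # conv2  -> 16
s = pool_out(s, 3, 2)        # pool2  -> 8
s = conv_out(s, 5, 2, 1)     # conv3  -> 8
s = pool_out(s, 3, 2)        # pool3  -> 4
print s, 'x', s, 'x 64 =', s * s * 64, 'inputs to fc10'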

View file

@ -0,0 +1,93 @@
# 19% error on CIFAR-10 in 20 minutes - layer definition file
[data]
type=data
dataIdx=0
[labels]
type=data
dataIdx=1
[conv1]
type=conv
inputs=data
channels=3
filters=32
padding=2
stride=1
filterSize=5
neuron=relu
initW=0.0001
partialSum=1
sharedBiases=1
[pool1]
type=pool
pool=max
inputs=conv1
start=0
sizeX=3
stride=2
outputsX=0
channels=32
[conv2]
type=conv
inputs=pool1
filters=32
padding=2
stride=1
filterSize=5
channels=32
neuron=relu
initW=0.01
partialSum=1
sharedBiases=1
[pool2]
type=pool
pool=avg
inputs=conv2
start=0
sizeX=3
stride=2
outputsX=0
channels=32
[conv3]
type=conv
inputs=pool2
filters=64
padding=2
stride=1
filterSize=5
channels=32
neuron=relu
initW=0.01
partialSum=1
sharedBiases=1
[pool3]
type=pool
pool=avg
inputs=conv3
start=0
sizeX=3
stride=2
outputsX=0
channels=64
[fc10]
type=fc
outputs=10
inputs=pool3
initW=0.01
neuron=ident
[probs]
type=softmax
inputs=fc10
[logprob]
type=cost.logreg
inputs=labels,probs

View file

@ -0,0 +1,100 @@
# 26% error on CIFAR-10 in 80 seconds - layer definition file
[data]
type=data
dataIdx=0
[labels]
type=data
dataIdx=1
[conv1]
type=conv
inputs=data
channels=3
filters=32
padding=2
stride=1
filterSize=5
neuron=relu
initW=0.0001
partialSum=4
sharedBiases=1
[pool1]
type=pool
pool=max
inputs=conv1
start=0
sizeX=3
stride=2
outputsX=0
channels=32
[conv2]
type=conv
inputs=pool1
filters=32
padding=2
stride=1
filterSize=5
channels=32
neuron=relu
initW=0.01
partialSum=4
sharedBiases=1
[pool2]
type=pool
pool=avg
inputs=conv2
start=0
sizeX=3
stride=2
outputsX=0
channels=32
[conv3]
type=conv
inputs=pool2
filters=64
padding=2
stride=1
filterSize=5
channels=32
neuron=relu
initW=0.01
partialSum=4
sharedBiases=1
[pool3]
type=pool
pool=avg
inputs=conv3
start=0
sizeX=3
stride=2
outputsX=0
channels=64
[fc64]
type=fc
outputs=64
inputs=pool3
initW=0.1
neuron=relu
[fc10]
type=fc
outputs=10
inputs=fc64
initW=0.1
neuron=ident
[probs]
type=softmax
inputs=fc10
[logprob]
type=cost.logreg
inputs=labels,probs

View file

@ -0,0 +1,92 @@
# 19% error on CIFAR-10 in 20 minutes - layer definition file
[data]
type=data
dataIdx=0
[labels]
type=data
dataIdx=1
[conv1]
type=conv
inputs=data
channels=3
filters=64
padding=2
stride=1
filterSize=5
neuron=relu
initW=0.0001
partialSum=4
sharedBiases=1
[pool1]
type=pool
pool=max
inputs=conv1
start=0
sizeX=3
stride=2
outputsX=0
channels=64
[conv2]
type=conv
inputs=pool1
filters=64
padding=2
stride=1
filterSize=5
channels=64
neuron=relu
initW=0.01
partialSum=8
sharedBiases=1
[pool2]
type=pool
pool=max
inputs=conv2
start=0
sizeX=3
stride=2
outputsX=0
channels=64
[local3]
type=local
inputs=pool2
filters=32
padding=1
stride=1
filterSize=3
channels=64
neuron=relu
initW=0.04
[local4]
type=local
inputs=local3
filters=32
padding=1
stride=1
filterSize=3
channels=32
neuron=relu
initW=0.04
[fc10]
type=fc
outputs=10
inputs=local4
initW=0.01
neuron=ident
[probs]
type=softmax
inputs=fc10
[logprob]
type=cost.logreg
inputs=labels,probs

View file

@ -0,0 +1,93 @@
# 13% error on CIFAR-10 in 20 minutes - layer definition file
# See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology
[data]
type=data
dataIdx=0
[labels]
type=data
dataIdx=1
[conv1]
type=conv
inputs=data
channels=3
filters=64
padding=2
stride=1
filterSize=5
neuron=relu
initW=0.0001
partialSum=4
sharedBiases=1
[pool1]
type=pool
pool=max
inputs=conv1
start=0
sizeX=3
stride=2
outputsX=0
channels=64
[conv2]
type=conv
inputs=pool1
filters=64
padding=2
stride=1
filterSize=5
channels=64
neuron=relu
initW=0.01
partialSum=8
sharedBiases=1
[pool2]
type=pool
pool=max
inputs=conv2
start=0
sizeX=3
stride=2
outputsX=0
channels=64
[local3]
type=local
inputs=pool2
filters=32
padding=1
stride=1
filterSize=3
channels=64
neuron=relu
initW=0.04
[local4]
type=local
inputs=local3
filters=32
padding=1
stride=1
filterSize=3
channels=32
neuron=relu
initW=0.04
[fc10]
type=fc
outputs=10
inputs=local4
initW=0.01
neuron=ident
[probs]
type=softmax
inputs=fc10
[logprob]
type=cost.logreg
inputs=labels,probs

View file

@ -0,0 +1,115 @@
# This is a layer configuration file that contains all the
# layer types supported by this code. It's not actually good for anything
# other than demonstrating how layers are specified and connected to one another.
# Note: this file has gotten so big that the resultant net will not run on anything short of a 3GB GTX 580.
# But there's no particular reason to run the net specified by this file. It's not actually good.
[data]
type=data
dataIdx=0
[labels]
type=data
dataIdx=1
[conv32]
type=conv
inputs=data
channels=3
filters=32
padding=4
stride=1
filterSize=9
neuron=logistic
initW=0.00001
partialSum=1
sharedBiases=true
[local32]
type=local
inputs=conv32
channels=32
filters=32
padding=4
stride=1
filterSize=9
neuron=logistic
initW=0.00001
[fc1024]
type=fc
outputs=1024
inputs=data
initW=0.001
neuron=relu
[maxpool]
type=pool
pool=max
inputs=local32
start=0
sizeX=4
stride=2
outputsX=0
channels=32
[rnorm1]
type=rnorm
inputs=maxpool
channels=32
sizeX=5
scale=0.0000125
pow=0.75
[cnorm1]
type=cnorm
inputs=rnorm1
channels=32
sizeX=7
scale=0.001
pow=0.5
[conv32-2]
type=conv
inputs=cnorm1
groups=4
channels=32
filters=32
padding=2
stride=1
filterSize=5
neuron=relu
initW=0.0001
partialSum=1
sharedBiases=false
[conv32-3]
type=conv
inputs=conv32-2
groups=4
channels=128
filters=32
padding=2
stride=2
filterSize=5
neuron=relu
initW=0.0001
partialSum=1
randSparse=true
filterChannels=64
[fc10]
type=fc
outputs=10
inputs=conv32-3,fc1024
initW=0.0001,0.0001
neuron=ident
[probs]
type=softmax
inputs=fc10
[logprob]
type=cost.logreg
inputs=labels,probs

View file

@ -0,0 +1,112 @@
[data]
type=data
dataIdx=0
[labels]
type=data
dataIdx=1
[conv32a]
type=conv
inputs=data
filters=16
padding=0
stride=1
filterSize=3
channels=3
neuron=relu
initW=0.3
initB=1
partialSum=1
sharedBiases=true
gpu=0
[conv32b]
type=conv
inputs=data
filters=16
padding=0
stride=1
filterSize=3
channels=3
neuron=relu
initW=0.3
initB=1
partialSum=1
sharedBiases=true
gpu=1
[conv32c]
type=conv
inputs=data
filters=16
padding=0
stride=1
filterSize=3
channels=3
neuron=relu
initW=0.3
initB=1
partialSum=1
sharedBiases=true
gpu=2
[rnorm1a]
type=cmrnorm
inputs=conv32a
channels=16
size=5
[rnorm1b]
type=cmrnorm
inputs=conv32b
channels=16
size=5
[rnorm1c]
type=cmrnorm
inputs=conv32c
channels=16
size=5
[fc16a]
type=fc
outputs=16
inputs=rnorm1a,rnorm1b,rnorm1c
initW=0.1,0.1,0.1
gpu=0
[fc16b]
type=fc
outputs=16
inputs=rnorm1b,rnorm1c,rnorm1a
initW=0.1,0.1,0.1
gpu=1
[fc16c]
type=fc
outputs=16
inputs=rnorm1c,rnorm1a,rnorm1a
initW=0.1,0.1,0.1
gpu=2
[concat]
type=concat
inputs=fc16a,fc16c,fc16b
[fc10]
type=fc
inputs=concat
outputs=10
initW=0.08
gpu=0
[probs]
type=softmax
inputs=fc10
gpu=0
[logreg]
type=cost.logreg
inputs=labels,probs
gpu=0

78
findsimilar.py Executable file
View file

@ -0,0 +1,78 @@
import os
import sys
from getopt import getopt
import numpy as n
import numpy.random as nr
from time import time
from util import *
import pylab as pl
import gc
imnet_dir = '/storage2/imnet-contest'
ftr_dir = '/storage2/imnet-features-4096'
TEST_IMGS = 128
TOP_IMGS = 16
TEST_BATCH = 'data_batch_3000'
IMG_SIZE = 256
IMGS_PER_FIGURE = 16
def draw_fig(test_imgs, tops):
for f in xrange(TEST_IMGS/IMGS_PER_FIGURE):
pl.figure(f+1, figsize=(15,15))
pl.clf()
bigpic = n.zeros((3, (IMG_SIZE+1)*IMGS_PER_FIGURE - 1, (IMG_SIZE+1)*(1+TOP_IMGS) + 3), dtype=n.single)
for i in xrange(IMGS_PER_FIGURE):
img_idx = f * IMGS_PER_FIGURE + i
bigpic[:, (IMG_SIZE+1) * i:(IMG_SIZE+1)*i+IMG_SIZE,:IMG_SIZE] = test_imgs[:,img_idx].reshape(3, IMG_SIZE, IMG_SIZE)
for j in xrange(TOP_IMGS):
if tops[img_idx][j]['img'] is not None:
bigpic[:, (IMG_SIZE+1) * i:(IMG_SIZE+1)*i+IMG_SIZE,IMG_SIZE + 4 + j*(IMG_SIZE+1):IMG_SIZE + 4 + j*(IMG_SIZE+1)+IMG_SIZE] = tops[img_idx][j]['img'].reshape(3, IMG_SIZE, IMG_SIZE)
bigpic /= 255
pl.imshow(bigpic.swapaxes(0,1).swapaxes(1,2), interpolation='lanczos')
if __name__ == "__main__":
(options, args) = getopt(sys.argv[1:], "")
options = dict(options)
# Take 128 images from test batch
dic = unpickle(os.path.join(ftr_dir, TEST_BATCH))
p = nr.permutation(dic['data'].shape[0])[:TEST_IMGS]
data = dic['data'][p,:]
labels = dic['labels'][:,p]
dicimgs = unpickle(os.path.join(imnet_dir, TEST_BATCH))
test_imgs = dicimgs['data'][:,p]
tops = [[{'dist': n.inf, 'batch': 0, 'idx': 0, 'img': None} for i in xrange(TOP_IMGS)] for j in xrange(TEST_IMGS)]
pl.ion()
for b in xrange(1, 1335):
dic = unpickle(os.path.join(ftr_dir, 'data_batch_%d' % b))
dicimgs = unpickle(os.path.join(imnet_dir, 'data_batch_%d' % b))
t = time()
dists = [n.sum((data[i,:] - dic['data'])**2, axis=1) for i in xrange(TEST_IMGS)]
minidx = [d.argmin() for d in dists]
print dists[0].shape
for i, dist, midx, top in zip(xrange(TEST_IMGS), dists, minidx, tops):
k = TOP_IMGS
while k > 0 and dist[midx] < top[k - 1]['dist']:
k -= 1
if k < TOP_IMGS:
top.insert(k, {'dist': dist[midx], 'batch': b, 'idx': midx, 'img': dicimgs['data'][:,midx].copy()})
top.pop()
#print top
del dic
del dicimgs
del dists
del minidx
gc.collect()
#print tops
print "Finished training batch %d (%f sec)" % (b, time() - t)
if b % 50 == 0:
draw_fig(test_imgs, tops)
pl.draw()
pl.ioff()
draw_fig(test_imgs, tops)
pl.show()
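
The loop above builds one squared-distance vector per test image against every feature batch. For reference, a minimal numpy sketch of the same computation as a single matrix expression; the function name and argument shapes are hypothetical and this is not part of the original script.

import numpy as n

def pairwise_sq_dists(test_feats, batch_feats):
    # ||a - b||^2 = ||a||^2 - 2*a.b + ||b||^2, evaluated for all pairs at once.
    # test_feats: (num_test, num_features), batch_feats: (num_batch, num_features)
    aa = (test_feats ** 2).sum(axis=1)[:, n.newaxis]
    bb = (batch_feats ** 2).sum(axis=1)[n.newaxis, :]
    return aa - 2 * n.dot(test_feats, batch_feats.T) + bb

Row i of the result then plays the same role as dists[i] in the loop above, so row.argmin() matches d.argmin().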

40
fix-big-imgnet.py Executable file
View file

@ -0,0 +1,40 @@
import os
import sys
from PIL import Image
from StringIO import StringIO
from util import *
src = '/ais/gobi3/u/ilya/jpg_valid_2010_85/'
dst = '/ais/gobi3/u/kriz/lsvrc-2010-jpg/'
BATCH_SIZE = 1024
def save_batch(c_strings, c_labels, c_wnids, out_b):
pickle(os.path.join(dst, 'data_batch_%d' % out_b), (c_strings, c_labels, c_wnids))
return out_b + 1
if __name__ == "__main__":
c_strings = []
c_labels = []
c_wnids = []
out_b = 2000
for b in xrange(49):
failed = 0
strings, sizes, labels = unpickle(os.path.join(src, '%s' % b))
for s,l in zip(strings, labels):
try:
im = Image.open(StringIO(s)).convert('RGB')
c_strings += [s]
c_labels += [l[1]]
c_wnids += [l[0]]
if len(c_strings) == BATCH_SIZE:
out_b = save_batch(c_strings, c_labels, c_wnids, out_b)
c_strings = []
c_labels = []
c_wnids = []
except IOError,e:
failed += 1
print "Batch %d failed: %d" % (b, failed)
if len(c_strings) > 0:
save_batch(c_strings, c_labels, c_wnids, out_b)

41
fix-flickr.py Executable file
View file

@ -0,0 +1,41 @@
import os
import sys
from PIL import Image
from StringIO import StringIO
from util import *
src = '/ais/gobi3/u/ilya/flickr_85/'
dst = '/ais/gobi3/u/kriz/flickr-85-1024/'
BATCH_SIZE = 2048
def save_batch(c_strings, c_sizes, c_labels, out_b):
pickle(os.path.join(dst, 'data_batch_%d' % out_b), (c_strings, c_sizes, c_labels))
return out_b + 1
if __name__ == "__main__":
c_strings = []
c_sizes = []
c_labels = []
out_b = 1
for b in xrange(977):
failed = 0
strings, sizes, labels = unpickle(os.path.join(src, '%s' % b))
for s,z,l in zip(strings, sizes, labels):
try:
im = Image.open(StringIO(s)).convert('RGB')
c_strings += [s]
c_sizes += [z]
c_labels += [l]
if len(c_strings) == BATCH_SIZE:
out_b = save_batch(c_strings, c_sizes, c_labels, out_b)
c_strings = []
c_sizes = []
c_labels = []
except IOError,e:
failed += 1
print "Batch %d failed: %d" % (b, failed)
if len(c_strings) > 0:
save_batch(c_strings, c_sizes, c_labels, out_b)

65
gen-py-interface.py Executable file
View file

@ -0,0 +1,65 @@
import sys
import re
import os
MODEL_CONSTRUCTOR = """ConvNet::ConvNet(PyListObject* layerParams, int minibatchSize, int deviceID)"""
pytype_mappings = {"float": "",
"int": "",
"bool":"",
"PyListObject": "PyList_Type"}
argstring_mappings = {"float": "d",
"bool":"i",
"int": "i"}
init_type_mappings = {"float": "double",
"int": "int",
"bool":"int",
"PyListObject": "PyListObject*"}
if __name__ == "__main__":
m = re.match(r"^(\w+)::\w+\((.*)\)$", MODEL_CONSTRUCTOR, re.MULTILINE | re.DOTALL)
model_name = m.group(1)
model_params = m.group(2).split(',')
template = ""
with open('./pyInterface.cutemp', 'r') as f:
template = ''.join(line for line in f)
template = template.replace("${MODEL_NAME}", model_name)
template = template.replace("${MODEL_NAME_LOWER}", model_name.lower())
init_vars = ""
init_parse = ""
arg_string = ""
model_preamble = ""
model_start = " model = new %s(" % model_name
space_padding = len(model_start)
numVectors = 0
for i,p in enumerate(model_params):
param = p.strip().split(' ')
ptype = re.match("^([\w<>\*]+)", param[0]).group(1).strip('*')
pname = param[1].strip('*')
pname = "py" + pname[0].upper() + pname[1:]
if ptype not in pytype_mappings:
print "Unknown type: %s" % ptype
sys.exit(1)
mapping = pytype_mappings[ptype]
if mapping == "":
arg_string += argstring_mappings[ptype]
init_parse += " &%s" % pname
else:
arg_string += "O!"
init_parse += " &%s, &%s" % (mapping, pname)
model_start += "%*s%s" % (space_padding * (i>0), "", pname)
if i < len(model_params) - 1:
init_parse += ",\n"
model_start += ",\n"
init_vars += " %s %s;\n" % (init_type_mappings[ptype], pname)
model_start += ");\n"
template = template.replace("${INIT_VARS}", init_vars)
template = template.replace("${INIT_PARSE}", init_parse)
template = template.replace("${ARG_STRING}", arg_string)
template = template.replace("${MODEL_START}", model_preamble + model_start)
print template

163
include/convnet.cuh Normal file
View file

@ -0,0 +1,163 @@
/*
* Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CONVNET3
#define CONVNET3
#include <vector>
#include <string>
#include <set>
#include <map>
#include <helper_cuda.h>
#include <time.h>
#include <queue.h>
#include <thread.h>
#include <math.h>
#include <sync.h>
#include <quantizer.cuh>
#include <messages.cuh>
#include <pipedispenser.cuh>
#include "layer.cuh"
#include "data.cuh"
#include "worker.cuh"
#include "weights.cuh"
#include "hostmem.cuh"
class Worker;
class WorkResult;
class Layer;
class DataLayer;
class CostLayer;
class ConvNetGPU;
class ConvNet : public Thread {
protected:
std::map<std::string,Layer*> _layerMap;
std::vector<DataLayer*> _dataLayers;
std::vector<ConvNetGPU*> _convNetThreads; // List of convnet threads
DataProvider* _dp;
CPUData* _data;
ThreadSynchronizer* _sync;
PipeDispenser* _pd;
intv* _deviceIDs;
std::vector<intv*>* _deviceCPUs;
Queue<Worker*> _workerQueue;
Queue<WorkResult*> _resultQueue;
Queue<Message*> _msgQueue;
int _numFwdTerminal, _numBwdTerminal;
int _weightUpdateFreq, _numBwdMiniPasses;
// For gradient checking
int _numFailures;
int _numTests;
// Training progress (between 0 and 1).
// Used to determine learning rate based on LearningRateSchedule.
double _trainingProgress;
double _baseErr;
void waitForTerminals(int numMsgs, MESSAGES msg);
void sendMessage(MESSAGES msg, bool sync);
void findBwdTerminal(Layer& l, std::set<std::string>& visited, std::set<std::string> &terminal);
void* run();
public:
ConvNet(PyObject* layerParams, intv& deviceIDs, std::vector<intv*>& deviceCPUs, int minibatchSize, int weightUpdateFreq);
Queue<Message*>& getMessageQueue();
Queue<Worker*>& getWorkerQueue();
Queue<WorkResult*>& getResultQueue();
DataProvider& getDataProvider();
Layer& operator[](string& name);
Layer& getLayer(string& name);
void copyToCPU();
void copyToGPU();
void updateWeights();
void reset();
void bprop(PASS_TYPE passType);
void fprop(PASS_TYPE passType);
void fprop(int miniIdx, PASS_TYPE passType);
void fprop(CPUData& data, PASS_TYPE passType);
void setTrainingProgress(double progress);
double getTrainingProgress() const;
bool checkGradient(const std::string& name, float eps, Weights& weights);
void checkGradients();
Cost& getCost();
Cost& getCost(Cost& cost);
double getCostValue();
int getDeviceID(int gpuIdx);
intv& getDeviceIDs();
ThreadSynchronizer& getSync();
void syncWithChildren();
int getWeightUpdateFreq();
int getNumBwdMiniPasses();
int getMinibatchSize();
PipeDispenser& getPipeDispenser();
};
class ConvNetGPU : public Thread {
protected:
std::map<std::string,Layer*> _layerMap;
std::vector<CostLayer*> _costs;
ConvNet* _convNet;
int _deviceID;
Queue<Message*> _msgQueue;
void initCuda();
virtual void initLayer(PyObject* paramsDict);
void* run();
void copyToCPU();
void copyToGPU();
void updateWeights();
void reset();
public:
ConvNetGPU(PyObject* layerList, int deviceID, intv& deviceCPUs, ConvNet* convNet);
std::map<std::string, Layer*>& getLayerMap();
void bprop(PASS_TYPE passType);
void fprop(PASS_TYPE passType);
void fprop(int miniIdx, PASS_TYPE passType);
int getDeviceID();
ConvNet& getConvNet();
void enqueueMessage(Message* msg);
Queue<Message*>& getMessageQueue();
std::vector<CostLayer*>& getCostLayers();
Cost& getCost(int numCases);
Layer& operator[](string& name);
Layer& getLayer(string& name);
};
#endif /* CONVNET3 */

66
include/cost.cuh Normal file
View file

@ -0,0 +1,66 @@
/*
* Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef COST_CUH
#define COST_CUH
#include <vector>
#include <map>
#include <helper_cuda.h>
#include "layer.cuh"
#include "util.cuh"
class CostLayer;
/*
* Wrapper for dictionary mapping cost name to vector of returned values.
*/
class Cost {
private:
int _numCases;
CostMap _costMap;
CostCoeffMap _costCoeffMap;
public:
Cost(int numCases);
Cost(int numCases, std::vector<CostLayer*>& costs);
doublev& operator [](const std::string s);
CostMap& getCostMap();
CostCoeffMap& getCostCoeffMap();
int getNumCases();
/*
* Returns sum of first values returned by all the costs, weighted by the cost coefficients.
*/
double getValue();
Cost& operator += (Cost& er);
Cost& operator |= (Cost& er);
Cost& operator /= (const double v);
virtual ~Cost();
};
#endif /* COST_CUH */

31
include/cpuCNN.cuh Normal file
View file

@ -0,0 +1,31 @@
/*
* File: cpuCNN.cuh
* Author: Alex Krizhevsky
*
* Created on September 10, 2012, 5:05 PM
*/
#ifndef CPUFUNCS_H
#define CPUFUNCS_H
#include <helper_cuda.h>
#include <softmaxtree.cuh>
/*
* weights: (numNodes, numFeatures)
* nodes: numNodesAtDepth-length array of ushort2
* where x coordinate gives node idx and y coordinate gives parent idx
* targets: (numNodes, numFeatures)
*
*/
void cpuSoftmaxTreeFwd(float* weights, float* targets, const int numFeatures, SoftmaxTree& tree);
/*
* grads: (numNodes, numFeatures)
*
*/
void cpuSoftmaxTreeBwd(float* grads, const int numFeatures, SoftmaxTree& tree);
void cpuSoftmaxTreeUpdateWeights(float* weights, float* weightsInc, float* weightsGrad,
const int numFeatures, float eps, const float mom, float wc, SoftmaxTree& tree);
#endif /* CPUFUNCS_H */

111
include/data.cuh Normal file
View file

@ -0,0 +1,111 @@
/*
* Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DATA_CUH
#define DATA_CUH
#include <vector>
#include <algorithm>
#include "util.cuh"
class Data {
protected:
MatrixV* _data;
void assertDimensions() {
assert(_data->size() > 0);
for (int i = 1; i < _data->size(); i++) {
assert(_data->at(i-1)->getNumCols() == _data->at(i)->getNumCols());
assert(_data->at(i-1)->isTrans() == _data->at(i)->isTrans());
}
assert(_data->at(0)->getNumCols() > 0);
}
public:
typedef typename MatrixV::iterator T_iter;
// Cases in columns, but array may be transposed
// (so in memory they can really be in rows -- in which case the array is transposed
// during the copy to GPU).
Data(PyObject* pyData) {
_data = getMatrixV(pyData);
assertDimensions();
}
Data(MatrixV* data) : _data(data) {
assertDimensions();
}
~Data() {
for (T_iter it = _data->begin(); it != _data->end(); ++it) {
delete *it;
}
delete _data;
}
Matrix& operator [](int idx) const {
return *_data->at(idx);
}
int getSize() const {
return _data->size();
}
MatrixV& getData() const {
return *_data;
}
Matrix& getData(int i) const {
return *_data->at(i);
}
bool isTrans() const {
return _data->at(0)->isTrans();
}
int getNumCases() const {
return _data->at(0)->getNumCols();
}
};
typedef Data CPUData;
class DataProvider {
protected:
CPUData* _hData;
NVMatrixV _data;
int _minibatchSize;
public:
DataProvider(int minibatchSize);
void setData(CPUData&);
void clearData();
CPUData& getMinibatch(int idx);
CPUData& getDataSlice(int startCase, int endCase);
int getNumMinibatches();
int getMinibatchSize();
int getNumCases();
int getNumCasesInMinibatch(int idx);
};
#endif /* DATA_CUH */

51
include/hostmem.cuh Normal file
View file

@ -0,0 +1,51 @@
/*
* Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HOSTMEM_CUH
#define HOSTMEM_CUH
#include <helper_cuda.h>
#include <cuda.h>
#include <cuda_runtime.h>
/*
* A utility class for transferring untyped memory from CPU to GPU and vice versa.
*/
class PinnedHostMem {
protected:
uint _numBytes;
void* _data;
public:
PinnedHostMem();
~PinnedHostMem();
void resize(uint bytes);
void copyFrom(void* src, uint bytes);
void copyTo(void* dst);
void* getData();
};
#endif /* HOSTMEM_CUH */

654
include/layer.cuh Normal file
View file

@ -0,0 +1,654 @@
/*
* Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef LAYER_CUH
#define LAYER_CUH
#include <algorithm>
#include <string>
#include <vector>
#include <map>
#include <assert.h>
#include <nvmatrix.cuh>
#include <multisoftmax.h>
#include <helper_timer.h>
#include "convnet.cuh"
#include "cost.cuh"
#include "weights.cuh"
#include "neuron.cuh"
#include "data.cuh"
#include "layer_kernels.cuh"
#include "hostmem.cuh"
#include "softmaxtree.cuh"
#include "pipedispenser.cuh"
class Cost;
class ConvNet;
class ConvNetGPU;
class CostLayer;
class DataLayer;
//class Message;
//class FpropMessage;
// The input matrix here is the squared norm.
// This replaces the squared norm with:
// 1 if it is below the threshold given by norm2
// norm/sqrt(a) otherwise -- i.e. the desired norm (not squared)
class WeightConstraintOperator {
private:
float _norm, _norm2;
public:
WeightConstraintOperator(float norm) : _norm(norm), _norm2(norm*norm) {
}
__device__ inline float operator()(const float a) const {
return a > _norm2 ? __fdividef(_norm, sqrtf(a)) : 1.0f;
}
};
class WeightContrastNormOperator {
private:
float _min, _max, _scale;
public:
WeightContrastNormOperator(float min, float max, float scale) : _min(min), _max(max), _scale(scale) {
}
__device__ inline float operator()(float a) const {
a = sqrtf(a) * _scale;
return a < _min ? __fdividef(_min, a) : a > _max ? __fdividef(_max, a) : 1.0f;
}
};
/*
* Abstract layer.
*/
class Layer {
protected:
ConvNetGPU* _convNetGPU;
std::vector<Layer*> _prev, _next;
int _rcvdFInputs;
std::map<int, int> _rcvdBInputs;
int _rcvdBInputMsgs;
int _numOutputs;
NVMatrixV _inputs;
std::map<int, NVMatrix*> _outputs;
std::map<int, NVMatrix*> _actsGrad; // Layer activity gradients
bool _gradConsumer, _foundGradConsumers, _trans;
bool _conserveMem;
bool _bwdTerminal;
int _numGradProducersNext;
int _actsTarget, _actsGradTarget;
std::string _name, _type;
int _deviceID;
intv _nextDeviceIDs;
HostNVMatrix _hostMemFwd, _hostMemBwd;
Quantizer* _fwdQuantizer, *_bwdQuantizer;
virtual void fpropNext(PASS_TYPE passType);
virtual void truncBwdActs();
virtual void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) = 0;
virtual void bpropCommon(NVMatrix& v, PASS_TYPE passType) {
// Do nothing by default
}
virtual void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) {
assert(!isGradProducer()); // Only do nothing if not grad producer
}
void shuffle(intv& v);
public:
static bool _saveActsGrad, _saveActs;
Layer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool trans);
virtual void fprop(PASS_TYPE passType);
void fprop(NVMatrix& v, PASS_TYPE passType);
virtual void fprop(NVMatrixV& v, PASS_TYPE passType);
virtual void bprop(PASS_TYPE passType);
virtual void bprop(NVMatrix& v, PASS_TYPE passType);
virtual void reset();
int getNumCases(NVMatrix& v);
int incRcvdBInputs(int deviceID);
int getRcvdFInputs();
int getRcvdBInputs(int deviceID);
int incRcvdBInputMsgs();
bool isGradConsumer();
bool hasGradProducerNext(std::string& layerName);
// Does this layer produce a gradient for any layer?
virtual bool isGradProducer();
// Does this layer produce a gradient for layer of given name?
virtual bool isGradProducer(std::string& layerName);
std::string& getName();
std::string& getType();
void addNext(Layer* l);
void addPrev(Layer* l);
std::vector<Layer*>& getPrev();
std::vector<Layer*>& getNext();
virtual NVMatrix& getActs();
virtual NVMatrix& getActs(int deviceID);
virtual NVMatrix& getActsGrad(int deviceID);
virtual NVMatrix& getActsGrad();
virtual void postInit();
int getDeviceID();
ConvNetGPU& getConvNetGPU();
ConvNet& getConvNet();
PipeDispenser& getPipeDispenser();
void setBwdTerminal(bool t);
// Do nothing if this layer has no weights
virtual bool updateWeights() {
return false;
}
virtual void checkGradients() {
}
virtual void copyToCPU() {
}
virtual void copyToGPU() {
}
};
class NeuronLayer : public Layer {
protected:
Neuron* _neuron;
string _neuronType;
virtual void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
virtual void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
NeuronLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
std::string& getNeuronType();
};
class WeightLayer : public Layer {
protected:
WeightList _weights;
Weights *_biases;
float _wStep, _bStep;
bool _gradComputed;
void bpropCommon(NVMatrix& v, PASS_TYPE passType);
virtual void bpropBiases(NVMatrix& v, PASS_TYPE passType) = 0;
virtual void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType) = 0;
virtual void constrainWeights() = 0;
public:
WeightLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool trans, bool useGrad, bool initWeights);
virtual bool updateWeights();
virtual void copyToCPU();
virtual void copyToGPU();
virtual void checkGradients();
Weights& getWeights(int idx);
};
class FCLayer : public WeightLayer {
protected:
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropBiases(NVMatrix& v, PASS_TYPE passType);
void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType);
virtual void constrainWeights();
public:
FCLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool useGrad, bool initWeights);
FCLayer();
};
class TreeFCLayer : public FCLayer {
protected:
TreeWeights* _treeWeights;
static void makeTree(PyObject* pyTree, SoftmaxNode& rootNode);
void constrainWeights();
public:
TreeFCLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
void checkGradients();
};
class SoftmaxLayer : public Layer {
protected:
bool _doLogregGrad;
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
SoftmaxLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
void setDoLogregGrad(bool b);
};
class ConcatenationLayer : public Layer {
protected:
intv* _copyOffsets;
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
ConcatenationLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
void setDoLogregGrad(bool b);
};
class EltwiseSumLayer : public Layer {
protected:
floatv* _coeffs;
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
EltwiseSumLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class EltwiseMaxLayer : public Layer {
protected:
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
EltwiseMaxLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class DataLayer : public Layer {
protected:
bool _useBuffer;
int _dataIdx;
int _bufferMinibatchIdx;
std::map<int, NVMatrix*> _outputs2; // Buffer for copying data during computation
CPUData* _bufferData;
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void postInit();
void copyData(CPUData& data, bool other);
void fpropNext(PASS_TYPE passType);
public:
DataLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
NVMatrix& getActs(int deviceID);
NVMatrix& getActs(int deviceID, bool other);
bool isGradProducer();
void fprop(PASS_TYPE passType);
void fprop(NVMatrixV& data, PASS_TYPE passType);
void setBuffer(CPUData& data, int minibatchIdx);
void startFprop(CPUData& data, PASS_TYPE passType);
void startFpropFromBuffer(PASS_TYPE passType);
int getBufferMinibatchIdx();
CPUData* getBufferData();
};
class LocalLayer : public WeightLayer {
protected:
struct FilterConns {
int* hFilterConns;
int* dFilterConns;
};
vector<FilterConns>* _filterConns;
intv* _padding, *_stride, *_filterSize, *_channels, *_imgSize, *_groups;
intv* _imgPixels, *_filterPixels, *_filterChannels, *_overSample, *_randSparse;
int _modulesX, _modules, _numFilters;
void copyToGPU();
public:
LocalLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool useGrad);
};
class ConvLayer : public LocalLayer {
protected:
int _partialSum;
bool _sharedBiases;
floatv* _weightContrastNormMin, *_weightContrastNormMax;
NVMatrix _weightGradTmp, _actGradTmp;
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropBiases(NVMatrix& v, PASS_TYPE passType);
void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType);
void truncBwdActs();
void constrainWeights();
public:
ConvLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class LocalUnsharedLayer : public LocalLayer {
protected:
NVMatrix _sexMask;
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropBiases(NVMatrix& v, PASS_TYPE passType);
void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType);
void constrainWeights();
public:
LocalUnsharedLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class PoolLayer : public Layer {
protected:
int _channels, _sizeX, _start, _stride, _outputsX;
int _imgSize;
string _pool;
public:
PoolLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool trans);
static PoolLayer& makePoolLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class AvgPoolLayer : public PoolLayer {
protected:
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
AvgPoolLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class MaxPoolLayer : public PoolLayer {
protected:
bool _abs;
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
MaxPoolLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool abs);
};
class RandomPoolLayer : public PoolLayer {
protected:
bool _doMax;
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
RandomPoolLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class RandomScaleLayer : public Layer {
protected:
int _channels, _imgSize, _tgtSize, _minScaledSize;
float _maxScale; // should be >= 1
NVMatrix _rescaledActs;
std::vector<double> _scaleProbs;
public:
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
RandomScaleLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class NailbedLayer : public Layer {
protected:
int _channels, _start, _stride, _outputsX;
int _imgSize;
public:
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
NailbedLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class GaussianBlurLayer : public Layer {
protected:
int _channels;
Matrix* _hFilter;
NVMatrix _filter;
NVMatrix _actGradsTmp;
public:
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
void copyToGPU();
GaussianBlurLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class HorizontalReflectionLayer : public Layer {
protected:
int _channels, _imgSize;
public:
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
HorizontalReflectionLayer(ConvNetGPU* convNet, PyObject* paramsDict);
};
class ResizeLayer : public Layer {
protected:
int _channels;
float _scale;
int _imgSize, _tgtSize;
public:
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
ResizeLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class HiddenSexLayer : public Layer {
protected:
bool _enable;
float _keep;
NVMatrix _sexMask;
public:
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
void truncBwdActs();
HiddenSexLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class RGBToYUVLayer : public Layer {
public:
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
RGBToYUVLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class RGBToLABLayer : public Layer {
protected:
bool _center;
public:
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
RGBToLABLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class ResponseNormLayer : public Layer {
protected:
int _channels, _size;
float _scale, _pow;
NVMatrix _denoms;
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
void truncBwdActs();
public:
ResponseNormLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class CrossMapResponseNormLayer : public ResponseNormLayer {
protected:
bool _blocked;
float _minDiv;
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
CrossMapResponseNormLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class ContrastNormLayer : public ResponseNormLayer {
protected:
int _imgSize;
NVMatrix _meanDiffs;
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
void truncBwdActs();
public:
ContrastNormLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class CostLayer : public Layer {
protected:
float _coeff;
doublev _costv;
public:
CostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool trans);
void bprop(NVMatrix& v, PASS_TYPE passType);
// void bprop(PASS_TYPE passType); // Pure idiocy... it won't compile without this useless definition.
void fprop(PASS_TYPE passType);
virtual doublev& getCost();
float getCoeff();
bool isGradProducer();
void setSendTerminalMessages(bool send);
static CostLayer& makeCostLayer(ConvNetGPU* convNetGPU, string& type, PyObject* paramsDict);
};
/*
* Input 0: labels
* Input 1: softmax outputs
*/
class CrossEntCostLayer : public CostLayer {
protected:
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
CrossEntCostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
/*
* Input 0: labels
* Input 1: softmax outputs
*/
class LogregCostLayer : public CostLayer {
protected:
NVMatrix _correctProbs, _topkProbs;
NVMatrix _probsAccum;
int _numAccumed;
int _topk;
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
LogregCostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
NVMatrix& getProbsAccum();
};
/*
* Input 0: labels
* Input 1: logistic outputs
*/
class CrossEnt2CostLayer : public CostLayer {
protected:
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
CrossEnt2CostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
class CrossEntOperator {
public:
__device__ inline float operator()(const float t, const float y) const {
return t * safelog(y) + (1.0f - t) * safelog(1.0f - y);
}
};
// Only for use with non-logistic units
class CrossEntGradientOperator {
private:
float _coeff;
public:
CrossEntGradientOperator(float coeff) : _coeff(coeff) {
}
__device__ inline float operator()(const float t, const float y) const {
return _coeff * (__fdividef(t, y) - __fdividef(1.0f - t, 1.0f - y)); // d/dy of t*log(y) + (1-t)*log(1-y)
}
};
};
/*
* Input 0: labels
* Input 1: logistic outputs
*/
class RobustFlickrCost : public CostLayer {
protected:
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
RobustFlickrCost(ConvNetGPU* convNetGPU, PyObject* paramsDict);
class RobustFlickrCostOperator {
public:
__device__ inline float operator()(const float t, const float y) const {
const float d = (y-t) * (y-t);
return __logf(1 + d);// - (t * safelog(y));
}
};
// Only for use with non-logistic units
class RobustFlickrCostGradientOperator {
private:
float _coeff;
public:
RobustFlickrCostGradientOperator(float coeff) : _coeff(coeff) {
}
__device__ inline float operator()(const float t, const float y) const {
const float d = y - t;
return -_coeff * (__fdividef(2.0f * d, 1.0f + d*d) /*- __fdividef(t, y)*/);
}
};
};
class SumOfSquaresCostLayer : public CostLayer {
protected:
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
SumOfSquaresCostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
/*
* Input 0: labels
* Input 1: energies
*/
class MultiSoftmaxCostLayer : public CostLayer {
protected:
NVMatrix _probsT;
Matrix _cpuProbs, _cpuLabels, _energies_T_CPU;
std::vector<Matrix*> B;
int _setSize, _numOut, _threads;
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
MultiSoftmaxCostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
void computeCost(bool useEnergies);
};
/*
* input 0: gates
* input 1: what to sum and square
*/
class GatedSumOfSquaresCostLayer : public CostLayer {
protected:
NVMatrix _ungated;
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
GatedSumOfSquaresCostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
class TICACostLayer : public CostLayer {
protected:
int _sizeX, _channels;
void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
public:
TICACostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict);
};
#endif /* LAYER_CUH */

65
include/layer_kernels.cuh Normal file
View file

@ -0,0 +1,65 @@
/*
* Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef LAYER_KERNELS_CUH
#define LAYER_KERNELS_CUH
#include <vector>
#include <helper_cuda.h>
#include <nvmatrix.cuh>
#define LOGREG_GRAD_THREADS_X 32
#define LOGREG_GRAD_THREADS_Y 4
#define LOGREG_ERR_THREADS_X 128
#define LOGREG_ERR_THREADS_Y 1
__device__ inline float safelog(const float x) {
return x > 0.0f ? __logf(x) : -50.0f;
}
void computeCrossEntCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& labelLogProbs_out, NVMatrix& correctProbs_out);
void computeCrossEntGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff);
void computeSoftmaxGrad(NVMatrix& acts, NVMatrix& actsGrad, NVMatrix& target, bool add);
void computeLogregCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& labelLogProbs_out, NVMatrix& correctProbs_out);
void computeLogregGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff);
// Numerical stability optimization: this routine combines computeLogregGrad with computeSoftmaxGrad
// to avoid dividing and then multiplying by quantities that may be near zero.
void computeCrossEntSoftmaxGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff);
void computeLogregSoftmaxGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff);
void computeEltwiseMaxGrad(NVMatrix& actGrad, NVMatrix& input, NVMatrix& output, NVMatrix& target, bool add);
void MSMBackward(NVMatrix& energies, NVMatrix& bLattice, int setSize);
void MultiSoftmaxCPU(Matrix& elts, Matrix& B, Matrix& probs, int size, int fixed);
void MultiSoftmaxCPU_T(Matrix& elts, Matrix& B, Matrix& probs, Matrix& fixed, int size);
void computeMultiSoftmaxCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& energies, NVMatrix& labelLogProbs_out,
NVMatrix& correctProbs_out, NVMatrix& top5Probs_out, int setSize, bool useEnergies);
#endif /* LAYER_KERNELS_CUH */
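The numerical-stability note above is why computeCrossEntSoftmaxGrad and computeLogregSoftmaxGrad exist as fused kernels: dividing by a probability that may underflow to zero and then multiplying by it again inside the softmax gradient produces inf/nan, whereas the combined gradient collapses algebraically to labels - probs (safelog's clamp to -50 plays the same defensive role on the cost side). A minimal NumPy sketch of that idea; the function and array names here are illustrative, not the kernel's:

import numpy as np

def softmax(z):
    # Columns are cases; subtract the per-case max for stability.
    e = np.exp(z - z.max(axis=0, keepdims=True))
    return e / e.sum(axis=0, keepdims=True)

def grad_naive(labels, probs):
    # Two-step version: d(log-prob)/d(probs), then the softmax Jacobian.
    # The division can overflow to inf when a probability underflows to 0.
    g = labels / probs
    return probs * (g - (g * probs).sum(axis=0, keepdims=True))

def grad_fused(labels, probs):
    # Algebraically the same quantity, but it never divides by probs.
    return labels - probs

z = np.array([[60.0, -60.0], [-60.0, 60.0], [0.0, 0.0]], dtype=np.float32)
t = np.array([[0.0, 1.0], [1.0, 0.0], [0.0, 0.0]], dtype=np.float32)
p = softmax(z)
print(grad_naive(t, p))   # inf/nan where a probability underflowed (NumPy warns)
print(grad_fused(t, p))   # finite everywhere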

77
include/lr.cuh Normal file
View file

@ -0,0 +1,77 @@
#ifndef LR_CUH
#define LR_CUH
#include <string>
#include <vector>
#include <iostream>
#include <helper_cuda.h>
#include <assert.h>
#include <nvmatrix.cuh>
#include <matrix.h>
#include <util.cuh>
#include <Python.h>
/*
* The maximum learning rate is _baseRate.
* The minimum learning rate is _baseRate / _tgtFactor.
*
* These classes define annealing schedules that interpolate between these
* two extrema.
*/
class LearningRateSchedule {
protected:
double _baseRate, _noiseStdev, _randnSpare;
bool _haveRandnSpare;
virtual double _getRate(double progress);
double randn();
double rand() const;
double abs(double x) const;
public:
LearningRateSchedule(double base);
LearningRateSchedule(double base, double noiseStdev);
double getRate(double progress);
double getBaseRate() const;
virtual ~LearningRateSchedule();
static LearningRateSchedule& make(PyObject* lrsDict, double base);
};
class LinearLRS : public LearningRateSchedule {
protected:
double _finalRate;
public:
LinearLRS(double base, double tgtFactor, double noiseStdev);
virtual double _getRate(double progress);
};
class ExpLRS : public LearningRateSchedule {
protected:
double _pow;
public:
ExpLRS(double baseRate, double tgtFactor, double noiseStdev);
virtual double _getRate(double progress);
};
class TanhLRS : public LearningRateSchedule {
protected:
double _alpha, _beta;
public:
TanhLRS(double baseRate, double tgtFactor, double noiseStdev);
virtual double _getRate(double progress);
};
class DiscreteExpLRS : public LearningRateSchedule {
protected:
std::vector<double> _rates;
public:
DiscreteExpLRS(double baseRate, double tgtFactor, double noiseStdev, int numSteps);
virtual double _getRate(double progress);
};
class JumpyDiscreteExpLRS : public DiscreteExpLRS {
public:
JumpyDiscreteExpLRS(double baseRate, double tgtFactor, double noiseStdev, int numSteps);
virtual double _getRate(double progress);
};
#endif /* LR_CUH */
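The header comment above fixes the contract for every schedule: the rate starts at _baseRate and ends at _baseRate / _tgtFactor as training progress runs from 0 to 1. A small sketch of what a linear, an exponential, and a discretized exponential interpolation between those two extremes might look like; the function names and the exact quantization are illustrative, not taken from the C++ implementations:

def linear_rate(progress, base, tgt_factor):
    # Interpolate linearly from base down to base / tgt_factor.
    final = base / tgt_factor
    return base + (final - base) * progress

def exp_rate(progress, base, tgt_factor):
    # Interpolate geometrically: rate(0) = base, rate(1) = base / tgt_factor.
    return base * tgt_factor ** (-progress)

def discrete_exp_rate(progress, base, tgt_factor, num_steps):
    # Like exp_rate, but quantized into num_steps plateaus.
    if num_steps <= 1:
        return base
    step = min(int(progress * num_steps), num_steps - 1)
    return exp_rate(step / (num_steps - 1.0), base, tgt_factor)

for p in (0.0, 0.5, 1.0):
    print(p, linear_rate(p, 0.01, 100), exp_rate(p, 0.01, 100))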

133
include/messages.cuh Normal file
View file

@ -0,0 +1,133 @@
/*
* messages.cuh
*
* Created on: 2013-02-25
* Author: spoon
*/
#ifndef MESSAGES_CUH_
#define MESSAGES_CUH_
#include <string>
enum MESSAGES { FPROP_TERMINAL,
BPROP_TERMINAL,
BPROP_READY,
FPROP_READY,
SYNC,
COPY_TO_CPU,
COPY_TO_GPU,
UPDATE_WEIGHTS,
RESET,
COST_COMPUTED,
BPROP_START,
// COPY,
// DEQUANTIZE,
RUNME};
class Message {
protected:
MESSAGES _messageType;
public:
MESSAGES getMessageType() {
return _messageType;
}
Message(MESSAGES messageType) : _messageType(messageType) {
}
virtual ~Message() {
}
};
/*
* A message that performs some simple function in its run method.
*/
class RunMeMessage : public Message {
public:
RunMeMessage() : Message(RUNME) {
}
virtual void run() = 0;
virtual ~RunMeMessage() {
}
};
class CopyMessage : public RunMeMessage {
protected:
NVMatrix* _src, *_tgt;
public:
CopyMessage(NVMatrix* src, NVMatrix* tgt) : RunMeMessage(), _src(src), _tgt(tgt) {
}
void run() {
_src->copy(*_tgt);
}
~CopyMessage() {
assert(_src->isView());
delete _src;
}
};
class DequantizeMessage : public RunMeMessage {
protected:
Quantizer* _q;
NVMatrix *_tgt;
public:
DequantizeMessage(Quantizer* q, NVMatrix* tgt) : RunMeMessage(), _q(q), _tgt(tgt) {
}
void run() {
_q->dequantize(*_tgt);
}
~DequantizeMessage() {
}
};
class PropMessage : public Message {
protected:
std::string _fromLayer, _toLayer;
PASS_TYPE _passType;
public:
std::string& getFromLayer() {
return _fromLayer;
}
std::string& getToLayer() {
return _toLayer;
}
PASS_TYPE getPassType() {
return _passType;
}
PropMessage(std::string fromLayer, std::string toLayer, PASS_TYPE passType, MESSAGES msgType)
: Message(msgType), _fromLayer(fromLayer), _toLayer(toLayer), _passType(passType) {
}
};
class FpropMessage : public PropMessage {
public:
FpropMessage(std::string fromLayer, std::string toLayer, PASS_TYPE passType)
: PropMessage(fromLayer, toLayer, passType, FPROP_READY) {
}
};
class BpropMessage : public PropMessage {
public:
BpropMessage(std::string fromLayer, std::string toLayer, PASS_TYPE passType)
: PropMessage(fromLayer, toLayer, passType, BPROP_READY) {
}
};
class BpropStartMessage : public Message {
protected:
PASS_TYPE _passType;
public:
PASS_TYPE getPassType() {
return _passType;
}
BpropStartMessage(PASS_TYPE passType)
: Message(BPROP_START), _passType(passType) {
}
};
#endif /* MESSAGES_CUH_ */
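The message types above are plain data carriers passed between the per-GPU worker threads: FPROP_READY and BPROP_READY name the layer whose activity or gradient just became available, and RUNME messages carry their own work in a run() method (copies, dequantization). A rough Python analogue of such a dispatch loop, assuming one queue per thread; all names below are illustrative, not the convnet's:

import queue
import threading

class Message:
    def __init__(self, msg_type):
        self.msg_type = msg_type

class RunMeMessage(Message):
    # Carries its own work, like the C++ RUNME messages.
    def __init__(self, fn):
        super().__init__('RUNME')
        self.run = fn

def worker(q):
    while True:
        msg = q.get()
        if msg is None:                  # shutdown sentinel
            break
        if msg.msg_type == 'RUNME':
            msg.run()                    # e.g. a copy or dequantize job
        elif msg.msg_type == 'FPROP_READY':
            pass                         # a layer would fprop here

q = queue.Queue()
t = threading.Thread(target=worker, args=(q,))
t.start()
q.put(RunMeMessage(lambda: print('copied a matrix')))
q.put(None)
t.join()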

38
include/multisoftmax.h Normal file
View file

@ -0,0 +1,38 @@
/*
* File: multisoftmax.h
* Author: Alex Krizhevsky
*
* Created on May 9, 2012, 5:36 PM
*/
#ifndef MULTISOFTMAX_H
#define MULTISOFTMAX_H
#include <algorithm>
#include <thread.h>
#include <matrix.h>
#include <vector>
#ifndef DIVUP
#define DIVUP(x, y) (((x) + (y) - 1) / (y))
#endif
#define EXP exp
#define LOG log
#define INF 1e35f
class MultiSoftmaxWorker : public Thread {
protected:
Matrix* _elts, *_B, *_probs, *_fixed;
int _size;
bool _nofix;
void* run();
public:
MultiSoftmaxWorker(Matrix* elts, Matrix* B, Matrix* probs, Matrix* _fixed, int size, bool nofix);
virtual ~MultiSoftmaxWorker();
};
void MultiSoftmaxCPU_T_parallel(Matrix& elts, std::vector<Matrix*>& B, Matrix& probs, Matrix& fixed, int size, bool nofix);
#endif /* MULTISOFTMAX_H */

529
include/neuron.cuh Normal file
View file

@ -0,0 +1,529 @@
/*
* Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef NEURONS_CUH
#define NEURONS_CUH
#include <assert.h>
#include <string>
#include <nvmatrix.cuh>
#include <helper_cuda.h>
template <class GradientOp>
class AddGradientBinaryOperator {
GradientOp _op;
public:
AddGradientBinaryOperator(GradientOp op) : _op(op) {
}
__device__ inline float operator()(const float unitActGrad, const float unitAct, const float target) const {
return _op(unitActGrad, unitAct) + target;
}
};
template <class GradientOp>
class AddGradientOperator {
GradientOp _op;
public:
AddGradientOperator(GradientOp op) : _op(op) {
}
__device__ inline float operator()(const float unitActGrad, const float target) const {
return target + _op(unitActGrad);
}
};
/* =======================
* Neuron
* -----------------------
*
* f(x) = x
* =======================
*/
class Neuron {
protected:
bool _activated;
// Inputs and outputs potentially point to the same matrix, depending on the neuron
NVMatrix* _inputs, *_outputs;
virtual void _activate() {
if (_inputs != _outputs) {
_inputs->copy(*_outputs);
}
}
virtual void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
if (&target != &actsGrad) {
actsGrad.copy(target);
}
}
virtual void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
if (&target != &actsGrad) {
target.add(actsGrad);
}
}
public:
Neuron() : _activated(false), _inputs(NULL), _outputs(NULL) {
}
virtual void activate(NVMatrix& inputs, NVMatrix& outputs) {
_activated = true;
_inputs = &inputs;
_outputs = &outputs;
_activate();
}
virtual void computeInputGrad(NVMatrix& actsGrad, NVMatrix& target, bool add) {
assert(_activated);
if (!add) {
target.resize(actsGrad);
_computeInputGrad(actsGrad, target);
} else {
_addInputGrad(actsGrad, target);
}
}
static Neuron& makeNeuron(PyObject* neuronDict);
};
/* =======================
* LogisticNeuron
* -----------------------
*
* f(x) = 1 / (1 + e^-x)
* =======================
*/
class LogisticNeuron : public Neuron {
protected:
void _activate() {
_inputs->apply(NVMatrixOps::Logistic(), *_outputs);
}
void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyBinary(LogisticGradientOperator(), *_outputs, target);
}
void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyTernary(AddGradientBinaryOperator<LogisticGradientOperator>(LogisticGradientOperator()), *_outputs, target, target);
}
public:
class LogisticGradientOperator {
public:
__device__ inline float operator()(float unitActGrad, float unitAct) const {
return unitActGrad * unitAct * (1.0f - unitAct);
}
};
LogisticNeuron() : Neuron() {
}
};
/* =======================
* ReluNeuron
* -----------------------
*
* f(x) = max(0, x)
* =======================
*/
class ReluNeuron : public Neuron {
protected:
virtual void _activate() {
_inputs->apply(ReluOperator(), *_outputs);
}
void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyBinary(ReluGradientOperator(), *_outputs, target);
}
void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyTernary(AddGradientBinaryOperator<ReluGradientOperator>(ReluGradientOperator()), *_outputs, target, target);
}
public:
class ReluOperator {
public:
__device__ inline float operator()(float x) const {
return x < 0.0f ? 0.0f : x;
}
};
class ReluGradientOperator {
public:
__device__ inline float operator()(float unitActGrad, float unitAct) const {
return unitActGrad * (unitAct > 0.0f);
}
};
ReluNeuron() : Neuron() {
}
};
/* =======================
* NoisyReluNeuron
* -----------------------
*
* f(x) = max(0, max(0, x) + gaussian noise with variance equal to max(0, x))
* =======================
*/
class NoisyReluNeuron : public ReluNeuron {
protected:
void _activate() {
ReluNeuron::_activate();
_outputs->addGaussianNoise(*_outputs, false);
_outputs->apply(ReluOperator());
}
public:
NoisyReluNeuron() : ReluNeuron() {
}
};
/* =======================
* BoundedReluNeuron
* -----------------------
*
* f(x) = min(a, max(0, x))
* =======================
*/
class BoundedReluNeuron : public Neuron {
protected:
float _a;
void _activate() {
_inputs->apply(BoundedReluOperator(_a), *_outputs);
}
void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyBinary(BoundedReluGradientOperator(_a), *_outputs, target);
}
void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyTernary(AddGradientBinaryOperator<BoundedReluGradientOperator>(BoundedReluGradientOperator(_a)), *_outputs, target, target);
}
public:
class BoundedReluOperator {
private:
float _a;
public:
BoundedReluOperator(float a) : _a(a) {
}
__device__ inline float operator()(float x) const {
return x < 0.0f ? 0.0f : x > _a ? _a : x;
}
};
class BoundedReluGradientOperator {
private:
float _a;
public:
BoundedReluGradientOperator(float a) : _a(a) {
}
__device__ inline float operator()(float unitActGrad, float unitAct) const {
return unitActGrad * (unitAct > 0.0f) * (unitAct < _a);
}
};
BoundedReluNeuron(float a) : Neuron(), _a(a) {
}
};
/* =======================
* AbsNeuron
* -----------------------
*
* f(x) = abs(x)
* =======================
*/
class AbsNeuron : public Neuron {
protected:
void _activate() {
assert(_inputs != _outputs);
_inputs->apply(NVMatrixOps::Abs(), *_outputs);
}
void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyBinary(AbsGradientOperator(), *_inputs, target);
}
void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyTernary(AddGradientBinaryOperator<AbsGradientOperator>(AbsGradientOperator()), *_inputs, target, target);
}
public:
class AbsGradientOperator {
public:
__device__ inline float operator()(float unitActGrad, float unitInput) const {
return unitActGrad * (unitInput > 0.0f ? 1.0f : -1.0f);
}
};
AbsNeuron() : Neuron() {
}
};
/* =======================
* TanhNeuron
* -----------------------
*
* f(x) = a*tanh(b*x)
* =======================
*/
class TanhNeuron : public Neuron {
protected:
float _a, _b;
void _activate() {
_inputs->apply(TanhOperator(_a, _b), *_outputs);
}
void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyBinary(TanhGradientOperator(_a, _b), *_outputs, target);
}
void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyTernary(AddGradientBinaryOperator<TanhGradientOperator>(TanhGradientOperator(_a, _b)), *_outputs, target, target);
}
public:
class TanhOperator {
private:
float _a, _n2b;
public:
TanhOperator(float a, float b) : _a(a), _n2b(-2*b) {
}
virtual __device__ inline float operator()(float x) const {
return _a * (__fdividef(2.0f, 1.0f + __expf(x * _n2b)) - 1.0f);
}
};
class TanhGradientOperator {
private:
float _b, _a;
public:
TanhGradientOperator(float a, float b) : _b(b), _a(a) {
}
__device__ inline float operator()(float unitActGrad, float unitAct) const {
// const float t = (1.0f - __fdividef(unitAct, _a)) / 2.0f;
// return unitActGrad * _n4ab * (t * (t - 1.0f));
return unitActGrad * _b * (_a - __fdividef(unitAct * unitAct, _a));
}
};
TanhNeuron(float a, float b) : Neuron(), _a(a), _b(b) {
}
};
/* =======================
* DoubleReluNeuron
* -----------------------
*
* f(x) = x - a*tanh(x/a)
* =======================
*/
class DoubleReluNeuron : public Neuron {
protected:
float _a;
void _activate() {
assert(_inputs != _outputs);
_inputs->apply(DoubleReluOperator(_a), *_outputs);
}
void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyBinary(DoubleReluGradientOperator(_a), *_inputs, target);
}
void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyTernary(AddGradientBinaryOperator<DoubleReluGradientOperator>(DoubleReluGradientOperator(_a)), *_inputs, target, target);
}
public:
class DoubleReluOperator {
private:
float _a, _n2a;
public:
DoubleReluOperator(float a) : _a(a), _n2a(-2.0f / a) {
}
virtual __device__ inline float operator()(float x) const {
return x - _a * (__fdividef(2.0f, 1.0f + __expf(_n2a * x)) - 1.0f);
}
};
class DoubleReluGradientOperator {
private:
float _n2a;
public:
DoubleReluGradientOperator(float a) : _n2a(-2.0f / a) {
}
__device__ inline float operator()(float unitActGrad, float unitInput) const {
const float tanh = __fdividef(2.0f, 1.0f + __expf(_n2a * unitInput)) - 1.0f;
return unitActGrad * (tanh*tanh);
}
};
DoubleReluNeuron(float a) : Neuron(), _a(a) {
}
};
/* =======================
* SoftReluNeuron
* -----------------------
*
* f(x) = log(1 + e^x)
* =======================
*/
class SoftReluNeuron : public Neuron {
protected:
void _activate() {
assert(_inputs != _outputs);
_inputs->apply(SoftReluOperator(), *_outputs);
}
void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyBinary(SoftReluGradientOperator(), *_inputs, target);
}
void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyTernary(AddGradientBinaryOperator<SoftReluGradientOperator>(SoftReluGradientOperator()), *_inputs, target, target);
}
public:
class SoftReluOperator {
public:
__device__ inline float operator()(float x) const {
// This piece-wise implementation has better numerical stability than
// simply computing log(1 + e^x).
return x > 4.0f ? x : __logf(1.0f + __expf(x));
}
};
class SoftReluGradientOperator {
public:
__device__ inline float operator()(float unitActGrad, float unitInput) const {
if (unitInput > 4.0f) {
return unitActGrad;
}
const float f = __expf(unitInput);
return unitActGrad * __fdividef(f, 1.0f + f);
}
};
SoftReluNeuron() : Neuron() {
}
};
/* =======================
* SquareNeuron
* -----------------------
*
* f(x) = x^2
* =======================
*/
class SquareNeuron : public Neuron {
protected:
void _activate() {
assert(_inputs != _outputs);
_inputs->apply(NVMatrixOps::Square(), *_outputs);
}
void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyBinary(SquareGradientOperator(), *_inputs, target);
}
void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyTernary(AddGradientBinaryOperator<SquareGradientOperator>(SquareGradientOperator()), *_inputs, target, target);
}
public:
class SquareGradientOperator {
public:
__device__ inline float operator()(float unitActGrad, float unitInput) const {
return unitActGrad * 2.0f * unitInput;
}
};
SquareNeuron() : Neuron() {
}
};
/* =======================
* SqrtNeuron
* -----------------------
*
* f(x) = sqrt(x)
* =======================
*/
class SqrtNeuron : public Neuron {
protected:
void _activate() {
_inputs->apply(NVMatrixOps::Sqrt(), *_outputs);
}
void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyBinary(SqrtGradientOperator(), *_outputs, target);
}
void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyTernary(AddGradientBinaryOperator<SqrtGradientOperator>(SqrtGradientOperator()), *_outputs, target, target);
}
public:
class SqrtGradientOperator {
public:
__device__ inline float operator()(float unitActGrad, float unitAct) const {
return __fdividef(unitActGrad, 2.0f * unitAct);
}
};
SqrtNeuron() : Neuron() {
}
};
/* =======================
* LinearNeuron
* -----------------------
*
* f(x) = a*x + b
* =======================
*/
class LinearNeuron : public Neuron {
protected:
float _a, _b;
void _activate() {
_inputs->apply(LinearOperator(_a, _b), *_outputs);
}
void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.scale(_a, target);
}
void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
actsGrad.applyBinary(AddGradientOperator<NVMatrixOps::MultByScalar>(NVMatrixOps::MultByScalar(_a)), target, target);
}
public:
class LinearOperator {
protected:
float _a, _b;
public:
__device__ inline float operator()(float x) const {
return _a * x + _b;
}
LinearOperator(float a, float b) : _a(a), _b(b) {
}
};
LinearNeuron(float a, float b) : Neuron(), _a(a), _b(b) {
}
};
#endif /* NEURONS_CUH */
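A pattern worth noting in the neurons above: wherever possible the gradient is written in terms of the unit's output rather than its input (logistic: y(1-y); relu: [y > 0]; tanh: b(a - y^2/a); sqrt: 1/(2y)), which is what lets fprop overwrite the input matrix in place. A NumPy sketch of a few of these pairs, using the same f(x) definitions as the comments; the helper names are illustrative:

import numpy as np

def logistic(x):
    return 1.0 / (1.0 + np.exp(-x))

def logistic_grad_from_output(dy, y):
    # Matches LogisticGradientOperator: dx = dy * y * (1 - y).
    return dy * y * (1.0 - y)

def relu(x):
    return np.maximum(0.0, x)

def relu_grad_from_output(dy, y):
    # Matches ReluGradientOperator: dx = dy * (y > 0).
    return dy * (y > 0.0)

def scaled_tanh(x, a, b):
    return a * np.tanh(b * x)

def scaled_tanh_grad_from_output(dy, y, a, b):
    # Matches TanhGradientOperator: dx = dy * b * (a - y*y/a).
    return dy * b * (a - y * y / a)

x = np.linspace(-2, 2, 5)
dy = np.ones_like(x)
y = scaled_tanh(x, 1.7159, 2.0 / 3.0)
# Check against the input-side formula a*b*(1 - tanh(b*x)^2).
print(np.allclose(scaled_tanh_grad_from_output(dy, y, 1.7159, 2.0 / 3.0),
                  1.7159 * (2.0 / 3.0) * (1 - np.tanh(2.0 / 3.0 * x) ** 2)))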

139
include/pipedispenser.cuh Normal file
View file

@ -0,0 +1,139 @@
/*
* pipedispenser.cuh
*
* Created on: 2013-03-01
* Author: spoon
*/
#ifndef PIPEDISPENSER_CUH_
#define PIPEDISPENSER_CUH_
#include <pthread.h>
#include <set>
#include <algorithm>
#include <iterator>
#include <util.cuh>
class PipeDispenser {
protected:
int _numPipes;
seti _pipes;
pthread_mutex_t *_mutex;
void lock() {
pthread_mutex_lock(_mutex);
}
void unlock() {
pthread_mutex_unlock(_mutex);
}
public:
PipeDispenser(const seti& pipes) {
_pipes.insert(pipes.begin(), pipes.end());
_mutex = (pthread_mutex_t*)(malloc(sizeof (pthread_mutex_t)));
pthread_mutex_init(_mutex, NULL);
}
virtual ~PipeDispenser() {
pthread_mutex_destroy(_mutex);
free(_mutex);
}
virtual int getPipe(const seti& interested) = 0;
int getPipe(int interested) {
seti tmp;
tmp.insert(interested);
return getPipe(tmp);
}
virtual void freePipe(int pipe) = 0;
};
/*
* This one blocks until there is a free pipe to return.
*/
class PipeDispenserBlocking : public PipeDispenser {
protected:
pthread_cond_t *_cv;
void wait() {
pthread_cond_wait(_cv, _mutex);
}
void broadcast() {
pthread_cond_broadcast(_cv);
}
int getAvailablePipes(const seti& interested, intv& available) {
available.clear();
std::set_intersection(_pipes.begin(), _pipes.end(), interested.begin(), interested.end(), std::back_inserter(available));
return available.size();
}
public:
PipeDispenserBlocking(const seti& pipes) : PipeDispenser(pipes) {
_cv = (pthread_cond_t*)(malloc(sizeof (pthread_cond_t)));
pthread_cond_init(_cv, NULL);
}
~PipeDispenserBlocking() {
pthread_cond_destroy(_cv);
free(_cv);
}
int getPipe(const seti& interested) {
lock();
intv avail;
while (getAvailablePipes(interested, avail) == 0) {
wait();
}
int pipe = avail[0];
_pipes.erase(pipe);
unlock();
return pipe;
}
void freePipe(int pipe) {
lock();
_pipes.insert(pipe);
broadcast();
unlock();
}
};
/*
* This one returns the least-occupied pipe.
*/
class PipeDispenserNonBlocking : public PipeDispenser {
protected:
std::map<int,int> _pipeUsers;
public:
PipeDispenserNonBlocking(const seti& pipes) : PipeDispenser(pipes) {
for (seti::iterator it = pipes.begin(); it != pipes.end(); ++it) {
_pipeUsers[*it] = 0;
}
}
int getPipe(const seti& interested) {
lock();
int pipe = -1, users = 1 << 30;
for (seti::iterator it = _pipes.begin(); it != _pipes.end(); ++it) {
if (interested.count(*it) > 0 && _pipeUsers[*it] < users) {
pipe = *it;
users = _pipeUsers[*it];
}
}
if (pipe >= 0) {
_pipeUsers[pipe]++;
}
unlock();
return pipe;
}
void freePipe(int pipe) {
lock();
_pipeUsers[pipe]--;
unlock();
}
};
#endif /* PIPEDISPENSER_CUH_ */
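PipeDispenserBlocking hands out GPU "pipes" under a mutex and sleeps on a condition variable until one of the pipes the caller is interested in becomes free; PipeDispenserNonBlocking instead returns the least-loaded interesting pipe immediately. A condensed Python version of the blocking variant, under the assumption that a pipe is just an integer id:

import threading

class PipeDispenserBlocking:
    def __init__(self, pipes):
        self._free = set(pipes)
        self._cv = threading.Condition()

    def get_pipe(self, interested):
        # Block until one of the pipes the caller cares about is free.
        with self._cv:
            while not (self._free & set(interested)):
                self._cv.wait()
            pipe = min(self._free & set(interested))
            self._free.remove(pipe)
            return pipe

    def free_pipe(self, pipe):
        with self._cv:
            self._free.add(pipe)
            self._cv.notify_all()   # like pthread_cond_broadcast

d = PipeDispenserBlocking([0, 1])
p = d.get_pipe([0, 1])
d.free_pipe(p)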

43
include/pyconvnet.cuh Normal file
View file

@ -0,0 +1,43 @@
/*
* Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef PYCONVNET3_CUH
#define PYCONVNET3_CUH
#define _QUOTEME(x) #x
#define QUOTEME(x) _QUOTEME(x)
extern "C" void INITNAME();
PyObject* initModel(PyObject *self, PyObject *args);
PyObject* startBatch(PyObject *self, PyObject *args);
PyObject* finishBatch(PyObject *self, PyObject *args);
PyObject* checkGradients(PyObject *self, PyObject *args);
PyObject* syncWithHost(PyObject *self, PyObject *args);
PyObject* startMultiviewTest(PyObject *self, PyObject *args);
PyObject* startFeatureWriter(PyObject *self, PyObject *args);
PyObject* startDataGrad(PyObject *self, PyObject *args);
#endif

43
include/quantizer.cuh Normal file
View file

@ -0,0 +1,43 @@
/*
* quantizer.cuh
*
* Created on: 2013-02-15
* Author: spoon
*/
#ifndef QUANTIZER_CUH_
#define QUANTIZER_CUH_
#include <Python.h>
#include <util.cuh>
#include <string>
#include <nvmatrix.cuh>
#include <conv_util.cuh>
class Quantizer {
protected:
NVMatrix* _quantized;
int _numRows, _numCols;
bool _trans;
virtual void _quantize(NVMatrix& src, NVMatrix& tgt);
virtual void _dequantize(NVMatrix& tgt, float scaleTarget, float scaleOutput);
public:
Quantizer();
virtual ~Quantizer();
void quantize(NVMatrix& src, NVMatrix& tgt);
void dequantize(NVMatrix& tgt);
void dequantize(NVMatrix& tgt, float scaleTarget, float scaleOutput);
static Quantizer& make(PyObject* qDict);
};
class HalfQuantizer : public Quantizer {
protected:
void _quantize(NVMatrix& src, NVMatrix& tgt);
void _dequantize(NVMatrix& tgt, float scaleTarget, float scaleOutput);
public:
HalfQuantizer();
};
#endif /* QUANTIZER_CUH_ */
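HalfQuantizer presumably packs a float matrix into 16-bit halves before it is shipped to another device and expands it again on arrival; dequantize(tgt, scaleTarget, scaleOutput) mirrors the scaleTargets/scaleOutput blending convention used elsewhere in this code. A NumPy sketch under those assumptions; the blending formula below is inferred from that convention, not read out of the implementation:

import numpy as np

class HalfQuantizerSketch:
    def quantize(self, src):
        # Keep a float16 copy; this is the lossy step.
        self._q = src.astype(np.float16)

    def dequantize(self, tgt, scale_target=0.0, scale_output=1.0):
        # Assumed convention: tgt := scale_target*tgt + scale_output*dequantized.
        tgt *= scale_target
        tgt += scale_output * self._q.astype(np.float32)

src = np.random.randn(4, 4).astype(np.float32)
tgt = np.zeros_like(src)
q = HalfQuantizerSketch()
q.quantize(src)
q.dequantize(tgt)
print(np.abs(src - tgt).max())   # small, bounded by float16 rounding error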

144
include/softmaxtree.cuh Normal file
View file

@ -0,0 +1,144 @@
/*
* File: softmaxtree.h
* Author: Alex Krizhevsky
*
* Created on September 9, 2012, 5:50 PM
*/
#ifndef SOFTMAXTREE_H
#define SOFTMAXTREE_H
#include <helper_cuda.h>
#include <string>
#include <map>
#include <vector>
#include <algorithm>
#include <assert.h>
#include <nvmatrix.cuh>
#include <matrix.h>
class SoftmaxNode;
class SoftmaxTree;
typedef std::vector<SoftmaxNode*> SoftmaxNodeV;
class SoftmaxNode {
friend class SoftmaxTree;
protected:
SoftmaxNodeV _children;
SoftmaxNode* _parent;
int _depth, _height, _size;
int _label;
/*
* Computes height for entire subtree rooted at this node and populates
* given height->nodes map.
*/
int setDistances(std::map<int, SoftmaxNodeV*>& nodeHeights,
std::map<int, SoftmaxNodeV*>& nodeDepths);
void setNodeCounts(int &nodes, int& leaves);
/*
* Compute the number of leaves in this subtree, which is a good estimate
* of the number of training cases it represents.
*/
int setSizes(ushort* nodeSizes);
public:
SoftmaxNode(SoftmaxNode* parent, int label);
~SoftmaxNode();
SoftmaxNode& addChild(int label);
int getDepth() const;
int getHeight() const;
int getLabel() const;
int getSize() const;
SoftmaxNode* getParent(); // Might be null, so must be pointer
SoftmaxNodeV& getChildren();
};
/*
* numLabels: the number of leaves in the tree (normally 1000)
* numNodes: the total number of nodes in the tree
*/
class SoftmaxTree {
friend class SoftmaxNode;
protected:
SoftmaxNode* _root;
std::map<int, SoftmaxNodeV*> _nodeHeights, _nodeDepths;
/*
* Map from depth --> ushort2[]
* where each ushort2 gives the index and parent index
* of a node at the given depth.
*/
std::map<int, ushort2*> _nodeFwdMeta;
/*
* Map from height --> ushort2[]
* where each ushort2 gives the index and number of children
* of a node at the given height.
*/
std::map<int, ushort2*> _nodeBwdMeta;
/*
* Map from height --> ushort[][]
* where each ushort[] gives children of a given node at a given height.
*/
std::map<int, ushort**> _nodeChildMeta;
/*
* An array of length numNodes with index i storing the number
* of leaves in subtree rooted at node with label i.
*/
ushort* _nodeSizes;
int _numNodes, _numLeaves;
void setDistances();
void setNodeCounts();
void setNodeSizes();
void setFwdMeta();
void setBwdMeta();
void preprocess(NVMatrix& inp);
void postprocess(NVMatrix& inp);
public:
SoftmaxTree(int rootLabel);
~SoftmaxTree();
void finalize();
SoftmaxNode& getRoot();
SoftmaxNodeV& getNodesAtHeight(int height);
SoftmaxNodeV& getNodesAtDepth(int depth);
int getHeight() const;
int getDepth() const;
int getNumLeaves() const;
int getNumNodes() const;
/*
* offsets: (numNodes, numFeatures)
* targets: (numNodes, numFeatures)
*/
void makeWeights(NVMatrix& offsets, NVMatrix& targets);
/*
* grads: (numNodes, numFeatures)
*
* The idea is that grads contains gradients for the leaves
* (i.e. the first numLabels rows), so this routine will
* distribute them up the tree.
*/
void distributeGradients(NVMatrix& grads);
/*
* inc := mom * inc - wc * epsW * weight + epsW * grad
* weight := weight + inc
*
* weights: (numNodes, numFeatures)
* incs: (numNodes, numFeatures)
* grads: (numNodes , numFeatures)
*/
void updateWeights(NVMatrix& weights, NVMatrix& incs, NVMatrix& grads, float epsWBase, float mom, float wcBase);
};
#endif /* SOFTMAXTREE_H */
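Two of the comments above pin the semantics down: distributeGradients takes gradients computed for the leaves and pushes them up the tree (presumably adding each leaf's gradient into every ancestor on its path to the root), and updateWeights applies the stated momentum rule inc := mom*inc - wc*epsW*weight + epsW*grad per node. A small dictionary-based sketch of both on a toy three-node tree; the upward accumulation is an interpretation of the comment, not code copied from the kernels:

import numpy as np

# Toy tree: node -> parent (None for the root). Leaves are 0 and 1, root is 2.
parent = {0: 2, 1: 2, 2: None}
num_features = 3

def distribute_gradients(grads):
    # Push every leaf's gradient into all of its ancestors.
    for leaf in (0, 1):
        node = parent[leaf]
        while node is not None:
            grads[node] += grads[leaf]
            node = parent[node]

def update_weights(weights, incs, grads, eps_w, mom, wc):
    # inc := mom*inc - wc*epsW*weight + epsW*grad; weight := weight + inc
    for n in weights:
        incs[n] = mom * incs[n] - wc * eps_w * weights[n] + eps_w * grads[n]
        weights[n] += incs[n]

grads   = {n: (np.ones(num_features) if n in (0, 1) else np.zeros(num_features)) for n in parent}
weights = {n: np.zeros(num_features) for n in parent}
incs    = {n: np.zeros(num_features) for n in parent}
distribute_gradients(grads)
update_weights(weights, incs, grads, eps_w=0.01, mom=0.9, wc=0.0005)
print(grads[2], weights[2])   # the root accumulated both leaves' gradients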

113
include/util.cuh Normal file
View file

@ -0,0 +1,113 @@
/*
* Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UTIL_H
#define UTIL_H
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <vector>
#include <map>
#include <set>
#include <string>
#include <sstream>
#include <string>
#include <Python.h>
#include <nvmatrix.cuh>
#include <matrix.h>
/*
* The types of passes that the convnet supports. Used in the fprop and bprop functions in
* ConvNet class. Most of the layers ignore the pass type, but some make use of it.
*/
//enum PASS_TYPE {PASS_TRAIN,
// PASS_TEST,
// PASS_GC,
// PASS_MULTIVIEW_TEST,
// PASS_MULTIVIEW_TEST_START,
// PASS_MULTIVIEW_TEST_END,
// PASS_FEATURE_GEN};
#define PASS_TYPE uint
#define PASS_TRAIN 0x1
#define PASS_TEST 0x2
#define PASS_GC 0x4
#define PASS_MULTIVIEW_TEST (PASS_TEST | 0x8)
#define PASS_MULTIVIEW_TEST_START (PASS_MULTIVIEW_TEST | 0x10)
#define PASS_MULTIVIEW_TEST_END (PASS_MULTIVIEW_TEST | 0x20)
#define PASS_FEATURE_GEN 0x40
#define HAS_FLAG(f, x) (((x) & (f)) == (f))
#define IS_MULTIVIEW_TEST(x) HAS_FLAG(PASS_MULTIVIEW_TEST, x)
#define IS_MULTIVIEW_TEST_START(x) HAS_FLAG(PASS_MULTIVIEW_TEST_START, x)
#define IS_MULTIVIEW_TEST_END(x) HAS_FLAG(PASS_MULTIVIEW_TEST_END, x)
// For gradient checking
#define GC_SUPPRESS_PASSES false
#define GC_REL_ERR_THRESH 0.02
/*
* Generates a random floating point number in the range 0-1.
*/
#define randf ((float)rand() / RAND_MAX)
typedef std::vector<Matrix*> MatrixV;
typedef std::vector<NVMatrix*> NVMatrixV;
typedef std::map<std::string,std::vector<double>*> CostMap;
typedef std::map<std::string,double> CostCoeffMap;
typedef std::vector<double> doublev;
typedef std::vector<float> floatv;
typedef std::vector<int> intv;
typedef std::vector<std::string> stringv;
typedef std::set<int> seti;
stringv* getStringV(PyObject* pyList);
floatv* getFloatV(PyObject* pyList);
intv* getIntV(PyObject* pyList);
MatrixV* getMatrixV(PyObject* pyList);
MatrixV* getMatrixV(PyObject* pyList, int len);
int* getIntA(PyObject* pyList);
int pyDictGetInt(PyObject* dict, const char* key);
intv* pyDictGetIntV(PyObject* dict, const char* key);
std::string pyDictGetString(PyObject* dict, const char* key);
float pyDictGetFloat(PyObject* dict, const char* key);
floatv* pyDictGetFloatV(PyObject* dict, const char* key);
Matrix* pyDictGetMatrix(PyObject* dict, const char* key);
MatrixV* pyDictGetMatrixV(PyObject* dict, const char* key);
int* pyDictGetIntA(PyObject* dict, const char* key);
stringv* pyDictGetStringV(PyObject* dict, const char* key);
template<typename T>
std::string tostr(T n) {
std::ostringstream result;
result << n;
return result.str();
}
#endif /* UTIL_H */
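The pass types above are bit flags rather than a plain enum so that related passes can be tested with one mask: PASS_MULTIVIEW_TEST sets the PASS_TEST bit plus its own, and HAS_FLAG(f, x) checks that every bit of f is present in x. The same checks, transcribed into Python with the same constants:

PASS_TRAIN                = 0x1
PASS_TEST                 = 0x2
PASS_GC                   = 0x4
PASS_MULTIVIEW_TEST       = PASS_TEST | 0x8
PASS_MULTIVIEW_TEST_START = PASS_MULTIVIEW_TEST | 0x10
PASS_MULTIVIEW_TEST_END   = PASS_MULTIVIEW_TEST | 0x20

def has_flag(f, x):
    # True iff every bit of f is set in x.
    return (x & f) == f

assert has_flag(PASS_TEST, PASS_MULTIVIEW_TEST_START)      # a multiview pass is a test pass
assert has_flag(PASS_MULTIVIEW_TEST, PASS_MULTIVIEW_TEST_END)
assert not has_flag(PASS_MULTIVIEW_TEST, PASS_TEST)        # a plain test pass is not multiview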

150
include/weights.cuh Normal file
View file

@ -0,0 +1,150 @@
/*
* Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef WEIGHTS_CUH
#define WEIGHTS_CUH
#include <string>
#include <vector>
#include <iostream>
#include <helper_cuda.h>
#include <assert.h>
#include <nvmatrix.cuh>
#include <matrix.h>
#include "util.cuh"
#include "softmaxtree.cuh"
#include <lr.cuh>
using namespace std;
class Weights {
protected:
Matrix* _hWeights, *_hWeightsInc;
NVMatrix* _weights, *_weightsInc, *_weightsGrad;
NVMatrix* _weightsGradAvg, *_weightsGrad2Avg;
LearningRateSchedule* _lrs;
float _wc, _mom, _wball, _superEps;
bool _onGPU, _useGrad, _cleanup;
int _numUpdates;
// Non-NULL if these weights are really shared from some other layer
Weights* _srcWeights;
public:
class Grad2AvgOperator {
private:
float _mom;
public:
Grad2AvgOperator(float mom) : _mom(mom) {
}
__device__ inline float operator()(const float G2, const float g) const {
return _mom * G2 + (1.0f - _mom) * g * g;
}
};
NVMatrix& operator*() const;
Weights(Weights& srcWeights, LearningRateSchedule& lrs);
Weights(Matrix& hWeights, Matrix& hWeightsInc, LearningRateSchedule& lrs, float wc, float wball, float mom, float superEps, bool useGrad, bool cleanup=true);
virtual ~Weights();
virtual NVMatrix& getW() const;
virtual NVMatrix& getInc() const;
virtual NVMatrix& getGrad() const;
virtual Matrix& getCPUW() const;
virtual Matrix& getCPUWInc() const;
virtual LearningRateSchedule& getLearningRateSchedule() const;
virtual int getNumRows() const;
virtual int getNumCols() const;
virtual void copyToCPU();
// This function is assumed to be called in the order in which the layers
// were defined
virtual void copyToGPU();
virtual void update(float progress);
int incNumUpdates();
// Returns the number of times a gradient has been computed for this
// weight matrix during the current pass (interval between two calls of update())
// through the net. This number will only be greater than 1 if this weight matrix
// is *shared* by multiple layers in the net.
int getNumUpdates() const;
float getEps(float progress) const;
float getMom() const;
float getWC() const;
float getWBall() const;
bool isUseGrad() const;
bool isOwner() const;
float getSuperEps() const;
};
class TreeWeights : public Weights {
protected:
NVMatrix _effWeights;
NVMatrix* _leafWeights, *_leafGrad, *_leafInc;
SoftmaxTree* _tree;
public:
void copyToGPU();
void update(float progress);
NVMatrix& getW() const;
NVMatrix& getInc() const;
NVMatrix& getGrad() const;
NVMatrix& getAllW() const;
NVMatrix& getAllInc() const;
NVMatrix& getAllGrad() const;
int getNumRows() const;
void makeWeights();
void distributeGradients();
TreeWeights(SoftmaxTree& tree, Matrix& hWeights, Matrix& hWeightsInc, LearningRateSchedule& lrs, float wcBase, float mom);
};
class DummyWeights : public Weights {
public:
DummyWeights(Matrix& hWeights, Matrix& hWeightsInc, NVMatrix& weights, NVMatrix& incs, NVMatrix& grads);
};
class WeightList {
private:
std::vector<Weights*> _weightList;
public:
Weights& operator[](const int idx) const;
~WeightList();
WeightList();
void addWeights(Weights& w);
void update(float progress);
void copyToCPU();
void copyToGPU();
int getSize() const;
};
#endif /* WEIGHTS_CUH */
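Grad2AvgOperator maintains an exponential moving average of the squared per-element gradient, G2 := mom*G2 + (1 - mom)*g*g, which _weightsGrad2Avg presumably feeds into the superEps adaptive step size. A one-function NumPy equivalent of the operator itself; how superEps consumes the average is not visible in this header, so only the average is shown:

import numpy as np

def grad2_avg(g2_avg, grad, mom=0.99):
    # G2 := mom*G2 + (1 - mom) * g * g, elementwise.
    return mom * g2_avg + (1.0 - mom) * grad * grad

g2 = np.zeros(5)
for _ in range(1000):
    g2 = grad2_avg(g2, np.random.randn(5))
print(g2)   # roughly E[g^2] = 1 for unit-variance gradients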

122
include/worker.cuh Normal file
View file

@ -0,0 +1,122 @@
/*
* Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef WORKER_CUH
#define WORKER_CUH
#include "convnet.cuh"
#include "cost.cuh"
#include "data.cuh"
class ConvNet;
class Cost;
class WorkResult {
public:
enum RESULTS {BATCH_DONE, SYNC_DONE};
protected:
WorkResult::RESULTS _resultType;
Cost* _results;
public:
WorkResult(WorkResult::RESULTS resultType, Cost& results);
WorkResult(WorkResult::RESULTS resultType);
virtual ~WorkResult();
Cost& getResults() const;
WorkResult::RESULTS getResultType() const;
};
class Worker {
protected:
ConvNet* _convNet;
public:
Worker(ConvNet& convNet);
virtual void run() = 0;
};
class DataWorker : public Worker {
protected:
CPUData* _data;
DataProvider* _dp;
public:
DataWorker(ConvNet& convNet, CPUData& data);
virtual ~DataWorker();
};
class TrainingWorker : public DataWorker {
protected:
bool _test;
double _progress;
public:
TrainingWorker(ConvNet& convNet, CPUData& data, double progress, bool test);
void run();
};
class SyncWorker : public Worker {
public:
SyncWorker(ConvNet& convNet);
void run();
};
class GradCheckWorker : public DataWorker {
public:
GradCheckWorker(ConvNet& convNet, CPUData& data);
void run();
};
class MultiviewTestWorker : public DataWorker {
protected:
int _numViews;
Matrix* _cpuProbs;
std::string _logregName;
public:
MultiviewTestWorker(ConvNet& convNet, CPUData& data, int numViews, Matrix& cpuProbs, const char* softmaxName);
MultiviewTestWorker(ConvNet& convNet, CPUData& data, int numViews);
~MultiviewTestWorker();
virtual void run();
};
class FeatureWorker : public DataWorker {
protected:
MatrixV *_ftrs;
stringv *_layerNames;
public:
FeatureWorker(ConvNet& convNet, CPUData& data, MatrixV& ftrs, stringv& layerNames);
~FeatureWorker();
void run();
};
class DataGradWorker : public DataWorker {
protected:
Matrix* _dataGrads;
int _dataLayerIdx, _softmaxLayerIdx;
public:
DataGradWorker(ConvNet& convNet, CPUData& data, Matrix& dataGrads, int dataLayerIdx, int softmaxLayerIdx);
~DataGradWorker();
void run();
};
#endif /* WORKER_CUH */

21
initw.py Executable file
View file

@ -0,0 +1,21 @@
from gpumodel import *
import numpy as n
import numpy.random as nr
def get_src():
    # Load the layer state from a previously trained checkpoint to copy weights from.
    src = IGPUModel.load_checkpoint('/nobackup/kriz/tmp/ConvNet__2012-09-19_23.29.04')
    return src['model_state']['layers']
def makew(name, idx, shapes, params):
    # Weight initializer: reuse the weights of the source layer named in params[0],
    # except for weight matrix 2 of 'localcombine', which gets a fresh random init.
    src, src_layer = get_src(), params[0]
    if name == 'localcombine' and idx == 2:
        return n.array(0.01 * nr.randn(shapes[0], shapes[1]), dtype=n.single, order='C')
    return src[src_layer]['weights'][idx]
def makeb(name, shapes, params):
    # Bias initializer: reuse the source layer's biases.
    src, src_layer = get_src(), params[0]
    return src[src_layer]['biases']
def makec(name, idx, shapes, params):
    # Filter-connection initializer: reuse the source layer's filter connectivity.
    src, src_layer = get_src(), params[0]
    return src[src_layer]['filterConns'][idx]

1418
layer.py Executable file

File diff suppressed because it is too large

View file

@ -0,0 +1,47 @@
# 18% error on CIFAR-10 in 20 minutes - layer parameter file
# Reduce all learning rates by factor of 10 after 120 epochs.
# Then another factor of 10 after 10 more epochs.
[conv1]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
schedW=linear[1,1]
[conv2]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
schedW=linear[1,1]
[conv3]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
schedW=linear[1,1]
[fc10]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=1
schedW=linear[1,1]
[logprob]
coeff=1
[rnorm1]
scale=0.00005
pow=.75
[rnorm2]
scale=0.00005
pow=.75
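
The recipe in the header comment (drop all learning rates by a factor of 10 after 120 epochs, then by another factor of 10 after 10 more) is presumably applied by editing the epsW/epsB values above and resuming training. A tiny helper showing the resulting schedule; the epoch thresholds come from the comment, the function itself is only illustrative:

def eps_for_epoch(epoch, base=0.001):
    # 10x drop after epoch 120, another 10x drop after epoch 130.
    if epoch >= 130:
        return base * 0.01
    if epoch >= 120:
        return base * 0.1
    return base

print([eps_for_epoch(e) for e in (100, 125, 140)])   # [0.001, 0.0001, 1e-05]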

View file

@ -0,0 +1,45 @@
# 13% error on CIFAR-10 - layer parameter file
# See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology
[conv1]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.00
schedW=linear[1,1]
[conv2]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.00
schedW=linear[1,1]
[local3]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
schedW=linear[1,1]
[local4]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
schedW=linear[1,1]
[fc10]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.004
schedW=linear[1,1]
[logprob]
coeff=1

View file

@ -0,0 +1,106 @@
# 18% error on CIFAR-10 in 20 minutes - layer definition file
[data]
type=data
dataIdx=0
[labels]
type=data
dataIdx=1
[conv1]
type=conv
inputs=data
channels=3
filters=32
padding=2
stride=1
filterSize=5
initW=0.0001
partialSum=4
sharedBiases=1
gpu=0
[pool1]
type=pool
pool=max
inputs=conv1
start=0
sizeX=3
stride=2
outputsX=0
channels=32
neuron=relu
[rnorm1]
type=rnorm
inputs=pool1
channels=32
size=3
[conv2]
type=conv
inputs=rnorm1
filters=32
padding=2
stride=1
filterSize=5
channels=32
neuron=relu
initW=0.01
partialSum=4
sharedBiases=1
[pool2]
type=pool
pool=avg
inputs=conv2
start=0
sizeX=3
stride=2
outputsX=0
channels=32
[rnorm2]
type=rnorm
inputs=pool2
channels=32
size=3
[conv3]
type=conv
inputs=rnorm2
filters=64
padding=2
stride=1
filterSize=5
channels=32
neuron=relu
initW=0.01
partialSum=4
sharedBiases=1
[pool3]
type=pool
pool=avg
inputs=conv3
start=0
sizeX=3
stride=2
outputsX=0
channels=64
[fc10]
type=fc
outputs=10
inputs=pool3
initW=0.01
[probs]
type=softmax
inputs=fc10
[logprob]
type=cost.logreg
inputs=labels,probs
gpu=0

View file

@ -0,0 +1,95 @@
# 13% error on CIFAR-10 in 20 minutes - layer definition file
# See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology
[data]
type=data
dataIdx=0
[labels]
type=data
dataIdx=1
[conv1]
type=conv
inputs=data
channels=3
filters=64
padding=2
stride=1
filterSize=5
neuron=relu
initW=0.0001
partialSum=4
sharedBiases=1
gpu=0
[pool1]
type=pool
pool=max
inputs=conv1
start=0
sizeX=3
stride=2
outputsX=0
channels=64
[conv2]
type=conv
inputs=pool1
filters=64
padding=2
stride=1
filterSize=5
channels=64
neuron=relu
initW=0.01
partialSum=8
sharedBiases=1
[pool2]
type=pool
pool=max
inputs=conv2
start=0
sizeX=3
stride=2
outputsX=0
channels=64
[local3]
type=local
inputs=pool2
filters=32
padding=1
stride=1
filterSize=3
channels=64
neuron=relu
initW=0.04
[local4]
type=local
inputs=local3
filters=32
padding=1
stride=1
filterSize=3
channels=32
neuron=relu
initW=0.04
[fc10]
type=fc
outputs=10
inputs=local4
initW=0.01
neuron=ident
[probs]
type=softmax
inputs=fc10
[logprob]
type=cost.logreg
inputs=labels,probs
gpu=0

157
layers/layer-params-100.cfg Normal file
View file

@ -0,0 +1,157 @@
[conv1a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv1b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv2a]
epsW=0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005
wball=0,0,0
[conv2b]
epsW=0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005
wball=0,0,0
[conv3a]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.001
pow=0.25
[rnorm1b]
scale=0.001
pow=0.25
[rnorm2a]
scale=0.001
pow=0.25
[rnorm2b]
scale=0.001
pow=0.25
# on guppy7
# this is like #97 (on gpu) but with different rnorm coeffs
# /nobackup/kriz/tmp/ConvNet__2012-06-27_14.03.18
# epoch 15: set epsw to 0.001 from 0.01
# epoch 43: killed, seems slightly worse than using my old rnorm coeffs

184
layers/layer-params-106.cfg Normal file
View file

@ -0,0 +1,184 @@
[conv1a]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like #101 but with contrast normalization layers over rnorm2
# on guppy7
# logs/layers-106.log
# /nobackup/kriz/tmp/ConvNet__2012-07-07_21.11.34
# epoch 22: set epsw to 0.001 from 0.01
# epoch 31: killed, turns out weight contrast normalization is better
# restart after fixing cnorm
# on guppy9
# logs/layers-106a.log
# /nobackup/kriz/tmp/ConvNet__2012-07-17_19.06.09
# epoch 21: set epsw to 0.001 from 0.01
# restart with proper learning rate
# logs/layers-106b.log
# /nobackup/kriz/tmp/ConvNet__2012-07-19_04.15.40
# epoch 23: set epsw to 0.001 from 0.01
# epoch 46: set epsw to 0.0001 from 0.001
# epoch 61: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 72: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 79: set epsw to 0.00001 from 0.0001
# epoch 93: killed
# [1.5942473039940013, 0.3705782743769917, 0.16672222296297284]

167
layers/layer-params-107.cfg Normal file
View file

@ -0,0 +1,167 @@
[conv1a]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
wcnorm=0.00
wcNormMin=0.001
wcNormMax=0.002
[conv1b]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
wcnorm=0.00
wcNormMin=0.001
wcNormMax=0.002
[conv2a]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv2b]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv3a]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
# this is like #101 but uses wcnorm in conv1/conv2. so it uses def file #101.
# it's also like #104, but #104 only does wcnorm on conv2
# on guppy7
# logs/layers-107.log
# /nobackup/kriz/tmp/ConvNet__2012-07-09_19.20.14
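File #107 above adds wcNormMin/wcNormMax on conv1/conv2. Reading them as lower/upper bounds on the L2 norm of each filter's incoming weight vector (an assumption; the exact semantics live in the weight-constraint code, and the helper below is hypothetical), a renormalization step of that kind looks like:

import numpy as np

def clamp_filter_norms(W, norm_min=0.001, norm_max=0.002, eps=1e-12):
    # W: (n_inputs, n_filters). Rescale any column whose L2 norm falls
    # outside [norm_min, norm_max] back onto the nearer boundary.
    norms = np.linalg.norm(W, axis=0) + eps
    target = np.clip(norms, norm_min, norm_max)
    return W * (target / norms)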

187
layers/layer-params-109.cfg Normal file
View file

@ -0,0 +1,187 @@
[conv1a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
wcnorm=0.00
[conv1b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
wcnorm=0.00
[conv2a]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv2b]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv3a]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc1536a]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1536b]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1536ba]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1536bb]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1536ca]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1536cb]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs3a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[hs3b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
# this is like #101 but uses wcnorm in conv2 and also has 3 fc layers.
# on guppy9
# logs/layers-109.log
# /nobackup/kriz/tmp/ConvNet__2012-07-10_00.46.52
# epoch 17: set epsw to 0.001 from 0.01
# epoch 26: enabled dropout on hs3a,hs3b
# epoch 27: killed -- overfitting as feared
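The [hs*] sections with enable=true switch the dropout layers on, and the notes above show dropout being added on hs3a/hs3b mid-run. A minimal sketch of dropout as described in the paper, with drop probability 0.5 and test-time scaling (the function and array names are illustrative, not taken from this code):

import numpy as np

def dropout(acts, p_drop=0.5, train=True, rng=np.random):
    # Train: zero each unit independently with probability p_drop.
    # Test: keep every unit but scale activations by (1 - p_drop).
    if train:
        mask = rng.rand(*acts.shape) >= p_drop
        return acts * mask
    return acts * (1.0 - p_drop)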

146
layers/layer-params-110.cfg Normal file
View file

@ -0,0 +1,146 @@
[conv1a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
# this is like #101 but without rnorm. it's about time i found out how helpful it is to modern nets
# on guppy7
# logs/layers-110.log
# /nobackup/kriz/tmp/ConvNet__2012-07-11_00.26.55
# epoch 19: set epsw to 0.001 from 0.01
# epoch 46: set epsw to 0.0001 from 0.001
# epoch 67: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 66: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 75: killed, it looks to be about 1% worse than #101

187
layers/layer-params-111.cfg Normal file
View file

@ -0,0 +1,187 @@
[conv1a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
wcnorm=0.00
[conv1b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
wcnorm=0.00
[conv2a]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv2b]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv3a]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ca]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048cb]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs3a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[hs3b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
# this is like #101 but uses wcnorm in conv2 and also has 3 fc layers.
# it's also like #109 but uses wider fc layers, with dropout in all of them, because #109 overfit

# on guppy9
# logs/layers-111.log
# /nobackup/kriz/tmp/ConvNet__2012-07-12_23.59.48
# epoch 19: set epsw to 0.001 from 0.01
# epoch 42: this is quite a bit worse, and in an underfitting way, so i'm starting #104, which will be like this but the fc layers will be 3072 each instead of 2048

163
layers/layer-params-112.cfg Normal file
View file

@ -0,0 +1,163 @@
[conv1a]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
# on guppy7
# this is like #101 but with rnorm region of size 5 instead of 9
# logs/layers-112.log
# epoch 22: set epsw to 0.001 from 0.01
# epoch 46: set epsw to 0.0001 from 0.001
# epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 71: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 79: set epsw to 0.00001 from 0.0001
# epoch 90: killed
# [1.6064990917001289, 0.37237829837731168, 0.16815557540767209]
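The rnorm sections only carry scale and pow; the normalization neighbourhood size lives in the layer definition file, which is what #112 varies (region of 5 instead of 9). With scale playing the role of α and pow the role of β, the paper's response-normalization formula is (a reconstruction from the paper; whether the code folds the region size into scale is not asserted here):

$$ b^i_{x,y} = a^i_{x,y} \Big/ \Big( k + \alpha \sum_{j=\max(0,\,i-n/2)}^{\min(N-1,\,i+n/2)} \big(a^j_{x,y}\big)^2 \Big)^{\beta}, \qquad k = 2,\; \alpha = \mathtt{scale},\; \beta = \mathtt{pow},\; n = \text{region size}. $$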

154
layers/layer-params-113.cfg Normal file
View file

@ -0,0 +1,154 @@
[conv1a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv3a]
epsW=0.01,0.01,0.01,0.01
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[conv3b]
epsW=0.01,0.01,0.01,0.01
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[conv4a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
# this is like #101 but with conv3 taking both conv2 and conv1 as input, and conv2 just taking the low res img as input
# on guppy9

187
layers/layer-params-114.cfg Normal file
View file

@ -0,0 +1,187 @@
[conv1a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
wcnorm=0.00
[conv1b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
wcnorm=0.00
[conv2a]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv2b]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv3a]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc3072a]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc3072b]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc3072ba]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc3072bb]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc3072ca]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc3072cb]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs3a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[hs3b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
# this is like #101 but uses wcnorm in conv2 and also has 3 fc layers, with width 6144.
# on guppy9
# logs/layers-114.log
# 140523240 params (incl biases)
# /nobackup/kriz/tmp/ConvNet__2012-07-15_14.56.24
# epoch 20: set epsw to 0.001 from 0.01
# epoch 40: killed, doing worse than 115 which is the same but has only 2 fc layers

View file

@ -0,0 +1,181 @@
[conv1a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
wcnorm=0.00
[conv1b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
wcnorm=0.00
[conv2a]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv2b]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv3a]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc3072a]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc3072b]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc3072ba]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc3072bb]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc3072ca]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc3072cb]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs1b]
enable=true
[hs2a]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
# this is like 115 (on gpu) but trained on ilya's new imgnet-2010 jpeg
# on guppy7
# logs/layers-115-jpeg.log
# /nobackup/kriz/tmp/ConvNet__2012-07-18_20.56.13
# epoch 22: set epsw to 0.001 from 0.01
# epoch 48: set epsw to 0.0001 from 0.001
# epoch 58: killed, since this was a duplicate (jpeg) of a suboptimal net anyway

303
layers/layer-params-116.cfg Normal file
View file

@ -0,0 +1,303 @@
[conv1a]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1c]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1d]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.01,0.01
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv2b]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv2c]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv2d]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv3a]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3c]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3d]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4c]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4d]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5c]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5d]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc1024a]
epsW=0.01,0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024b]
epsW=0.01,0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024c]
epsW=0.01,0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024d]
epsW=0.01,0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2a]
epsW=0.01,0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2b]
epsW=0.01,0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2c]
epsW=0.01,0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2d]
epsW=0.01,0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1000]
epsW=0.01,0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[hs1c]
enable=true
[hs2c]
enable=true
[hs1d]
enable=true
[hs2d]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm1c]
scale=0.0001
pow=0.75
[rnorm1d]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[rnorm2c]
scale=0.0001
pow=0.75
[rnorm2d]
scale=0.0001
pow=0.75
# on guppy8
# this is like 112, but has wcnorm in conv2, and it's also on 4 gpus

279
layers/layer-params-117.cfg Normal file
View file

@ -0,0 +1,279 @@
[conv1a]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1c]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1d]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.01,0.01
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv2b]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv2c]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv2d]
epsW=0.01,0.01
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv3a]
epsW=0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005
wball=0,0,0
[conv3b]
epsW=0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005
wball=0,0,0
[conv3c]
epsW=0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005
wball=0,0,0
[conv4a]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4c]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4d]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5c]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5d]
epsW=0.01
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc1408a]
epsW=0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005
wball=0,0,0
[fc1408b]
epsW=0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005
wball=0,0,0
[fc1408c]
epsW=0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005
wball=0,0,0
[fc1408-2a]
epsW=0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005
wball=0,0,0
[fc1408-2b]
epsW=0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005
wball=0,0,0
[fc1408-2c]
epsW=0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005
wball=0,0,0
[fc1000]
epsW=0.01,0.01,0.01
epsB=0.002
momW=0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005
wball=0,0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[hs1c]
enable=true
[hs2c]
enable=true
[hs1d]
enable=true
[hs2d]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm1c]
scale=0.0001
pow=0.75
[rnorm1d]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[rnorm2c]
scale=0.0001
pow=0.75
[rnorm2d]
scale=0.0001
pow=0.75
# on guppy8
# this is like 112, but has wcnorm in conv2, and it's also on 4 gpus

168
layers/layer-params-118.cfg Normal file
View file

@ -0,0 +1,168 @@
[conv1a]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv2b]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv3a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
# on guppy7
# this is like #112 but with wcnorm on conv2, and also trained on jpeg
# logs/layers-118.log
# /nobackup/kriz/tmp/ConvNet__2012-07-19_18.35.31
# epoch 23: set epsw to 0.001 from 0.01
# epoch 46: set epsw to 0.0001 from 0.001
# epoch 65: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 75: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 84: set epsw to 0.00001 from 0.0001
# epoch 98: killed
# [1.640873252105713, 0.37831333333333333, 0.17355999999999999]
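The epoch annotations above trace a hand-tuned step-decay schedule for epsW: 0.01 → 0.001 → 0.0001 → 0.00001 → 0 on conv1/conv2, with the remaining layers changed separately later in the run. A minimal sketch of that kind of schedule, using the conv1/conv2 breakpoints from these notes (the function itself is illustrative):

def epsW_at(epoch,
            schedule=((0, 0.01), (23, 0.001), (46, 0.0001), (65, 0.00001), (75, 0.0))):
    # Step decay: return the most recent rate whose starting epoch has been reached.
    eps = schedule[0][1]
    for start, value in schedule:
        if epoch >= start:
            eps = value
    return eps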

View file

@ -0,0 +1,174 @@
[conv1a]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like #120 (so uses def file #120) but trained on lsvrc-2012 (full)
# on gpu
# /storage/tmp/ConvNet__2012-07-26_04.06.44
# logs/layers-120-2012-full.log
# epoch 23: set epsw to 0.001 from 0.01
# epoch 38: moved to guppy9
# /nobackup/kriz/tmp/ConvNet__2012-07-26_04.06.44
# epoch 49: set epsw to 0.0001 from 0.001
# epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 73: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 87: set epsw to 0.00001 from 0.0001
# epoch 94: killed
#

View file

@ -0,0 +1,173 @@
[conv1a]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.00001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.00001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.00001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.00001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like #120 (so uses def file #120) but trained on lsvrc-2012 (non-full)
# on guppy9
# /nobackup/kriz/tmp/ConvNet__2012-07-24_23.16.15
# epoch 22: set epsw to 0.001 from 0.01
# epoch 49: set epsw to 0.0001 from 0.001
# epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 73: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 81: set epsw to 0.00001 from 0.0001
# epoch 95: killed
# validation multiview error:
# logprob: 1.765247, 0.410440, 0.187140

View file

@ -0,0 +1,313 @@
[conv1a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1c]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1d]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2c]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2d]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3c]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3d]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4c]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4d]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5c]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5d]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc1024-1a]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1b]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1c]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1d]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2a]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2b]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2c]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2d]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1000]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs1b]
enable=true
[hs1c]
enable=true
[hs1d]
enable=true
[hs2a]
enable=true
[hs2b]
enable=true
[hs2c]
enable=true
[hs2d]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm1c]
scale=0.0001
pow=0.75
[rnorm1d]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[rnorm2c]
scale=0.0001
pow=0.75
[rnorm2d]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
[cnorm2c]
scale=0.001
pow=0.75
[cnorm2d]
scale=0.001
pow=0.75
# this is like #120 but on 4 gpus. trained on 2012 (non-full)
# on guppy
# logs/layers-120-4gpu.log
# /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23

View file

@ -0,0 +1,313 @@
[conv1a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1c]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1d]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2c]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2d]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3c]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3d]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4c]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4d]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5c]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5d]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc1024-1a]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1b]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1c]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1d]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2a]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2b]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2c]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2d]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1000]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs1b]
enable=true
[hs1c]
enable=true
[hs1d]
enable=true
[hs2a]
enable=true
[hs2b]
enable=true
[hs2c]
enable=true
[hs2d]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm1c]
scale=0.0001
pow=0.75
[rnorm1d]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[rnorm2c]
scale=0.0001
pow=0.75
[rnorm2d]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
[cnorm2c]
scale=0.001
pow=0.75
[cnorm2d]
scale=0.001
pow=0.75
# this is like #120 but on 4 gpus. trained on 2012 (non-full)
# on guppy
# logs/layers-120-4gpu.log
# /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23

View file

@ -0,0 +1,313 @@
[conv1a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1c]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1d]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2c]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2d]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3c]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3d]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4c]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4d]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5c]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5d]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc1024-1a]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1b]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1c]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1d]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2a]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2b]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2c]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2d]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1000]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs1b]
enable=true
[hs1c]
enable=true
[hs1d]
enable=true
[hs2a]
enable=true
[hs2b]
enable=true
[hs2c]
enable=true
[hs2d]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm1c]
scale=0.0001
pow=0.75
[rnorm1d]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[rnorm2c]
scale=0.0001
pow=0.75
[rnorm2d]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
[cnorm2c]
scale=0.001
pow=0.75
[cnorm2d]
scale=0.001
pow=0.75
# this is like #120 but on 4 gpus. trained on 2012 (non-full)
# on guppy
# logs/layers-120-4gpu.log
# /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23

View file

@ -0,0 +1,313 @@
[conv1a]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1c]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1d]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2c]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2d]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3c]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3d]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4c]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4d]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5c]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5d]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc1024-1a]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1b]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1c]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1d]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2a]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2b]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2c]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2d]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1000]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs1b]
enable=true
[hs1c]
enable=true
[hs1d]
enable=true
[hs2a]
enable=true
[hs2b]
enable=true
[hs2c]
enable=true
[hs2d]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm1c]
scale=0.0001
pow=0.75
[rnorm1d]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[rnorm2c]
scale=0.0001
pow=0.75
[rnorm2d]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
[cnorm2c]
scale=0.001
pow=0.75
[cnorm2d]
scale=0.001
pow=0.75
# this is like #120 but on 4 gpus. trained on 2012 (non-full)
# on guppy
# logs/layers-120-4gpu.log
# /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23

View file

@ -0,0 +1,313 @@
[conv1a]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1c]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1d]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2c]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2d]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3c]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3d]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4c]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4d]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5c]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5d]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc1024-1a]
epsW=0.00001,0.00001,0.00001,0.00001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1b]
epsW=0.00001,0.00001,0.00001,0.00001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1c]
epsW=0.00001,0.00001,0.00001,0.00001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1d]
epsW=0.00001,0.00001,0.00001,0.00001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2a]
epsW=0.00001,0.00001,0.00001,0.00001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2b]
epsW=0.00001,0.00001,0.00001,0.00001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2c]
epsW=0.00001,0.00001,0.00001,0.00001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2d]
epsW=0.00001,0.00001,0.00001,0.00001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1000]
epsW=0.00001,0.00001,0.00001,0.00001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs1b]
enable=true
[hs1c]
enable=true
[hs1d]
enable=true
[hs2a]
enable=true
[hs2b]
enable=true
[hs2c]
enable=true
[hs2d]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm1c]
scale=0.0001
pow=0.75
[rnorm1d]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[rnorm2c]
scale=0.0001
pow=0.75
[rnorm2d]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
[cnorm2c]
scale=0.001
pow=0.75
[cnorm2d]
scale=0.001
pow=0.75
# this is like #120 but on 4 gpus. trained on 2012 (non-full)
# on guppy
# logs/layers-120-4gpu.log
# /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23

View file

@ -0,0 +1,314 @@
[conv1a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1c]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1d]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2c]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2d]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3c]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3d]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4c]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4d]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5c]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5d]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc1024-1a]
epsW=0.01,0.01,0.01,0.01
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1b]
epsW=0.01,0.01,0.01,0.01
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1c]
epsW=0.01,0.01,0.01,0.01
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1d]
epsW=0.01,0.01,0.01,0.01
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2a]
epsW=0.01,0.01,0.01,0.01
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2b]
epsW=0.01,0.01,0.01,0.01
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2c]
epsW=0.01,0.01,0.01,0.01
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2d]
epsW=0.01,0.01,0.01,0.01
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1000]
epsW=0.01,0.01,0.01,0.01
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs1b]
enable=true
[hs1c]
enable=true
[hs1d]
enable=true
[hs2a]
enable=true
[hs2b]
enable=true
[hs2c]
enable=true
[hs2d]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm1c]
scale=0.0001
pow=0.75
[rnorm1d]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[rnorm2c]
scale=0.0001
pow=0.75
[rnorm2d]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
[cnorm2c]
scale=0.001
pow=0.75
[cnorm2d]
scale=0.001
pow=0.75
# this is like #120 but on 4 gpus. trained on 2012 (non-full)
# on guppy
# logs/layers-120-4gpu.log
# /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23
# epoch 56: killed, this is overfitting. will try reducing the # of params.

174
layers/layer-params-120.cfg Normal file
View file

@ -0,0 +1,174 @@
[conv1a]
epsW=0.0000
epsB=0.0
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0000
epsB=0.0
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0000,0.0000
epsB=0.0
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0000,0.0000
epsB=0.0
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.00001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.00001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.00001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.00001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like #106 but with rnorm of size 5, also train on jpegs
# on gpu
# logs/layers-120.log
# /storage/tmp/ConvNet__2012-07-22_04.40.34
# moving to guppy7
# /nobackup/kriz/tmp/ConvNet__2012-07-22_04.40.34/
# epoch 26: set epsw to 0.001 from 0.01
# epoch 47: set epsw to 0.0001 from 0.001
# epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 72: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 82: set epsw to 0.00001 from 0.0001
# epoch 106: killed
# logprob: 1.634692, 0.378533, 0.172360
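The error triples quoted at the end of these notes (here 1.634692, 0.378533, 0.172360) appear to read as [cross-entropy logprob, top-1 error, top-5 error], matching coeff=1 and topk=5 in the [logprob] section. A minimal sketch of the top-k error computation (array names are hypothetical):

import numpy as np

def topk_error(scores, labels, k=5):
    # scores: (n_examples, n_classes); labels: (n_examples,) of class indices.
    # Error = fraction of examples whose true label is not among the k highest scores.
    topk = np.argsort(scores, axis=1)[:, -k:]
    hits = (topk == labels[:, None]).any(axis=1)
    return 1.0 - hits.mean()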

179
layers/layer-params-121.cfg Normal file
View file

@ -0,0 +1,179 @@
[conv1a]
epsW=0.0000
epsB=0.0
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0000
epsB=0.0
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0000,0.0000
epsB=0.0
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0000,0.0000
epsB=0.0
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.00001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.00001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.00001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.00001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
[cnorm1a]
scale=0.001
pow=0.75
[cnorm1b]
scale=0.001
pow=0.75
# this is like #120 but with cnorm over conv1 as well
# on guppy8
# logs/layers-121.log
# /nobackup/kriz/tmp/ConvNet__2012-07-22_15.59.00
# epoch 25: set epsw to 0.001 from 0.01
# epoch 51: set epsw to 0.0001 from 0.001
# epoch 63: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 76: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 90: set epsw to 0.00001 from 0.0001
# worse than 120

View file

@ -0,0 +1,165 @@
[conv1a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like #120-2012-full but also has horiz reflection for gpu2
# on guppy8
# logs/layers-126.log
# /nobackup/kriz/tmp/ConvNet__2012-07-31_22.55.59
# killed after 19 epochs.. seems no good, and we now know that full sucks anyway

174
layers/layer-params-127.cfg Normal file
View file

@ -0,0 +1,174 @@
[conv1a]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv3b]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4a]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# trained on lsvrc-2012 (full), like #120 but
# this examines whether communication is necessary at conv3
# .. meaning it has no communication at conv3
# on gpu
# /storage/tmp/ConvNet__2012-08-01_02.35.01
# logs/layers-127.log
# killed, since we know now that full sucks.
# trained on lsvrc-2012 (non-full). like #120 but now also make conv3,conv4 wider to compensate for lost connections
# on guppy8
# logs/layers-127a.log
# /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-02_00.18.36
# epoch 21: set epsw to 0.001 from 0.01
# epoch 36: killed, significantly worse than 120

167
layers/layer-params-128.cfg Normal file
View file

@ -0,0 +1,167 @@
[conv1a]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4b]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv5a]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like #120
# and has narrower columns which communicate more. i'm running this because #127 suggests
# that communication is good
# on guppy9
# logs/layers-128.log
# epoch 25: set epsw to 0.001 from 0.01
# on hold
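As a reminder of what the knobs above mean: epsW/epsB are per-layer learning rates, momW/momB momentum coefficients, and wc the L2 weight-decay coefficient. A minimal NumPy sketch of the momentum update these names correspond to in the accompanying paper (v <- momW*v - wc*epsW*w - epsW*grad; w <- w + v); the function and variable names are illustrative, not code from this repo.

import numpy as np

def sgd_momentum_step(w, v, grad, epsW=0.01, momW=0.9, wc=0.0005):
    """One weight update with momentum and L2 weight decay:
    v <- momW*v - wc*epsW*w - epsW*grad;  w <- w + v."""
    v = momW * v - wc * epsW * w - epsW * grad
    w = w + v
    return w, v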

316
layers/layer-params-129.cfg Normal file
View file

@ -0,0 +1,316 @@
[conv1a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1c]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1d]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2c]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2d]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3c]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3d]
epsW=0.001,0.001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4c]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4d]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5c]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5d]
epsW=0.001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc1024-1a]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1b]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1c]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1d]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2a]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2b]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2c]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2d]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1000]
epsW=0.001,0.001,0.001,0.001
epsB=0.002
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs1b]
enable=true
[hs1c]
enable=true
[hs1d]
enable=true
[hs2a]
enable=true
[hs2b]
enable=true
[hs2c]
enable=true
[hs2d]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm1c]
scale=0.0001
pow=0.75
[rnorm1d]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[rnorm2c]
scale=0.0001
pow=0.75
[rnorm2d]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
[cnorm2c]
scale=0.001
pow=0.75
[cnorm2d]
scale=0.001
pow=0.75
# 4 gpus, based on 120
# on guppy7
# logs/layers-129.log
# /nobackup/kriz/tmp/ConvNet__2012-08-06_22.23.16
# epoch 22: set epsw to 0.001 from 0.01
# uhh.. relu wiped this. nice.

320
layers/layer-params-130.cfg Normal file
View file

@ -0,0 +1,320 @@
[conv1a]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1c]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1d]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2c]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2d]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3c]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3d]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4c]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4d]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5c]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5d]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc1024-1a]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1b]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1c]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-1d]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2a]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2b]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2c]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1024-2d]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[fc1000]
epsW=0.0001,0.0001,0.0001,0.0001
epsB=0.02
momW=0.9,0.9,0.9,0.9
momB=0.9
wc=0.0005,0.0005,0.0005,0.0005
wball=0,0,0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs1b]
enable=true
[hs1c]
enable=true
[hs1d]
enable=true
[hs2a]
enable=true
[hs2b]
enable=true
[hs2c]
enable=true
[hs2d]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm1c]
scale=0.0001
pow=0.75
[rnorm1d]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[rnorm2c]
scale=0.0001
pow=0.75
[rnorm2d]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
[cnorm2c]
scale=0.001
pow=0.75
[cnorm2d]
scale=0.001
pow=0.75
# this is like #129, but with 2x as many filters in conv2
# on guppy8
# /nobackup/kriz/tmp/ConvNet__2012-08-07_13.31.34
# logs/layers-130.log
# uhh.. relu wiped this. nice.
# on guppy9
# logs/layers-130a.log
# /nobackup/kriz/tmp/ConvNet__2012-08-09_14.09.20
# epoch 22: set epsw to 0.001 from 0.01
# epoch 46: set epsw to 0.0001 from 0.001
# epoch 62: killed. surprisingly, this is hardly (if at all) better than 2-gpu net

View file

@ -0,0 +1,172 @@
[conv1a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# on guppy9
# logs/layers-131-2009.log
# /nobackup/kriz/tmp/ConvNet__2012-08-18_15.41.20
# epoch 7: set epsw to 0.001 from 0.01
# epoch 14: set epsw to 0.0001 from 0.001
# epoch 20: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 24: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 31: set epsw to 0.00001 from 0.0001
# epoch 36: killed
# logprob: 3.466260, 0.694209, 0.437308
# a bit worse than previous 2009 thing!

175
layers/layer-params-131.cfg Normal file
View file

@ -0,0 +1,175 @@
[conv1a]
epsW=0.0000
epsB=0.0
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0000
epsB=0.0
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0000,0.0000
epsB=0.0
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0000,0.0000
epsB=0.0
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like #120, but puts rnorm1 right over conv1 (trained on 2012-nonfull)
# on gpu
# /storage/tmp/ConvNet__2012-08-09_12.33.33
# logs/layers-131.log
# moved to guppy7
# /nobackup/kriz/tmp/ConvNet__2012-08-09_12.33.33/
# epoch 22: set epsw to 0.001 from 0.01
# epoch 46: set epsw to 0.0001 from 0.001
# epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 75: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 81: set epsw to 0.00001 from 0.0001
# epoch 100: killed
# validation multiview error:
# logprob: 1.755725, 0.409340, 0.185740
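The epoch annotations in these logs describe a hand-tuned step schedule: epsW starts at 0.01 and is divided by 10 whenever validation stops improving, with conv1/conv2 eventually frozen. A small illustrative sketch, with epoch boundaries taken from the layers-131 log above; the function itself is not part of this repo.

def epsw_at_epoch(epoch):
    """Step schedule for most layers, per the layers-131 log:
    0.01 until epoch 22, 0.001 until 46, 0.0001 until 81, then 0.00001.
    (conv1/conv2 were additionally dropped to 0.00001 at epoch 66 and
    frozen at epoch 75 -- they would need their own schedule.)"""
    if epoch < 22:
        return 0.01
    if epoch < 46:
        return 0.001
    if epoch < 81:
        return 0.0001
    return 0.00001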

179
layers/layer-params-132.cfg Normal file
View file

@ -0,0 +1,179 @@
[conv1a]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.00001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv3b]
epsW=0.00001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4a]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4b]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv5a]
epsW=0.00001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.00001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.00001,0.00001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like 120 but with communication in conv4 instead of conv3
# on gpu
# logs/layers-132.log
# /storage/tmp/ConvNet__2012-08-11_02.23.36
# epoch 20: set epsw to 0.001 from 0.01
# epoch 44: set epsw to 0.0001 from 0.001
# moved to guppy9
# @#$%&!, killed, i accidentally trained this on full
# restart:
# /nobackup/kriz/tmp/ConvNet__2012-08-13_16.47.07
# logs/layers-132a.log
# epoch 23: set epsw to 0.001 from 0.01
# epoch 4x: set epsw to 0.0001 from 0.001
# epoch 65: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 71: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 88: killed, worse than 131

167
layers/layer-params-133.cfg Normal file
View file

@ -0,0 +1,167 @@
[conv1a]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv3b]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4a]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4b]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv5a]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
[rnorm1b]
scale=0.0001
pow=0.75
[rnorm2a]
scale=0.0001
pow=0.75
[rnorm2b]
scale=0.0001
pow=0.75
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is a hybrid of 131 and 132: so it's like 120, but has communication in conv4 instead of conv3, and it also puts rnorm1 directly over conv1
# on guppy7
# logs/layers-133.log
# /nobackup/kriz/tmp/ConvNet__2012-08-15_16.08.23
# epoch 21: set epsw to 0.001 from 0.01
# epoch 48: set epsw to 0.0001 from 0.001
# epoch 50: killed, worse than 131

169
layers/layer-params-134.cfg Normal file
View file

@ -0,0 +1,169 @@
[conv1a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
minDiv=0.25
[rnorm1b]
scale=0.0001
pow=0.75
minDiv=0.25
[rnorm2a]
scale=0.0001
pow=0.75
minDiv=0.25
[rnorm2b]
scale=0.0001
pow=0.75
minDiv=0.25
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like #131, but with minDiv of 0.25 on rnorms
# on guppy9
# /nobackup/kriz/tmp/ConvNet__2012-08-20_23.26.41
# logs/layers-134.log
# epoch 13: on hold
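The rnorm sections expose scale, pow and (from #134 on) minDiv. Assuming these correspond to the alpha, beta and additive constant k of the paper's local response normalization, b_i = a_i / (k + alpha * sum_j a_j^2)^beta with the sum over a few adjacent channels, a hedged NumPy sketch follows; the neighborhood size and whether the implementation additionally divides by it are assumptions, not read from this config.

import numpy as np

def response_norm(a, scale=0.0001, pow_=0.75, minDiv=2.0, n=5):
    """Hedged sketch of response normalization across channels (last axis):
    b_i = a_i / (minDiv + scale * sum over an n-wide channel window) ** pow_."""
    sq = a ** 2
    num_ch = a.shape[-1]
    denom = np.empty_like(a)
    for i in range(num_ch):
        lo, hi = max(0, i - n // 2), min(num_ch, i + n // 2 + 1)
        denom[..., i] = (minDiv + scale * sq[..., lo:hi].sum(axis=-1)) ** pow_
    return a / denom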

View file

@ -0,0 +1,199 @@
[conv1a]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm1b]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2b]
scale=0.0001
pow=0.75
minDiv=2
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this trains 135 on 2012, initialized from 2009 1-8800
# on guppy9
# init epsw 0.001
# logs/layers-135-2012-pretrain-2009.log
# /nobackup/kriz/tmp/ConvNet__2012-09-09_15.20.47
# epoch 22: set epsw to 0.0001 from 0.001
# epoch 23: putting on hold to train softmax tree
# this is doing worse than 141-2009 anyway, which has an extra 6th conv layer (1.97 vs 2.00)
# 135 notes:
# this is like #131, but with minDiv of 2 on rnorms
# on guppy8
# /nobackup/kriz/tmp/ConvNet__2012-08-21_01.49.23
# logs/layers-135.log
# epoch 20: set epsw to 0.001 from 0.01
# epoch 47: set epsw to 0.0001 from 0.001
# epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 75: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 81: set epsw to 0.00001 from 0.0001
# epoch 96: killed
# validation multiview:
# logprob: 1.757653, 0.410700, 0.184160
# now let's train on 2009 1-8800
# logs/layers-135-2009-bigtrain.log
# on guppy9
# /nobackup/kriz/tmp/ConvNet__2012-08-26_22.39.45
# epoch 4.7822: set epsw to 0.001 from 0.01
# epoch 8.1299: set epsw to 0.0001 from 0.001
# epoch 10.3697: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 11.4731: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 14.3906: set epsw to 0.00001 from 0.0001
# epoch 17: killed

View file

@ -0,0 +1,189 @@
[conv1a]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm1b]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2b]
scale=0.0001
pow=0.75
minDiv=2
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like #131, but with minDiv of 2 on rnorms
# on guppy8
# /nobackup/kriz/tmp/ConvNet__2012-08-21_01.49.23
# logs/layers-135.log
# epoch 20: set epsw to 0.001 from 0.01
# epoch 47: set epsw to 0.0001 from 0.001
# epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 75: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 81: set epsw to 0.00001 from 0.0001
# epoch 96: killed
# validation multiview:
# logprob: 1.757653, 0.410700, 0.184160
# now let's train on 2009 1-8800
# logs/layers-135-2009-bigtrain.log
# on guppy9
# /nobackup/kriz/tmp/ConvNet__2012-08-26_22.39.45
# epoch 4.7822: set epsw to 0.001 from 0.01
# epoch 8.1299: set epsw to 0.0001 from 0.001
# epoch 10.3697: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 11.4731: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 14.3906: set epsw to 0.00001 from 0.0001
# epoch 17: killed

177
layers/layer-params-135.cfg Normal file
View file

@ -0,0 +1,177 @@
[conv1a]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm1b]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2b]
scale=0.0001
pow=0.75
minDiv=2
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like #131, but with minDiv of 2 on rnorms
# on guppy8
# /nobackup/kriz/tmp/ConvNet__2012-08-21_01.49.23
# logs/layers-135.log
# epoch 20: set epsw to 0.001 from 0.01
# epoch 47: set epsw to 0.0001 from 0.001
# epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 75: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 81: set epsw to 0.00001 from 0.0001
# epoch 96: killed
# validation multiview:
# logprob: 1.757653, 0.410700, 0.184160

169
layers/layer-params-136.cfg Normal file
View file

@ -0,0 +1,169 @@
[conv1a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm1b]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2b]
scale=0.0001
pow=0.75
minDiv=2
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like #135 (so uses def file 135), but subtracts scalar mean
# on guppy7
# logs/layers-136.log
# /nobackup/kriz/tmp/ConvNet__2012-08-23_04.38.51
# epoch 15: eh, this is no better, and has no reason to be better. screw it.
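"Subtracts scalar mean" presumably means replacing the per-pixel mean image with a single scalar average over the training set; a hypothetical illustration of the two preprocessing variants (names and shapes are assumptions for the sketch):

import numpy as np

def subtract_mean(images, mean_image, scalar=False):
    """images: (n, h, w, c) floats; mean_image: (h, w, c) per-pixel mean.
    scalar=True subtracts one average value instead (the variant tried in #136)."""
    return images - (mean_image.mean() if scalar else mean_image)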

View file

@ -0,0 +1,196 @@
[conv1a]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.001,0.001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm1b]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2b]
scale=0.0001
pow=0.75
minDiv=2
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like 137, but with treefc
# on guppy9
# init epsw 0.01 -- this run does not scale epsw by node size
# /nobackup/kriz/tmp/ConvNet__2012-09-10_22.47.57
# logs/layers-137-tree.log
# epoch 14: set epsw to 0.001 from 0.01
# epoch 38: killed.. it's stuck at 2.17 nats.. should be nearer to 2.06. perhaps resume later
# 137 notes:
# this is like #135, but changes the cnorm layers to rnorm
# on guppy8
# logs/layers-137.log
# /nobackup/kriz/tmp/ConvNet__2012-08-25_05.39.04
# epoch 26: set epsw to 0.001 from 0.01
# epoch 50: set epsw to 0.0001 from 0.001
# epoch 75: set epsw to 0 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 84: set epsw to 0.00001 from 0.0001
# epoch 92: made backup to /nobackup/kriz/tmp/ConvNet__2012-08-25_05.39.04.bak
# set epsw to 0.0001 from 0.00001 (conv1/2 still 0)
# using BRIGHTNESS NOISE of 0.2 (in other words i zeroed out the other components of the color noise)
# epoch 101: set color (brightness) noise to 0 from 0.2
# epoch 105: set epsw to 0.00001 from 0.0001
# experiment a failure. going back to training /nobackup/kriz/tmp/ConvNet__2012-08-25_05.39.04.bak
# epoch 99: killed
# logprob: 1.751138, 0.407820, 0.183440
# batch size 128 x 8:
# /nobackup/kriz/tmp/ConvNet__2012-09-07_17.08.47
# epoch 25: set epsw to 0.001 from 0.01
# made backup to /nobackup/kriz/tmp/ConvNet__2012-09-07_17.08.47.bak
# epoch 34: killed, it's not good

207
layers/layer-params-137.cfg Normal file
View file

@ -0,0 +1,207 @@
[conv1a]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm1b]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2b]
scale=0.0001
pow=0.75
minDiv=2
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like #135, but changes the cnorm layers to rnorm
# on lsvrc-2010:
# logs/layers-137-2010.log
# on guppy9
# /nobackup/kriz/tmp/ConvNet__2012-09-13_02.47.12
# epoch 25: set epsw to 0.001 from 0.01
# epoch 49: set epsw to 0.0001 from 0.001
# epoch 81: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 85: set epsw to 0 from 0.00001 on conv1,conv2
# set epsw to 0.00001 from 0.0001 elsewhere
# epoch 103: killed
# validation:
# logprob: 1.727592, 0.394153, 0.182784
# validation multiview:
# logprob: 1.632875, 0.377960, 0.171020
# test multiview:
# logprob: 1.623185, 0.376167, 0.171247
# on lsvrc-2012:
# on guppy8
# logs/layers-137.log
# /nobackup/kriz/tmp/ConvNet__2012-08-25_05.39.04
# epoch 26: set epsw to 0.001 from 0.01
# epoch 50: set epsw to 0.0001 from 0.001
# epoch 75: set epsw to 0 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 84: set epsw to 0.00001 from 0.0001
# epoch 92: made backup to /nobackup/kriz/tmp/ConvNet__2012-08-25_05.39.04.bak
# set epsw to 0.0001 from 0.00001 (conv1/2 still 0)
# using BRIGHTNESS NOISE of 0.2 (in other words i zeroed out the other components of the color noise)
# epoch 101: set color (brightness) noise to 0 from 0.2
# epoch 105: set epsw to 0.00001 from 0.0001
# experiment a failure. going back to training /nobackup/kriz/tmp/ConvNet__2012-08-25_05.39.04.bak
# epoch 99: killed
# logprob: 1.751138, 0.407820, 0.183440
# batch size 128 x 8:
# /nobackup/kriz/tmp/ConvNet__2012-09-07_17.08.47
# epoch 25: set epsw to 0.001 from 0.01
# made backup to /nobackup/kriz/tmp/ConvNet__2012-09-07_17.08.47.bak
# epoch 34: killed, it's not good
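The triplets after "logprob:" in these logs read as average negative log-probability, top-1 error and top-5 error (cf. topk=5 in [logprob]). An illustrative sketch of computing top-k error from per-example class probabilities; this is not code from the repo.

import numpy as np

def topk_error(probs, labels, k=5):
    """Fraction of examples whose true label is not among the k
    highest-probability classes. probs: (n, num_classes), labels: (n,)."""
    topk = np.argsort(-probs, axis=1)[:, :k]
    hits = (topk == labels[:, None]).any(axis=1)
    return 1.0 - hits.mean()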

172
layers/layer-params-139.cfg Normal file
View file

@ -0,0 +1,172 @@
[conv1a]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv2b]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
wcNormMin=0.001,0
wcNormMax=0.002,0
[conv3a]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm1b]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2b]
scale=0.0001
pow=0.75
minDiv=2
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like #137 (hence uses same file) but has wcnorm on conv2[0]
# epoch 19: set epsw to 0.001 from 0.01
# epoch 49: set epsw to 0.0001 from 0.001
# epoch 62: killed, about 0.01 nat worse than 137 (which is pretty significant at this stage)

View file

@ -0,0 +1,203 @@
[conv1a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.0001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv6a]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv6b]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048a]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.0001,0.0001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm1b]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2b]
scale=0.0001
pow=0.75
minDiv=2
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like 141, but trained on half of 2009 imgnet, to be comparable to google's results
# logs/layers-141-2009-half.log
# /nobackup/kriz/tmp/ConvNet__2012-09-09_00.26.31
# on guppy9
# epoch 6.2600: set epsw to 0.001 from 0.01
# epoch 13.3361: set epsw to 0.0001 from 0.001
# epoch 18.2396: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 21.1949: set epsw to 0 from 0.00001 on conv1,conv2
# epoch 25.3718: set epsw to 0.00001 from 0.0001
# epoch 28.3271: killed
# ok test error rate is a bit worse than 131, restarting with epsw 0.001, color noise 0.1
# epoch 44.183: set epsw to 0.0001 from 0.001
# epoch 56: eek, it started getting worse on validation :/
# 141 notes:
# this is like #137 but with conv6, also communication in conv6
# /nobackup/kriz/tmp/ConvNet__2012-09-03_16.27.48
# logs/layers-141.log
# epoch 23: set epsw to 0.001 from 0.01
# epoch 48: set epsw to 0.0001 from 0.001
# epoch 60: this seems overfitty....killing
# but will use these weights to initialize a net on 2009... why the hell not?

View file

@ -0,0 +1,231 @@
[conv1a]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0000,0.0000
epsB=0.00
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv6a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv6b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm1b]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2b]
scale=0.0001
pow=0.75
minDiv=2
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# training on lsvrc-2010
# initialized from 141 trained on lsvrc-2012, then 2009
# using def file layers-141-2009-2010.cfg
# /nobackup/kriz/tmp/ConvNet__2012-09-12_01.06.32
# on guppy8
# init epsw 0.001
# logs/layers-141-2010-pretrain-2009-pretrain-2012.log
# epoch 14: set epsw to 0.0001 from 0.001
# epoch 30: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 36: set epsw to 0 on conv1/2
# epoch 47: set epsw to 0.00001 from 0.0001
# epoch 54: killed
# logprob: 1.511725, 0.356707, 0.154893
# training on lsvrc-2012
# initialized from 141 trained on lsvrc-2012, then 2009
# using def file layers-141-2009-2012.cfg
# init epsw 0.001
# logs/layers-141-2012-pretrain-2009-pretrain-2012.log
# /nobackup/kriz/tmp/ConvNet__2012-09-09_03.36.13
# backup: /ais/gobi3/u/kriz/tmp/ConvNet__2012-09-09_03.36.13
# also /ais/gobi3/u/kriz/net-backups/
# on guppy8
# epoch 13: set epsw to 0.0001 from 0.001
# epoch 26: set epsw to 0.00001 from 0.0001 on conv1,conv2
# set color noise to 0 from 0.1
# epoch 32: set epsw to 0 on conv1/2
# epoch 43: set epsw to 0.00001 from 0.0001
# epoch 54: killed
# python convnet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-09_03.36.13 --test-only=1 --test-one=0 --multiview-test=1
# logprob: 1.671316, 0.395620, 0.172060
# python convnet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-09_03.36.13 --test-only=1 --test-one=0 --multiview-test=0
# logprob: 1.779082, 0.415920, 0.186780
# 141-2009 notes, before going back to 2012:
# initialized from 141 trained on lsvrc-2012
# init epsw 0.001
# logs/layers-141-2009-pretrain-2012.log
# /nobackup/kriz/tmp/ConvNet__2012-09-07_05.22.51
# epoch 4.1189: set epsw to 0.0001 from 0.001
# epoch 5.1596: killed, not improving much. let's go back to training on lsvrc-2012 with these weights now.
#
# 141 notes:
# this is like #137 but with conv6, also communication in conv6
# /nobackup/kriz/tmp/ConvNet__2012-09-03_16.27.48
# logs/layers-141.log
# epoch 23: set epsw to 0.001 from 0.01
# epoch 48: set epsw to 0.0001 from 0.001
# epoch 60: this seems overfitty....killing
# but will use these weights to initialize a net on 2009... why the hell not?

187
layers/layer-params-141.cfg Normal file
View file

@ -0,0 +1,187 @@
[conv1a]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv2b]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0.00,0.00
[conv3a]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.0001
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv6a]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv6b]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048a]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.0001,0.0001
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm1b]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2b]
scale=0.0001
pow=0.75
minDiv=2
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like #137 but with conv6, also communication in conv6
# /nobackup/kriz/tmp/ConvNet__2012-09-03_16.27.48
# logs/layers-141.log
# epoch 23: set epsw to 0.001 from 0.01
# epoch 48: set epsw to 0.0001 from 0.001
# epoch 60: this seems overfitty....killing
# but will use these weights to initialize a net on 2009... why the hell not?

View file

@ -0,0 +1,206 @@
[conv1a]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv1b]
epsW=0.0000
epsB=0.00
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv2b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
[conv3a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv3b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[conv4a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv4b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5a]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[conv5b]
epsW=0.00001
epsB=0.002
momW=0.9
momB=0.9
wc=0.0005
wball=0
[fc2048a]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048b]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048ba]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc2048bb]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[fc1000]
epsW=0.00001,0.00001
epsB=0.002
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
[logprob]
coeff=1
topk=5
[hs1a]
enable=true
[hs2a]
enable=true
[hs1b]
enable=true
[hs2b]
enable=true
[rnorm1a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm1b]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2a]
scale=0.0001
pow=0.75
minDiv=2
[rnorm2b]
scale=0.0001
pow=0.75
minDiv=2
[cnorm2a]
scale=0.001
pow=0.75
[cnorm2b]
scale=0.001
pow=0.75
# this is like #137 but without low-res stuff
# on lsvrc-2010:
# guppy9
# logs/layers-145-2010.log
# /nobackup/kriz/tmp/ConvNet__2012-09-27_12.39.44
# epoch 23: set epsw to 0.001 from 0.01
# epoch 51: set epsw to 0.0001 from 0.001
# epoch 68: set epsw to 0.00001 from 0.0001 on conv1
# set color noise to 0 from 0.1
# epoch 72: set epsw to 0 on conv1
# epoch 78: set epsw to 0.00001 from 0.0001
# epoch 93: killed
# test multiview:
# logprob: 1.614660, 0.374727, 0.169987
# test center patch:
# logprob: 1.706031, 0.390247, 0.182953 (NOTE, NOT MULTIVIEW!!)
# on gpu (now guppy8)
# logs/layers-145.log
# /storage/tmp/ConvNet__2012-09-13_03.43.56
# epoch 25: set epsw to 0.001 from 0.01
# epoch 36: paused for localization experiments
# resuming on guppy9
# logs/layers-145-cont.log
# /nobackup/kriz/tmp/ConvNet__2012-09-13_03.43.56
# epoch 51: set epsw to 0.0001 from 0.001
# epoch 58: paused for imgnet-20k experiments
# moved to guppy8
# epoch 67: set epsw to 0.00001 from 0.0001 on conv1
# set color noise to 0 from 0.1
# epoch 72: set epsw to 0 on conv1
# epoch 79: set epsw to 0.00001 from 0.0001
# epoch 91: killed
# logprob: 1.741473, 0.406640, 0.182100
# on 2012-full:
# on guppy7
# logs/layers-145-full.log
# /nobackup/kriz/tmp/ConvNet__2012-09-23_19.38.45
# epoch 19: set epsw to 0.001 from 0.01
# epoch 47: set epsw to 0.0001 from 0.001
# epoch 61: moved to gpu

Some files were not shown because too many files have changed in this diff Show more