From 9fdd561586a4a3a42b42f73f3bdd8fdd9752ecce Mon Sep 17 00:00:00 2001 From: Laurent El Shafey Date: Tue, 10 Dec 2024 08:56:11 -0800 Subject: [PATCH] Initial commit --- CONTRIBUTING.md | 1 + LICENSE | 9 + Makefile-distrib | 28 + README.md | 9 + SdkMasterLog.csv | 2 + avg-test.py | 23 + avg-valid.py | 108 + build.sh | 19 + convdata.py | 336 +++ convdata_cifar.py | 115 + convdata_flickr.py | 297 +++ convdata_jpeg.py | 270 +++ convnet.py | 218 ++ deviceQuery.txt | 143 ++ example-layers/layer-params-18pct.cfg | 35 + example-layers/layer-params-19pct.cfg | 33 + example-layers/layer-params-80sec.cfg | 39 + .../layer-params-conv-local-12pct.cfg | 40 + .../layer-params-conv-local-13pct.cfg | 40 + example-layers/layer-params-example.cfg | 44 + example-layers/layer-params.gc.cfg | 66 + example-layers/layers-18pct.cfg | 109 + example-layers/layers-19pct.cfg | 93 + example-layers/layers-80sec.cfg | 100 + example-layers/layers-conv-local-12pct.cfg | 92 + example-layers/layers-conv-local-13pct.cfg | 93 + example-layers/layers-example.cfg | 115 + example-layers/layers.gc.cfg | 112 + findsimilar.py | 78 + fix-big-imgnet.py | 40 + fix-flickr.py | 41 + gen-py-interface.py | 65 + include/convnet.cuh | 163 ++ include/cost.cuh | 66 + include/cpuCNN.cuh | 31 + include/data.cuh | 111 + include/hostmem.cuh | 51 + include/layer.cuh | 654 ++++++ include/layer_kernels.cuh | 65 + include/lr.cuh | 77 + include/messages.cuh | 133 ++ include/multisoftmax.h | 38 + include/neuron.cuh | 529 +++++ include/pipedispenser.cuh | 139 ++ include/pyconvnet.cuh | 43 + include/quantizer.cuh | 43 + include/softmaxtree.cuh | 144 ++ include/util.cuh | 113 + include/weights.cuh | 150 ++ include/worker.cuh | 122 + initw.py | 21 + layer.py | 1418 ++++++++++++ layers-cifar/layer-params-18pct-noisylr.cfg | 47 + .../layer-params-conv-local-13pct-noisylr.cfg | 45 + layers-cifar/layers-18pct.cfg | 106 + layers-cifar/layers-conv-local-13pct.cfg | 95 + layers/layer-params-100.cfg | 157 ++ layers/layer-params-106.cfg | 184 ++ layers/layer-params-107.cfg | 167 ++ layers/layer-params-109.cfg | 187 ++ layers/layer-params-110.cfg | 146 ++ layers/layer-params-111.cfg | 187 ++ layers/layer-params-112.cfg | 163 ++ layers/layer-params-113.cfg | 154 ++ layers/layer-params-114.cfg | 187 ++ layers/layer-params-115-jpeg.cfg | 181 ++ layers/layer-params-116.cfg | 303 +++ layers/layer-params-117.cfg | 279 +++ layers/layer-params-118.cfg | 168 ++ layers/layer-params-120-2012-full.cfg | 174 ++ layers/layer-params-120-2012.cfg | 173 ++ layers/layer-params-120-4gpu-auto2.cfg | 313 +++ layers/layer-params-120-4gpu-auto3.cfg | 313 +++ layers/layer-params-120-4gpu-auto4.cfg | 313 +++ layers/layer-params-120-4gpu-auto5.cfg | 313 +++ layers/layer-params-120-4gpu-auto6.cfg | 313 +++ layers/layer-params-120-4gpu.cfg | 314 +++ layers/layer-params-120.cfg | 174 ++ layers/layer-params-121.cfg | 179 ++ layers/layer-params-126-2012-full.cfg | 165 ++ layers/layer-params-127.cfg | 174 ++ layers/layer-params-128.cfg | 167 ++ layers/layer-params-129.cfg | 316 +++ layers/layer-params-130.cfg | 320 +++ layers/layer-params-131-2009.cfg | 172 ++ layers/layer-params-131.cfg | 175 ++ layers/layer-params-132.cfg | 179 ++ layers/layer-params-133.cfg | 167 ++ layers/layer-params-134.cfg | 169 ++ layers/layer-params-135-2009-2012.cfg | 199 ++ layers/layer-params-135-2009.cfg | 189 ++ layers/layer-params-135.cfg | 177 ++ layers/layer-params-136.cfg | 169 ++ layers/layer-params-137-tree.cfg | 196 ++ layers/layer-params-137.cfg | 207 ++ layers/layer-params-139.cfg | 172 ++ 
layers/layer-params-141-2009-half.cfg | 203 ++ layers/layer-params-141-2009.cfg | 231 ++ layers/layer-params-141.cfg | 187 ++ layers/layer-params-145-2010.cfg | 206 ++ layers/layer-params-145-half.cfg | 100 + layers/layer-params-145.cfg | 204 ++ layers/layer-params-146-2009-tree.cfg | 182 ++ layers/layer-params-146-2009.cfg | 188 ++ layers/layer-params-146-2011.cfg | 221 ++ layers/layer-params-146-2012-2009.cfg | 203 ++ layers/layer-params-146-2012-2011.cfg | 205 ++ layers/layer-params-147.cfg | 174 ++ layers/layer-params-147.cfg.save | 169 ++ layers/layer-params-148.cfg | 148 ++ layers/layer-params-149.cfg | 182 ++ layers/layer-params-150.cfg | 180 ++ layers/layer-params-153.cfg | 184 ++ layers/layer-params-154.cfg | 372 +++ layers/layer-params-155.cfg | 190 ++ layers/layer-params-156.cfg | 190 ++ layers/layer-params-157.cfg | 188 ++ layers/layer-params-158.cfg | 189 ++ layers/layer-params-160.cfg | 187 ++ layers/layer-params-161.cfg | 183 ++ layers/layer-params-162.cfg | 187 ++ layers/layer-params-163.cfg | 188 ++ layers/layer-params-165.cfg | 204 ++ layers/layer-params-166.cfg | 204 ++ layers/layer-params-167.cfg | 187 ++ layers/layer-params-169.cfg | 212 ++ layers/layer-params-170-256-0.015.cfg | 201 ++ layers/layer-params-170-256-double.cfg | 203 ++ layers/layer-params-170-256.cfg | 199 ++ layers/layer-params-170-4gpu-exp.cfg | 412 ++++ layers/layer-params-170-4gpu.cfg | 416 ++++ layers/layer-params-170-quant.cfg | 234 ++ layers/layer-params-170.cfg | 242 ++ layers/layer-params-171.cfg | 202 ++ layers/layer-params-172.cfg | 202 ++ layers/layer-params-174.cfg | 209 ++ layers/layer-params-175.cfg | 202 ++ layers/layer-params-177.cfg | 210 ++ layers/layer-params-178.cfg | 215 ++ layers/layer-params-180.cfg | 202 ++ layers/layer-params-183-4gpu-26epc.cfg | 451 ++++ layers/layer-params-183-4gpu-exp.cfg | 450 ++++ layers/layer-params-183-4gpu.cfg | 450 ++++ layers/layer-params-184-4gpu-26epc.cfg | 448 ++++ layers/layer-params-184-4gpu.cfg | 450 ++++ layers/layer-params-2009-101.cfg | 162 ++ layers/layer-params-96-16k.cfg | 156 ++ layers/layer-params-98-16kinit.cfg | 164 ++ layers/layer-params-99.cfg | 162 ++ layers/layer-params-flickr-102-inet-init.cfg | 158 ++ layers/layer-params-flickr-102.cfg | 161 ++ layers/layer-params-flickr-103.cfg | 155 ++ layers/layer-params-flickr-105.cfg | 156 ++ .../layer-params-inet-5layer-conv94-2gpu.cfg | 164 ++ layers/layers-100.cfg | 314 +++ layers/layers-106.cfg | 322 +++ layers/layers-109.cfg | 340 +++ layers/layers-110.cfg | 286 +++ layers/layers-111.cfg | 340 +++ layers/layers-112.cfg | 310 +++ layers/layers-113.cfg | 310 +++ layers/layers-114.cfg | 340 +++ layers/layers-115-jpeg.cfg | 310 +++ layers/layers-116.cfg | 616 +++++ layers/layers-117.cfg | 471 ++++ layers/layers-118.cfg | 310 +++ layers/layers-120-4gpu.cfg | 605 +++++ layers/layers-120.cfg | 322 +++ layers/layers-121.cfg | 334 +++ layers/layers-126.cfg | 334 +++ layers/layers-127.cfg | 322 +++ layers/layers-128.cfg | 324 +++ layers/layers-129.cfg | 605 +++++ layers/layers-130.cfg | 605 +++++ layers/layers-131-2009.cfg | 322 +++ layers/layers-131.cfg | 322 +++ layers/layers-132.cfg | 323 +++ layers/layers-133.cfg | 323 +++ layers/layers-134.cfg | 322 +++ layers/layers-135-2009-2012.cfg | 352 +++ layers/layers-135-2009.cfg | 322 +++ layers/layers-135.cfg | 322 +++ layers/layers-137-tree.cfg | 326 +++ layers/layers-137.cfg | 322 +++ layers/layers-141-2009-2010.cfg | 381 ++++ layers/layers-141-2009-2012.cfg | 381 ++++ layers/layers-141-2009-half.cfg | 347 +++ layers/layers-141-2009.cfg | 381 ++++ 
layers/layers-141.cfg | 347 +++ layers/layers-145-half.cfg | 165 ++ layers/layers-145.cfg | 308 +++ layers/layers-146-2009-tree.cfg | 336 +++ layers/layers-146-2009.cfg | 354 +++ layers/layers-146-2011.cfg | 390 ++++ layers/layers-146-2012-2009.cfg | 366 +++ layers/layers-146-2012-2011.cfg | 366 +++ layers/layers-147.cfg | 307 +++ layers/layers-148.cfg | 272 +++ layers/layers-149.cfg | 321 +++ layers/layers-150.cfg | 308 +++ layers/layers-153-4gpu.cfg | 609 +++++ layers/layers-153.cfg | 308 +++ layers/layers-166.cfg | 308 +++ layers/layers-167.cfg | 284 +++ layers/layers-177.cfg | 317 +++ layers/layers-178.cfg | 308 +++ layers/layers-183-4gpu.cfg | 665 ++++++ layers/layers-184-4gpu.cfg | 665 ++++++ layers/layers-2009-101.cfg | 310 +++ layers/layers-96-16k.cfg | 321 +++ layers/layers-98-16kinit.cfg | 357 +++ layers/layers-99.cfg | 314 +++ layers/layers-flickr-102-inet-init.cfg | 341 +++ layers/layers-flickr-102.cfg | 312 +++ layers/layers-flickr-103.cfg | 308 +++ layers/layers-flickr-105.cfg | 314 +++ layers/layers-inet-5layer-conv94-2gpu.cfg | 321 +++ multisoft-normed.py | 155 ++ multisoft.py | 122 + package.sh | 30 + pyInterface.cutemp | 196 ++ readme.html | 1 + run4.sh | 8 + shownet.py | 575 +++++ src/convnet.cu | 594 +++++ src/cost.cu | 126 ++ src/cpuCNN.cu | 65 + src/data.cu | 98 + src/hostmem.cu | 34 + src/layer.cu | 2002 +++++++++++++++++ src/layer_kernels.cu | 720 ++++++ src/lr.cu | 186 ++ src/multisoftmax.cpp | 126 ++ src/neuron.cu | 85 + src/pyconvnet.cu | 242 ++ src/quantizer.cu | 65 + src/softmaxtree.cu | 441 ++++ src/test.cu | 378 ++++ src/util.cu | 124 + src/weights.cu | 378 ++++ src/worker.cu | 279 +++ test.py | 32 + test.sh | 8 + tm.sh | 43 + txt-preds.py | 15 + verify-test-preds.py | 31 + 246 files changed, 58283 insertions(+) create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 100755 Makefile-distrib create mode 100644 README.md create mode 100644 SdkMasterLog.csv create mode 100755 avg-test.py create mode 100755 avg-valid.py create mode 100755 build.sh create mode 100755 convdata.py create mode 100755 convdata_cifar.py create mode 100755 convdata_flickr.py create mode 100755 convdata_jpeg.py create mode 100755 convnet.py create mode 100644 deviceQuery.txt create mode 100644 example-layers/layer-params-18pct.cfg create mode 100644 example-layers/layer-params-19pct.cfg create mode 100644 example-layers/layer-params-80sec.cfg create mode 100644 example-layers/layer-params-conv-local-12pct.cfg create mode 100644 example-layers/layer-params-conv-local-13pct.cfg create mode 100644 example-layers/layer-params-example.cfg create mode 100644 example-layers/layer-params.gc.cfg create mode 100644 example-layers/layers-18pct.cfg create mode 100644 example-layers/layers-19pct.cfg create mode 100644 example-layers/layers-80sec.cfg create mode 100644 example-layers/layers-conv-local-12pct.cfg create mode 100644 example-layers/layers-conv-local-13pct.cfg create mode 100644 example-layers/layers-example.cfg create mode 100644 example-layers/layers.gc.cfg create mode 100755 findsimilar.py create mode 100755 fix-big-imgnet.py create mode 100755 fix-flickr.py create mode 100755 gen-py-interface.py create mode 100644 include/convnet.cuh create mode 100644 include/cost.cuh create mode 100644 include/cpuCNN.cuh create mode 100644 include/data.cuh create mode 100644 include/hostmem.cuh create mode 100644 include/layer.cuh create mode 100644 include/layer_kernels.cuh create mode 100644 include/lr.cuh create mode 100644 include/messages.cuh create mode 100644 
include/multisoftmax.h create mode 100644 include/neuron.cuh create mode 100644 include/pipedispenser.cuh create mode 100644 include/pyconvnet.cuh create mode 100644 include/quantizer.cuh create mode 100644 include/softmaxtree.cuh create mode 100644 include/util.cuh create mode 100644 include/weights.cuh create mode 100644 include/worker.cuh create mode 100755 initw.py create mode 100755 layer.py create mode 100644 layers-cifar/layer-params-18pct-noisylr.cfg create mode 100644 layers-cifar/layer-params-conv-local-13pct-noisylr.cfg create mode 100644 layers-cifar/layers-18pct.cfg create mode 100644 layers-cifar/layers-conv-local-13pct.cfg create mode 100644 layers/layer-params-100.cfg create mode 100644 layers/layer-params-106.cfg create mode 100644 layers/layer-params-107.cfg create mode 100644 layers/layer-params-109.cfg create mode 100644 layers/layer-params-110.cfg create mode 100644 layers/layer-params-111.cfg create mode 100644 layers/layer-params-112.cfg create mode 100644 layers/layer-params-113.cfg create mode 100644 layers/layer-params-114.cfg create mode 100644 layers/layer-params-115-jpeg.cfg create mode 100644 layers/layer-params-116.cfg create mode 100644 layers/layer-params-117.cfg create mode 100644 layers/layer-params-118.cfg create mode 100644 layers/layer-params-120-2012-full.cfg create mode 100644 layers/layer-params-120-2012.cfg create mode 100644 layers/layer-params-120-4gpu-auto2.cfg create mode 100644 layers/layer-params-120-4gpu-auto3.cfg create mode 100644 layers/layer-params-120-4gpu-auto4.cfg create mode 100644 layers/layer-params-120-4gpu-auto5.cfg create mode 100644 layers/layer-params-120-4gpu-auto6.cfg create mode 100644 layers/layer-params-120-4gpu.cfg create mode 100644 layers/layer-params-120.cfg create mode 100644 layers/layer-params-121.cfg create mode 100644 layers/layer-params-126-2012-full.cfg create mode 100644 layers/layer-params-127.cfg create mode 100644 layers/layer-params-128.cfg create mode 100644 layers/layer-params-129.cfg create mode 100644 layers/layer-params-130.cfg create mode 100644 layers/layer-params-131-2009.cfg create mode 100644 layers/layer-params-131.cfg create mode 100644 layers/layer-params-132.cfg create mode 100644 layers/layer-params-133.cfg create mode 100644 layers/layer-params-134.cfg create mode 100644 layers/layer-params-135-2009-2012.cfg create mode 100644 layers/layer-params-135-2009.cfg create mode 100644 layers/layer-params-135.cfg create mode 100644 layers/layer-params-136.cfg create mode 100644 layers/layer-params-137-tree.cfg create mode 100644 layers/layer-params-137.cfg create mode 100644 layers/layer-params-139.cfg create mode 100644 layers/layer-params-141-2009-half.cfg create mode 100644 layers/layer-params-141-2009.cfg create mode 100644 layers/layer-params-141.cfg create mode 100644 layers/layer-params-145-2010.cfg create mode 100644 layers/layer-params-145-half.cfg create mode 100644 layers/layer-params-145.cfg create mode 100644 layers/layer-params-146-2009-tree.cfg create mode 100644 layers/layer-params-146-2009.cfg create mode 100644 layers/layer-params-146-2011.cfg create mode 100644 layers/layer-params-146-2012-2009.cfg create mode 100644 layers/layer-params-146-2012-2011.cfg create mode 100644 layers/layer-params-147.cfg create mode 100644 layers/layer-params-147.cfg.save create mode 100644 layers/layer-params-148.cfg create mode 100644 layers/layer-params-149.cfg create mode 100644 layers/layer-params-150.cfg create mode 100644 layers/layer-params-153.cfg create mode 100644 
layers/layer-params-154.cfg create mode 100644 layers/layer-params-155.cfg create mode 100644 layers/layer-params-156.cfg create mode 100644 layers/layer-params-157.cfg create mode 100644 layers/layer-params-158.cfg create mode 100644 layers/layer-params-160.cfg create mode 100644 layers/layer-params-161.cfg create mode 100644 layers/layer-params-162.cfg create mode 100644 layers/layer-params-163.cfg create mode 100644 layers/layer-params-165.cfg create mode 100644 layers/layer-params-166.cfg create mode 100644 layers/layer-params-167.cfg create mode 100644 layers/layer-params-169.cfg create mode 100644 layers/layer-params-170-256-0.015.cfg create mode 100644 layers/layer-params-170-256-double.cfg create mode 100644 layers/layer-params-170-256.cfg create mode 100644 layers/layer-params-170-4gpu-exp.cfg create mode 100644 layers/layer-params-170-4gpu.cfg create mode 100644 layers/layer-params-170-quant.cfg create mode 100644 layers/layer-params-170.cfg create mode 100644 layers/layer-params-171.cfg create mode 100644 layers/layer-params-172.cfg create mode 100644 layers/layer-params-174.cfg create mode 100644 layers/layer-params-175.cfg create mode 100644 layers/layer-params-177.cfg create mode 100644 layers/layer-params-178.cfg create mode 100644 layers/layer-params-180.cfg create mode 100644 layers/layer-params-183-4gpu-26epc.cfg create mode 100644 layers/layer-params-183-4gpu-exp.cfg create mode 100644 layers/layer-params-183-4gpu.cfg create mode 100644 layers/layer-params-184-4gpu-26epc.cfg create mode 100644 layers/layer-params-184-4gpu.cfg create mode 100644 layers/layer-params-2009-101.cfg create mode 100644 layers/layer-params-96-16k.cfg create mode 100644 layers/layer-params-98-16kinit.cfg create mode 100644 layers/layer-params-99.cfg create mode 100644 layers/layer-params-flickr-102-inet-init.cfg create mode 100644 layers/layer-params-flickr-102.cfg create mode 100644 layers/layer-params-flickr-103.cfg create mode 100644 layers/layer-params-flickr-105.cfg create mode 100644 layers/layer-params-inet-5layer-conv94-2gpu.cfg create mode 100644 layers/layers-100.cfg create mode 100644 layers/layers-106.cfg create mode 100644 layers/layers-109.cfg create mode 100644 layers/layers-110.cfg create mode 100644 layers/layers-111.cfg create mode 100644 layers/layers-112.cfg create mode 100644 layers/layers-113.cfg create mode 100644 layers/layers-114.cfg create mode 100644 layers/layers-115-jpeg.cfg create mode 100644 layers/layers-116.cfg create mode 100644 layers/layers-117.cfg create mode 100644 layers/layers-118.cfg create mode 100644 layers/layers-120-4gpu.cfg create mode 100644 layers/layers-120.cfg create mode 100644 layers/layers-121.cfg create mode 100644 layers/layers-126.cfg create mode 100644 layers/layers-127.cfg create mode 100644 layers/layers-128.cfg create mode 100644 layers/layers-129.cfg create mode 100644 layers/layers-130.cfg create mode 100644 layers/layers-131-2009.cfg create mode 100644 layers/layers-131.cfg create mode 100644 layers/layers-132.cfg create mode 100644 layers/layers-133.cfg create mode 100644 layers/layers-134.cfg create mode 100644 layers/layers-135-2009-2012.cfg create mode 100644 layers/layers-135-2009.cfg create mode 100644 layers/layers-135.cfg create mode 100644 layers/layers-137-tree.cfg create mode 100644 layers/layers-137.cfg create mode 100644 layers/layers-141-2009-2010.cfg create mode 100644 layers/layers-141-2009-2012.cfg create mode 100644 layers/layers-141-2009-half.cfg create mode 100644 layers/layers-141-2009.cfg create mode 100644 
layers/layers-141.cfg create mode 100644 layers/layers-145-half.cfg create mode 100644 layers/layers-145.cfg create mode 100644 layers/layers-146-2009-tree.cfg create mode 100644 layers/layers-146-2009.cfg create mode 100644 layers/layers-146-2011.cfg create mode 100644 layers/layers-146-2012-2009.cfg create mode 100644 layers/layers-146-2012-2011.cfg create mode 100644 layers/layers-147.cfg create mode 100644 layers/layers-148.cfg create mode 100644 layers/layers-149.cfg create mode 100644 layers/layers-150.cfg create mode 100644 layers/layers-153-4gpu.cfg create mode 100644 layers/layers-153.cfg create mode 100644 layers/layers-166.cfg create mode 100644 layers/layers-167.cfg create mode 100644 layers/layers-177.cfg create mode 100644 layers/layers-178.cfg create mode 100644 layers/layers-183-4gpu.cfg create mode 100644 layers/layers-184-4gpu.cfg create mode 100644 layers/layers-2009-101.cfg create mode 100644 layers/layers-96-16k.cfg create mode 100644 layers/layers-98-16kinit.cfg create mode 100644 layers/layers-99.cfg create mode 100644 layers/layers-flickr-102-inet-init.cfg create mode 100644 layers/layers-flickr-102.cfg create mode 100644 layers/layers-flickr-103.cfg create mode 100644 layers/layers-flickr-105.cfg create mode 100644 layers/layers-inet-5layer-conv94-2gpu.cfg create mode 100755 multisoft-normed.py create mode 100755 multisoft.py create mode 100755 package.sh create mode 100755 pyInterface.cutemp create mode 100644 readme.html create mode 100755 run4.sh create mode 100755 shownet.py create mode 100644 src/convnet.cu create mode 100644 src/cost.cu create mode 100644 src/cpuCNN.cu create mode 100644 src/data.cu create mode 100644 src/hostmem.cu create mode 100644 src/layer.cu create mode 100644 src/layer_kernels.cu create mode 100644 src/lr.cu create mode 100644 src/multisoftmax.cpp create mode 100644 src/neuron.cu create mode 100644 src/pyconvnet.cu create mode 100644 src/quantizer.cu create mode 100644 src/softmaxtree.cu create mode 100644 src/test.cu create mode 100644 src/util.cu create mode 100644 src/weights.cu create mode 100644 src/worker.cu create mode 100755 test.py create mode 100755 test.sh create mode 100755 tm.sh create mode 100755 txt-preds.py create mode 100755 verify-test-preds.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..8cc085b --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1 @@ +External contributions are not accepted, sorry! diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d750cc1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,9 @@ +Copyright 2023 Google LLC. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/Makefile-distrib b/Makefile-distrib new file mode 100755 index 0000000..f91be2b --- /dev/null +++ b/Makefile-distrib @@ -0,0 +1,28 @@ +MODELNAME := _ConvNet + +INCLUDES := -I$(PYTHON_INCLUDE_PATH) -I$(NUMPY_INCLUDE_PATH) -I./include -I./include/common -I./include/cudaconv2 -I./include/nvmatrix +LIB := -lpthread -L$(ATLAS_LIB_PATH) -L$(CUDA_INSTALL_PATH)/lib64 -lcblas + +USECUBLAS := 1 + +PYTHON_VERSION=$(shell python -V 2>&1 | cut -d ' ' -f 2 | cut -d '.' -f 1,2) +LIB += -lpython$(PYTHON_VERSION) + +GENCODE_ARCH := -gencode=arch=compute_20,code=\"sm_20,compute_20\" +COMMONFLAGS := -DNUMPY_INTERFACE -DMODELNAME=$(MODELNAME) -DINITNAME=init$(MODELNAME) + +EXECUTABLE := $(MODELNAME).so + +CUFILES := $(shell echo src/*.cu src/cudaconv2/*.cu src/nvmatrix/*.cu) +CU_DEPS := $(shell echo include/*.cuh include/cudaconv2/*.cuh include/nvmatrix/*.cuh) +CCFILES := $(shell echo src/common/*.cpp) +C_DEPS := $(shell echo include/common/*.h) + +include common-gcc-cuda-4.0.mk + +makedirectories: + $(VERBOSE)mkdir -p $(LIBDIR) + $(VERBOSE)mkdir -p $(OBJDIR)/src/cudaconv2 + $(VERBOSE)mkdir -p $(OBJDIR)/src/nvmatrix + $(VERBOSE)mkdir -p $(OBJDIR)/src/common + $(VERBOSE)mkdir -p $(TARGETDIR) diff --git a/README.md b/README.md new file mode 100644 index 0000000..921f8df --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +# AlexNet + +This package contains the original AlexNet code. + +Krizhevsky, A., Sutskever, I. & Hinton, G. E. (2012). +ImageNet Classification with Deep Convolutional Neural Networks. +In F. Pereira, C. J. C. Burges, L. Bottou & K. Q. Weinberger (ed.), +Advances in Neural Information Processing Systems 25 (pp. 1097--1105). +Curran Associates, Inc. . 
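Editor's note: the data providers added further down in this patch (convdata.py, convdata_flickr.py) apply the paper's PCA colour-noise augmentation on a background thread (ColorNoiseMakerThread): each training image receives a per-channel offset drawn along the colour-covariance eigenvectors, and the result is rescaled so its overall magnitude stays close to the unperturbed test data. The following is a minimal, self-contained NumPy sketch of that one step; the eigenvector matrix, standard deviations, and noise coefficient are placeholder values for illustration, not the ones shipped in batches.meta.

# Illustrative sketch of the PCA colour-noise step (assumed placeholder values).
import numpy as np

rng = np.random.RandomState(0)

# Placeholder colour-PCA basis: a 3x3 eigenvector matrix and per-component
# standard deviations. In the real code these come from batch_meta['color_pca'].
color_eig = np.eye(3, dtype=np.float32)
color_stdevs = np.array([[0.2], [0.1], [0.05]], dtype=np.float32)

num_images = 4
coeff = 0.1  # placeholder for the provider's color_noise coefficient (dp_params['color_noise'])

# One noise 3-vector per image: eigvecs . (N(0,1) samples scaled by the stdevs),
# essentially the product computed in ColorNoiseMakerThread.run() in convdata.py.
noise = np.dot(color_eig, rng.randn(3, num_images).astype(np.float32) * color_stdevs)

# Zero-mean images laid out as (case, channel, height, width): add the
# per-channel offset, then rescale so training and test magnitudes stay close.
images = rng.randn(num_images, 3, 8, 8).astype(np.float32)
images += coeff * noise.T[:, :, None, None]
images /= 1.0 + coeff

In the providers below, the same arithmetic is carried out on flattened (channels x pixels, cases) matrices so it can be fused with the cropping/flipping pipeline, but the per-channel offset and the 1/(1 + coeff) rescaling are the same.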
diff --git a/SdkMasterLog.csv b/SdkMasterLog.csv new file mode 100644 index 0000000..05b64ee --- /dev/null +++ b/SdkMasterLog.csv @@ -0,0 +1,2 @@ +deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 4.2, CUDA Runtime Version = 4.2, NumDevs = 4, Device = Tesla S2050, Device = Tesla S2050 +deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 4.2, CUDA Runtime Version = 4.2, NumDevs = 4, Device = Tesla S2050, Device = Tesla S2050 diff --git a/avg-test.py b/avg-test.py new file mode 100755 index 0000000..a6b4f06 --- /dev/null +++ b/avg-test.py @@ -0,0 +1,23 @@ +from util import * +import os +import sys +import re +import random as r +import os + +def do_avg(paths, tgtpath, coeffs): + for i,f in enumerate(sorted(os.listdir(paths[0]))): + b = int(re.match('test_preds_(\d+)', f).group(1)) + dics = [unpickle(os.path.join(p, f)) for p in paths] + preds = sum(c * d['data'] for c,d in zip(coeffs, dics)) + pickle(os.path.join(tgtpath, 'test_preds_%d' % b), {'data': preds}) + print "Wrote batch %d" % b + +if __name__ == "__main__": + paths = sys.argv[1].split(',') + tgtpath = sys.argv[2] + if not os.path.exists(tgtpath): + os.makedirs(tgtpath) + coeffs = [float(x) for x in sys.argv[3].split(',')] + do_avg(paths, tgtpath, coeffs) + diff --git a/avg-valid.py b/avg-valid.py new file mode 100755 index 0000000..d0616e1 --- /dev/null +++ b/avg-valid.py @@ -0,0 +1,108 @@ +from util import * +import os +import sys +import re +import random as r +import numpy.random as nr +from math import sqrt + +#VALID_PATH = '/ais/gobi3/u/kriz/lsvrc-2012' +VALID_PATH = '/storage/lsvrc-2012' + +def compute_top5(preds, labels): + errs = 0 + for c in xrange(preds.shape[0]): + err = True + for i in xrange(5): + top = preds[c,:].argmax() + if top == labels[c]: + err = False + break + preds[c, top] = -1 + errs += err + return errs + #top5 = [[k[0] for k in sorted(zip(xrange(preds.shape[1]), preds[c,:]), key=lambda x:x[1], reverse=True)[:5]] for c in xrange(preds.shape[0])] + #return sum(l not in t for l,t in zip(labels, top5)) + +def do_avg(paths, coeffs, top5=False): + #coeffs = [float(x) for x in sys.argv[2].split(',')] + off = unpickle(os.path.join(VALID_PATH, 'batches.meta'))['label_offset'] + errs1, errs5, cases = 0, 0, 0 + for i,f in enumerate(sorted(os.listdir(paths[0]))): + b = int(re.match('test_preds_(\d+)', f).group(1)) + dics = [unpickle(os.path.join(p, f)) for p in paths] + dicv = unpickle(os.path.join(VALID_PATH, 'data_batch_%d' % b)) + labels = n.array([d[1]+off for d in dicv[2]]) + assert labels.min >= 0 and labels.max() < 1000 + preds = sum(c * d['data'] for c,d in zip(coeffs, dics)) + assert preds.shape[1] == 1000 + err1 = sum(preds.argmax(1) != labels) + err5 = compute_top5(preds, labels) if top5 else 0 + errs1 += err1 + errs5 += err5 + cases += preds.shape[0] + + #print "%.4f %.4f" % (float(err1) / preds.shape[0], float(err5) / preds.shape[0]) + return errs1 / float(cases), errs5 / float(cases) + #print "Average error rate with coeffs %s: %.4f %.4f" % (", ".join("%.2f" % f for f in coeffs), errs1 / float(cases), errs5 / float(cases)) + +def find_coeffs(paths, passes=5, cmin=0.0, cmax=1.0, step=0.05): + coeffs = [(cmax-cmin)/2 for i in xrange(len(paths))] + #coeffs = [cmin + (r.random() * (cmax-cmin)) for i in xrange(len(paths))] + best1 = do_avg(paths, coeffs, top5=True)[1] + changed = -1 + for p in xrange(passes): + print "Pass %d" % p + for i in xrange(len(coeffs)): + if changed == i: + changed = -2 + break + for c in [cmin + c * step for c in xrange(1+int((cmax-cmin)/step))]: + oldc = coeffs[i] + 
coeffs[i] = c + err = do_avg(paths, coeffs, top5=True)[1] + if err < best1: + best1 = err + changed = i + else: + coeffs[i] = oldc + print "Best error rate: %.4f, coeffs: [%s]" % (best1, ",".join("%.2f" % f for f in coeffs)) + if changed == -2: + break + +def find_coeffs2(paths, passes=50): + #coeffs = n.array([r.random() for i in xrange(len(paths))]) + coeffs = n.array([0.5 for i in xrange(len(paths))]) + coeffs /= coeffs.sum() + + + #crange = [[cmin + c * step for c in xrange(1+int((cmax-cmin)/step))] for i in xrange(len(paths))] + for p in xrange(passes): + print "Pass %d" % p + for i in nr.permutation(range(coeffs.shape[0])): + #bigger = r.randint(0,2) == 0 + #c = coeffs[i] + r.random() * (1 - coeffs[i]) if bigger else r.random() * coeffs[i] + c = min(1, max(0, coeffs[i] + nr.randn() / (2*sqrt(1+p)))) + oldc = coeffs[i] + coeffs[i] = c + err = do_avg(paths, coeffs, top5=True)[1] + changed = "" + if err < best1: + best1 = err + changed = "*" + #crange = [[cmin + x * step for x in xrange(1+int((cmax-cmin)/step))] for i in xrange(len(paths))] + else: + coeffs[i] = oldc + coeffs /= coeffs.sum() + #crange[i].remove(c) + print "Best error rate: %.4f, coeffs: [%s]%s" % (best1, ",".join("%.4f" % f for f in coeffs), changed) + + +if __name__ == "__main__": + paths = sys.argv[1].split(',') + if len(sys.argv) == 2: + find_coeffs(paths) + else: + coeffs = n.array([float(x) for x in sys.argv[2].split(',')]) + errs = do_avg(paths, coeffs, top5=True) + print "Average error rate with coeffs %s: %.4f %.4f" % (", ".join("%.2f" % f for f in coeffs), errs[0], errs[1]) diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..ead4fd9 --- /dev/null +++ b/build.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +NVMATRIX=$NVMATRIX_K20X_INCLUDE/.. +CUDACONV=$NVCONV2_K20X_INCLUDE/.. + +rm -rf build +rm *.so +mkdir -p build + +cp -r src build/ +cp -r include build/ +cp $NVMATRIX/src/nvmatrix.cu $NVMATRIX/src/nvmatrix_kernels.cu $NVMATRIX/src/gpu_locking.cpp build/src +cp $NVMATRIX/include/nvmatrix.cuh $NVMATRIX/include/nvmatrix_kernels.cuh $NVMATRIX/include/nvmatrix_operators.cuh $NVMATRIX/include/gpu_locking.h build/include +cp $CUDACONV/src/conv_util.cu $CUDACONV/src/filter_acts.cu $CUDACONV/src/weight_acts.cu $CUDACONV/src/img_acts.cu build/src +cp $CUDACONV/include/conv_util.cuh $CUDACONV/include/cudaconv2.cuh build/include +cp Makefile-all build/Makefile + +cd build && make -j kepler=1 $* && cd .. +ln -fs build/*.so ./ diff --git a/convdata.py b/convdata.py new file mode 100755 index 0000000..22f1967 --- /dev/null +++ b/convdata.py @@ -0,0 +1,336 @@ +from data import * +import numpy.random as nr +import numpy as n +import random as r +from time import time +from threading import Thread +from math import sqrt +import sys +from pylab import * + +class FlatMemoryDataProvider(LabeledMemoryDataProvider): + def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False): + LabeledMemoryDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) + self.data_mean = self.batch_meta['data_mean'].reshape((self.batch_meta['data_mean'].shape[0], 1)) + # Subtract the mean from the data and make sure that both data and + # labels are in single-precision floating point. 
+ for d in self.data_dic: + # This converts the data matrix to single precision and makes sure that it is C-ordered + d['data'] = n.require((d['data'] - self.data_mean), dtype=n.single, requirements='C') + d['labels'] = d['labels'].astype(n.int) + d['labelprobs'] = n.zeros((self.get_num_classes(), d['data'].shape[1]), dtype=n.single) + for c in xrange(d['data'].shape[1]): + d['labelprobs'][d['labels'][c],c] = 1.0 + + def get_next_batch(self): + epoch, batchnum, datadic = LabeledMemoryDataProvider.get_next_batch(self) + return epoch, batchnum, [datadic['data'], datadic['labelprobs']] + + def get_data_dims(self, idx=0): + return self.batch_meta['num_vis'] if idx == 0 else self.get_num_classes() + +class ImageNetDP(LabeledDataProvider): + MAX_PCA_COMPONENTS = 1024 # Use this many components for noise generation + def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False): + LabeledDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) + self.init_commons(data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) + + def init_commons(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False): + self.data_mean = self.batch_meta['data_mean'].astype(n.single) + self.color_eig = self.batch_meta['color_pca'][1].astype(n.single) + self.color_stdevs = n.c_[self.batch_meta['color_pca'][0].astype(n.single)] + self.color_noise_coeff = dp_params['color_noise'] + self.pca_noise_coeff = dp_params['pca_noise'] + self.num_colors = 3 + self.img_size = int(sqrt(self.batch_meta['num_vis'] / self.num_colors)) + + def get_labels(self, datadic): + pass + + def showimg(self, img): + pixels = img.shape[0] / 3 + size = int(sqrt(pixels)) + img = img.reshape((3,size,size)).swapaxes(0,2).swapaxes(0,1) + imshow(img, interpolation='nearest') + show() + + def get_next_batch(self): + epoch, batchnum, datadic = LabeledDataProvider.get_next_batch(self) + # This takes about 1 sec per batch :( + # If I don't convert both to single ahead of time, it takes even longer. + data = n.require(datadic['data'] - self.data_mean, dtype=n.single, requirements='C') + + labels = self.get_labels(datadic) +# wordvecs = datadic['wordvecs'] + wordpres = datadic['wordpres'] + + # Labels have to be in the range 0-(number of classes - 1) + assert labels.max() < self.get_num_classes(), "Invalid labels!" + assert labels.min() == 0, "Invalid labels!" + return epoch, batchnum, [data, labels, wordpres] + + + # Takes as input an array returned by get_next_batch + # Returns a (numCases, imgSize, imgSize, 3) array which can be + # fed to pylab for plotting. + # This is used by shownet.py to plot test case predictions. 
+ def get_plottable_data(self, data, add_mean=True): + return n.require((data + (self.data_mean if add_mean else 0)).T.reshape(data.shape[1], 3, self.img_size, self.img_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single) + +class ImageNetLogRegDP(ImageNetDP): + def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False): + ImageNetDP.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) + + def get_labels(self, datadic): + return n.array(datadic['labels'], dtype=n.single).reshape((1, datadic['data'].shape[1])) + + def get_data_dims(self, idx=0): + if idx == 0: + return self.img_size**2 * self.num_colors + if idx == 2: + return 100 + return 1 + +class BatchLoaderThread(Thread): + def __init__(self, data_dir, path, list_out): + Thread.__init__(self) + self.data_dir = data_dir + self.path = path + self.list_out = list_out + #print "loading %d" % self.bnum + + def run(self): + self.list_out.append(unpickle(self.path)) + +class ColorNoiseMakerThread(Thread): + def __init__(self, pca_stdevs, pca_vecs, num_noise, list_out): + Thread.__init__(self) + self.pca_stdevs, self.pca_vecs = pca_stdevs, pca_vecs + self.num_noise = num_noise + self.list_out = list_out + + def run(self): + noise = n.dot(self.pca_vecs, nr.randn(3, self.num_noise).astype(n.single) * self.pca_stdevs) + self.list_out.append(noise) + +class CroppedImageNetDP(ImageNetDP): + def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params={}, test=False): + ImageNetDP.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) + + self.border_size = dp_params['crop_border'] + self.inner_size = self.img_size - self.border_size*2 + self.multiview = dp_params['multiview_test'] and test + self.num_views = 5*2 + self.data_mult = self.num_views if self.multiview else 1 + self.crop_chunk = 32 # This many images will be cropped in the same way + + # Maintain poitners to previously-returned data matrices so they don't get garbage collected. + # I've never seen this happen but it's a safety measure. 
+ self.data = [None, None] + self.cropped_data = [n.zeros((self.get_data_dims(), 0*self.data_mult), dtype=n.single) for x in xrange(2)] + + self.loader_thread, self.color_noise_thread = None, None + self.convnet = dp_params['convnet'] + + self.num_noise = 1024 + self.batches_generated = 0 + self.data_mean_crop = self.data_mean.reshape((3,self.img_size,self.img_size))[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size].reshape((3*self.inner_size**2, 1)) + + def get_data_dims(self, idx=0): + if idx == 0: + return self.inner_size**2 * 3 + return 1 + + def start_color_noise_maker(self): + color_noise_list = [] + self.color_noise_thread = ColorNoiseMakerThread(self.color_stdevs, self.color_eig, self.num_noise, color_noise_list) + self.color_noise_thread.start() + return color_noise_list + + def get_labels(self, datadic): + pass + + def start_loader(self, batch_idx): + self.load_data = [] + self.loader_thread = BatchLoaderThread(self.data_dir, self.get_data_file_name(self.batch_range[batch_idx]), self.load_data) + self.loader_thread.start() + + def get_next_batch(self): + self.d_idx = self.batches_generated % 2 + if self.test: + epoch, batchnum, self.data[self.d_idx] = LabeledDataProvider.get_next_batch(self) + else: + epoch, batchnum = self.curr_epoch, self.curr_batchnum + if self.loader_thread is None: + self.start_loader(self.batch_idx) + self.loader_thread.join() + self.data[self.d_idx] = self.load_data[0] + self.start_loader(self.get_next_batch_idx()) + else: + # Set the argument to join to 0 to re-enable batch reuse + self.loader_thread.join() + if not self.loader_thread.is_alive(): + self.data[self.d_idx] = self.load_data[0] + self.start_loader(self.get_next_batch_idx()) + self.advance_batch() + + cropped = self.get_cropped_data(self.data[self.d_idx]) + if self.color_noise_coeff > 0 and not self.test: + # At this point the data already has 0 mean. + # So I'm going to add noise to it, but I'm also going to scale down + # the original data. This is so that the overall scale of the training + # data doesn't become too different from the test data. 
+ s = cropped.shape + cropped_size = self.get_data_dims(0) / 3 + ncases = s[1] + + if self.color_noise_thread is None: + self.color_noise_list = self.start_color_noise_maker() + self.color_noise_thread.join() + self.color_noise = self.color_noise_list[0] + self.color_noise_list = self.start_color_noise_maker() + else: + self.color_noise_thread.join(0) + if not self.color_noise_thread.is_alive(): + self.color_noise = self.color_noise_list[0] + self.color_noise_list = self.start_color_noise_maker() +# print "Generated new noise" +# else: +# print "Reusing old noise" + # If the noise thread IS alive, then we'll just re-use the noise from the last run + cropped = self.cropped_data[self.d_idx] = cropped.reshape((3, cropped_size, ncases)).swapaxes(0,1).reshape((cropped_size, ncases*3)) + self.color_noise = self.color_noise[:,:ncases].reshape((1, 3*ncases)) + cropped += self.color_noise * self.color_noise_coeff + cropped = self.cropped_data[self.d_idx] = cropped.reshape((cropped_size, 3, ncases)).swapaxes(0,1).reshape(s) + cropped /= 1.0 + self.color_noise_coeff + +# cropped -= cropped.min() +# cropped /= cropped.max() +# self.showimg(cropped[:,0]) + + self.data[self.d_idx]['labels'] = self.get_labels(self.data[self.d_idx]) + self.data[self.d_idx]['data'] = cropped + self.batches_generated += 1 + return epoch, batchnum, [self.data[self.d_idx]['data'], self.data[self.d_idx]['labels']] + + def get_cropped_data(self, data): + cropped = self.cropped_data[self.d_idx] + if cropped.shape[1] != data['data'].shape[1] * self.data_mult: + cropped = self.cropped_data[self.d_idx] = n.zeros((cropped.shape[0], data['data'].shape[1] * self.data_mult), dtype=n.single) + self.__trim_borders(data['data'], cropped) + + return self.subtract_mean(cropped) + + def subtract_mean(self,data): + data -= self.data_mean_crop + return data + + # Takes as input an array returned by get_next_batch + # Returns a (numCases, imgSize, imgSize, 3) array which can be + # fed to pylab for plotting. + # This is used by shownet.py to plot test case predictions. 
+ def get_plottable_data(self, data, add_mean=True): + return n.require((data + (self.data_mean_crop if add_mean else 0)).T.reshape(data.shape[1], 3, self.inner_size, self.inner_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single) + + def __trim_borders(self, x, target): + y = x.reshape(3, self.img_size, self.img_size, x.shape[1]) + + if self.test: # don't need to loop over cases + if self.multiview: + start_positions = [(0,0), (0, self.border_size*2), + (self.border_size, self.border_size), + (self.border_size*2, 0), (self.border_size*2, self.border_size*2)] + end_positions = [(sy+self.inner_size, sx+self.inner_size) for (sy,sx) in start_positions] + for i in xrange(self.num_views/2): + pic = y[:,start_positions[i][0]:end_positions[i][0],start_positions[i][1]:end_positions[i][1],:] + target[:,i * x.shape[1]:(i+1)* x.shape[1]] = pic.reshape((self.get_data_dims(),x.shape[1])) + target[:,(self.num_views/2 + i) * x.shape[1]:(self.num_views/2 +i+1)* x.shape[1]] = pic[:,:,::-1,:].reshape((self.get_data_dims(),x.shape[1])) + else: + pic = y[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size, :] # just take the center for now + target[:,:] = pic.reshape((self.get_data_dims(), x.shape[1])) + else: + for c in xrange(0, x.shape[1], self.crop_chunk): # loop over cases in chunks + startY, startX = nr.randint(0,self.border_size*2 + 1), nr.randint(0,self.border_size*2 + 1) + + endY, endX = startY + self.inner_size, startX + self.inner_size + c_end = min(c + self.crop_chunk, x.shape[1]) + pic = y[:,startY:endY,startX:endX, c:c_end] + if nr.randint(2) == 0: # also flip the images with 50% probability + pic = pic[:,:,::-1,:] + target[:,c:c_end] = pic.reshape((self.get_data_dims(),c_end-c)) + #target[:] = n.require(target[:,nr.permutation(x.shape[1])], requirements='C') + +class CroppedImageNetLogRegDP(CroppedImageNetDP): + def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False): + CroppedImageNetDP.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) + + def get_labels(self, datadic): + return n.require(n.tile(n.array(datadic['labels'], dtype=n.single).reshape((1, datadic['data'].shape[1])), (1, self.data_mult)), requirements='C') + +class RandomScaleImageNetLogRegDP(CroppedImageNetLogRegDP): + def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False): + CroppedImageNetLogRegDP.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) + del self.cropped_data + self.data_mean_mean = self.data_mean.mean() + + def get_cropped_data(self): + if self.test and self.multiview: + x = self.data['data'] + y = x.reshape(3, self.img_size, self.img_size, x.shape[1]) + target = n.zeros((self.inner_size**2*3, self.data['data'].shape[1]*self.num_views), dtype=n.uint8) + start_positions = [(0,0), (0, self.border_size), (0, self.border_size*2), + (self.border_size, 0), (self.border_size, self.border_size), (self.border_size, self.border_size*2), + (self.border_size*2, 0), (self.border_size*2, self.border_size), (self.border_size*2, self.border_size*2)] + end_positions = [(sy+self.inner_size, sx+self.inner_size) for (sy,sx) in start_positions] + for i in xrange(self.num_views): + target[:,i * x.shape[1]:(i+1)* x.shape[1]] = y[:,start_positions[i][0]:end_positions[i][0],start_positions[i][1]:end_positions[i][1],:].reshape((self.inner_size**2*3,x.shape[1])) + return self.subtract_mean(target) + elif not self.test: + # it should be 
ok to flip it into the same matrix + # since if it ends up being reused, flips are invertible. + self.reflect_data(self.data['data'], self.data['data']) + return self.subtract_mean(self.data['data']) + + def reflect_data(self, x, target): + y = x.reshape(3, self.img_size, self.img_size, x.shape[1]) + for c in xrange(0, x.shape[1], self.crop_chunk): # loop over cases in chunks + c_end = min(c + self.crop_chunk, x.shape[1]) + pic = y[:,:,:, c:c_end] + if nr.randint(2) == 0: # flip the images with 50% probability + pic = pic[:,:,::-1,:] + + target[:,c:c_end] = pic.reshape((self.get_data_dims(),c_end-c)) + + # Note that this variant subtracts the same scalar from each pixel + def subtract_mean(self, data): + return n.require(data - self.data_mean_mean, dtype=n.single, requirements='C') + + def get_data_dims(self, idx=0): + return self.img_size**2 * 3 if idx == 0 else 1 + +class DummyConvNetLogRegDP(LabeledDummyDataProvider): + def __init__(self, data_dim): + LabeledDummyDataProvider.__init__(self, data_dim) + self.batch_meta['tree'] = dict([(i, []) for i in xrange(self.num_classes)]) + self.batch_meta['tree'][10] = [0, 1, 2] + self.batch_meta['tree'][11] = [3, 4, 5] + self.batch_meta['tree'][12] = [6, 7] + self.batch_meta['tree'][13] = [8, 9] + self.batch_meta['tree'][14] = [10, 11] + self.batch_meta['tree'][15] = [12, 13] + self.batch_meta['tree'][16] = [14, 15] + self.batch_meta['all_wnids'] = {'gproot': 16} + self.img_size = int(sqrt(data_dim/3)) + + def get_next_batch(self): + epoch, batchnum, dic = LabeledDummyDataProvider.get_next_batch(self) + + dic['data'] = n.require(dic['data'].T, requirements='C') + dic['labels'] = n.require(dic['labels'].T, requirements='C') + dic['gates'] = nr.rand(1, dic['data'].shape[1]).astype(n.single) + + return epoch, batchnum, [dic['data'], dic['labels'], dic['gates']] + + # Returns the dimensionality of the two data matrices returned by get_next_batch + def get_data_dims(self, idx=0): + return self.batch_meta['num_vis'] if idx == 0 else 1 diff --git a/convdata_cifar.py b/convdata_cifar.py new file mode 100755 index 0000000..d01559b --- /dev/null +++ b/convdata_cifar.py @@ -0,0 +1,115 @@ +from data import * +import numpy.random as nr +import numpy as n +import random as r + +class CIFARDataProvider(LabeledDataProvider): + def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False): + LabeledDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) + self.data_mean = self.batch_meta['data_mean'] + self.num_colors = 3 + self.img_size = 32 + self.data_dims = [self.img_size**2 * self.num_colors, 1, self.get_num_classes()] + + def get_next_batch(self): + epoch, batchnum, datadic = LabeledDataProvider.get_next_batch(self) + if 'processed' not in datadic: + datadic['data'] = n.require((datadic['data'] - self.data_mean), dtype=n.single, requirements='C') + datadic['labelsVec'] = n.require(n.array(datadic['labels']).reshape((1, datadic['data'].shape[1])), requirements='C', dtype=n.single) + datadic['labelsMat'] = n.zeros((self.get_num_classes(), datadic['data'].shape[1]), dtype=n.single) + datadic['labelsMat'][datadic['labels'],n.c_[0:datadic['data'].shape[1]]] = 1 + + datadic['processed'] = True + + return epoch, batchnum, [datadic['data'], datadic['labelsVec'], datadic['labelsMat']] + + # Returns the dimensionality of the two data matrices returned by get_next_batch + # idx is the index of the matrix. 
+ def get_data_dims(self, idx=0): + return self.data_dims[idx] + + # Takes as input an array returned by get_next_batch + # Returns a (numCases, imgSize, imgSize, 3) array which can be + # fed to pylab for plotting. + # This is used by shownet.py to plot test case predictions. + def get_plottable_data(self, data): + return n.require((data + self.data_mean).T.reshape(data.shape[1], 3, self.img_size, self.img_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single) + +class CroppedCIFARDataProvider(LabeledMemoryDataProvider): + def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params=None, test=False): + LabeledMemoryDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) + + self.border_size = dp_params['crop_border'] + self.inner_size = 32 - self.border_size*2 + self.multiview = dp_params['multiview_test'] and test + self.num_views = 9 + self.data_mult = self.num_views if self.multiview else 1 + self.num_colors = 3 + + for d in self.data_dic: + d['data'] = n.require(d['data'], requirements='C') + d['labels'] = n.require(n.tile(d['labels'].reshape((1, d['data'].shape[1])), (1, self.data_mult)), requirements='C') + + self.cropped_data = [n.zeros((self.get_data_dims(), self.data_dic[0]['data'].shape[1]*self.data_mult), dtype=n.single) for x in xrange(2)] + + self.batches_generated = 0 + self.data_mean = self.batch_meta['data_mean'].reshape((3,32,32))[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size].reshape((self.get_data_dims(), 1)) + + def get_next_batch(self): + epoch, batchnum, datadic = LabeledMemoryDataProvider.get_next_batch(self) + + cropped = self.cropped_data[self.batches_generated % 2] + + self.__trim_borders(datadic['data'], cropped) + cropped -= self.data_mean + self.batches_generated += 1 + return epoch, batchnum, [cropped, datadic['labels']] + + def get_data_dims(self, idx=0): + return self.inner_size**2 * 3 if idx == 0 else 1 + + # Takes as input an array returned by get_next_batch + # Returns a (numCases, imgSize, imgSize, 3) array which can be + # fed to pylab for plotting. + # This is used by shownet.py to plot test case predictions. 
+ def get_plottable_data(self, data): + return n.require((data + self.data_mean).T.reshape(data.shape[1], 3, self.inner_size, self.inner_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single) + + def __trim_borders(self, x, target): + y = x.reshape(3, 32, 32, x.shape[1]) + + if self.test: # don't need to loop over cases + if self.multiview: + start_positions = [(0,0), (0, self.border_size), (0, self.border_size*2), + (self.border_size, 0), (self.border_size, self.border_size), (self.border_size, self.border_size*2), + (self.border_size*2, 0), (self.border_size*2, self.border_size), (self.border_size*2, self.border_size*2)] + end_positions = [(sy+self.inner_size, sx+self.inner_size) for (sy,sx) in start_positions] + for i in xrange(self.num_views): + target[:,i * x.shape[1]:(i+1)* x.shape[1]] = y[:,start_positions[i][0]:end_positions[i][0],start_positions[i][1]:end_positions[i][1],:].reshape((self.get_data_dims(),x.shape[1])) + else: + pic = y[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size, :] # just take the center for now + target[:,:] = pic.reshape((self.get_data_dims(), x.shape[1])) + else: + for c in xrange(x.shape[1]): # loop over cases + startY, startX = nr.randint(0,self.border_size*2 + 1), nr.randint(0,self.border_size*2 + 1) + endY, endX = startY + self.inner_size, startX + self.inner_size + pic = y[:,startY:endY,startX:endX, c] + if nr.randint(2) == 0: # also flip the image with 50% probability + pic = pic[:,:,::-1] + target[:,c] = pic.reshape((self.get_data_dims(),)) + +class DummyConvNetDataProvider(LabeledDummyDataProvider): + def __init__(self, data_dim): + LabeledDummyDataProvider.__init__(self, data_dim) + + def get_next_batch(self): + epoch, batchnum, dic = LabeledDummyDataProvider.get_next_batch(self) + + dic['data'] = n.require(dic['data'].T, requirements='C') + dic['labels'] = n.require(dic['labels'].T, requirements='C') + + return epoch, batchnum, [dic['data'], dic['labels']] + + # Returns the dimensionality of the two data matrices returned by get_next_batch + def get_data_dims(self, idx=0): + return self.batch_meta['num_vis'] if idx == 0 else 1 diff --git a/convdata_flickr.py b/convdata_flickr.py new file mode 100755 index 0000000..c94ad80 --- /dev/null +++ b/convdata_flickr.py @@ -0,0 +1,297 @@ +from data import * +import numpy.random as nr +import numpy as n +import random as r +from time import time +from threading import Thread +from math import sqrt +import sys +from pylab import * +from PIL import Image +from StringIO import StringIO + +class JPEGBatchLoaderThread(Thread): + def __init__(self, data_dir, path, freq_to_id, tgt, tgt_labels, list_out): + Thread.__init__(self) + self.data_dir = data_dir + self.path = path + self.tgt = tgt + self.tgt_labels = tgt_labels + self.list_out = list_out + self.freq_to_id = freq_to_id + #print "loading %d" % self.bnum + + @staticmethod + def raw_to_freq_id(raw_tags, freq_to_id): + raw_tags = [''.join(t.lower().strip().split()) for t in raw_tags] + return [freq_to_id[t] for t in raw_tags if t in freq_to_id] + + @staticmethod + def load_jpeg_batch((strings, sizes, labels), freq_to_id, tgt, tgt_labels): + tgt_labels[:] = 0 + for k,s in enumerate(strings): + ima = n.asarray(Image.open(StringIO(s)).convert('RGB')) + tgt[k,:] = ima.swapaxes(0,2).swapaxes(1,2).flatten() + tgt_labels[k, JPEGBatchLoaderThread.raw_to_freq_id(labels[k], freq_to_id)] = 1 + + return {'data': tgt[:len(strings),:], + 'labels': tgt_labels[:len(strings),:]} + + def run(self): + p = 
self.load_jpeg_batch(unpickle(self.path), + self.freq_to_id, + self.tgt, + self.tgt_labels) + self.list_out.append(p) + +class ColorNoiseMakerThread(Thread): + def __init__(self, pca_stdevs, pca_vecs, num_noise, list_out): + Thread.__init__(self) + self.pca_stdevs, self.pca_vecs = pca_stdevs, pca_vecs + self.num_noise = num_noise + self.list_out = list_out + + def run(self): + noise = n.dot(nr.randn(self.num_noise, 3).astype(n.single) * self.pca_stdevs.T, self.pca_vecs.T) + self.list_out.append(noise) + +class FlickrDP(LabeledDataProvider): + MAX_PCA_COMPONENTS = 1024 # Use this many components for noise generation + def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False): + LabeledDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) + self.init_commons(data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) + + def init_commons(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False): + self.data_mean = self.batch_meta['data_mean'].astype(n.single) + self.color_eig = self.batch_meta['color_pca'][1].astype(n.single) + self.color_stdevs = n.c_[self.batch_meta['color_pca'][0].astype(n.single)] + self.color_noise_coeff = dp_params['color_noise'] + self.pca_noise_coeff = dp_params['pca_noise'] + self.num_colors = 3 + self.img_size = int(sqrt(self.batch_meta['num_vis'] / self.num_colors)) + self.freq_to_id = self.batch_meta['freq_to_id'] + + def get_labels(self, datadic): + pass + + def showimg(self, img): + pixels = img.shape[0] / 3 + size = int(sqrt(pixels)) + img = img.reshape((3,size,size)).swapaxes(0,2).swapaxes(0,1) + imshow(img, interpolation='nearest') + show() + + def get_next_batch(self): + epoch, batchnum, datadic = LabeledDataProvider.get_next_batch(self) + # This takes about 1 sec per batch :( + # If I don't convert both to single ahead of time, it takes even longer. + data = n.require(datadic['data'] - self.data_mean, dtype=n.single, requirements='C') + + labels = self.get_labels(datadic) + + # Labels have to be in the range 0-(number of classes - 1) + assert labels.max() < self.get_num_classes(), "Invalid labels!" + assert labels.min() >= 0, "Invalid labels!" + return epoch, batchnum, [data, labels] + + + # Takes as input an array returned by get_next_batch + # Returns a (numCases, imgSize, imgSize, 3) array which can be + # fed to pylab for plotting. + # This is used by shownet.py to plot test case predictions. 
+ def get_plottable_data(self, data, add_mean=True): + return n.require((data + (self.data_mean if add_mean else 0)).reshape(data.shape[0], 3, self.img_size, self.img_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single) + +class JPEGCroppedFlickrDP(FlickrDP): + def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params=None, test=False): + LabeledDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) + self.init_commons(data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) + + self.img_size = int(sqrt(self.batch_meta['num_vis'] / self.num_colors)) + self.border_size = dp_params['crop_border'] + self.inner_size = self.img_size - self.border_size*2 + self.multiview = dp_params['multiview_test'] and test + self.num_views = 5*2 + self.data_mult = self.num_views if self.multiview else 1 + self.crop_chunk = 32 # This many images will be cropped in the same way + self.batch_size = self.batch_meta['batch_size'] + + # Maintain poitners to previously-returned data matrices so they don't get garbage collected. + # I've never seen this happen but it's a safety measure. + self.data = [None, None] + self.cropped_data = [n.zeros((0*self.data_mult, self.get_data_dims()), dtype=n.float32) for x in xrange(2)] + if self.test: + self.orig_data = [n.zeros((self.batch_size, self.img_size**2*3), dtype=n.uint8) for x in xrange(1)] + self.orig_labels = [n.zeros((self.batch_size, self.get_num_classes()), dtype=n.float32) for x in xrange(2)] + else: + self.orig_data = [n.zeros((self.batch_size, self.img_size**2*3), dtype=n.uint8) for x in xrange(2)] + # There have to be 3 copies of labels because this matrix actually gets used by the training code + self.orig_labels = [n.zeros((self.batch_size, self.get_num_classes()), dtype=n.float32) for x in xrange(3)] + + self.loader_thread, self.color_noise_thread = None, None + self.convnet = dp_params['convnet'] + + self.num_noise = self.batch_size + self.batches_generated, self.loaders_started = 0, 0 + self.data_mean_crop = self.data_mean.reshape((3,self.img_size,self.img_size))[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size].reshape((1,3*self.inner_size**2)) + + def get_data_dims(self, idx=0): + assert idx in (0,1), "Invalid index: %d" % idx + if idx == 0: + return self.inner_size**2 * 3 + return self.get_num_classes() + + def start_loader(self, batch_idx): + self.load_data = [] + #print "loading %d" % self.batch_range_perm[self.batch_idx] + self.loader_thread = JPEGBatchLoaderThread(self.data_dir, self.get_data_file_name(self.batch_range[batch_idx]), self.freq_to_id, + self.orig_data[self.loaders_started % 2], self.orig_labels[self.loaders_started % 3], + self.load_data) + self.loader_thread.start() + self.loaders_started += 1 + + def start_color_noise_maker(self): + color_noise_list = [] + self.color_noise_thread = ColorNoiseMakerThread(self.color_stdevs, self.color_eig, self.num_noise, color_noise_list) + self.color_noise_thread.start() + return color_noise_list + + def get_labels(self, datadic): + pass + + def get_next_batch(self): + self.d_idx = self.batches_generated % 2 + if self.test: + epoch, batchnum, self.data[self.d_idx] = LabeledDataProvider.get_next_batch(self) + self.data[self.d_idx] = JPEGBatchLoaderThread.load_jpeg_batch(self.data[self.d_idx], self.freq_to_id, self.orig_data[0], self.orig_labels[self.d_idx]) + else: + epoch, batchnum = self.curr_epoch, self.curr_batchnum + + if self.loader_thread is None: + 
self.start_loader(self.batch_idx) + self.loader_thread.join() + self.data[self.d_idx] = self.load_data[0] + + self.start_loader(self.get_next_batch_idx()) + else: + # Set the argument to join to 0 to re-enable batch reuse + self.loader_thread.join() + if not self.loader_thread.is_alive(): + self.data[self.d_idx] = self.load_data[0] + self.start_loader(self.get_next_batch_idx()) +# else: +# print "Re-using batch" + self.advance_batch() + + cropped = self.get_cropped_data(self.data[self.d_idx]) + if self.color_noise_coeff > 0 and not self.test: + # At this point the data already has 0 mean. + # So I'm going to add noise to it, but I'm also going to scale down + # the original data. This is so that the overall scale of the training + # data doesn't become too different from the test data. + s = cropped.shape + cropped_size = self.get_data_dims(0) / 3 + ncases = s[0] + + if self.color_noise_thread is None: + self.color_noise_list = self.start_color_noise_maker() + self.color_noise_thread.join() + self.color_noise = self.color_noise_list[0] + self.color_noise_list = self.start_color_noise_maker() + else: + self.color_noise_thread.join(0) + if not self.color_noise_thread.is_alive(): + self.color_noise = self.color_noise_list[0] + self.color_noise_list = self.start_color_noise_maker() + + cropped = self.cropped_data[self.d_idx] = cropped.reshape((ncases*3, cropped_size)) + self.color_noise = self.color_noise[:ncases,:].reshape((3*ncases, 1)) + cropped += self.color_noise * self.color_noise_coeff + cropped = self.cropped_data[self.d_idx] = cropped.reshape((ncases, 3* cropped_size)) + cropped /= (1.0 + self.color_noise_coeff) + + self.data[self.d_idx]['labels'] = self.get_labels(self.data[self.d_idx]) + self.data[self.d_idx]['data'] = cropped + self.batches_generated += 1 + +# idx = 1000 +# cropped -= cropped.min() +# cropped /= cropped.max() +# +# print [self.batch_meta['label_names'][i] for i in n.where(self.data['labels'][idx,:]==1)[0]] +# self.showimg(cropped[idx,:]) + #print cropped.shape + return epoch, batchnum, [self.data[self.d_idx]['data'].T, self.data[self.d_idx]['labels'].T] + + def get_cropped_data(self, data): + cropped = self.cropped_data[self.d_idx] + if cropped.shape[0] != data['data'].shape[0] * self.data_mult: + cropped = self.cropped_data[self.d_idx] = n.zeros((data['data'].shape[0] * self.data_mult, cropped.shape[1]), dtype=n.float32) + self.__trim_borders(data['data'], cropped) + + return self.subtract_mean(cropped) + + def subtract_mean(self,data): + data -= self.data_mean_crop + return data + + # Takes as input an array returned by get_next_batch + # Returns a (numCases, imgSize, imgSize, 3) array which can be + # fed to pylab for plotting. + # This is used by shownet.py to plot test case predictions. 
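+ # Here data arrives transposed (dimensions x cases), as returned by get_next_batch
+ # above, hence the .T and the use of data.shape[1] as the case count.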
+ def get_plottable_data(self, data, add_mean=True): + return n.require((data.T + (self.data_mean_crop if add_mean else 0)).reshape(data.shape[1], 3, self.inner_size, self.inner_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single) + + def __trim_borders(self, x, target): + y = x.reshape(x.shape[0], 3, self.img_size, self.img_size) + + if self.test: # don't need to loop over cases + if self.multiview: + start_positions = [(0,0), (0, self.border_size*2), + (self.border_size, self.border_size), + (self.border_size*2, 0), (self.border_size*2, self.border_size*2)] + end_positions = [(sy+self.inner_size, sx+self.inner_size) for (sy,sx) in start_positions] + for i in xrange(self.num_views/2): + pic = y[:,:,start_positions[i][0]:end_positions[i][0],start_positions[i][1]:end_positions[i][1]] + target[i * x.shape[0]:(i+1)* x.shape[0],:] = pic.reshape((x.shape[0], self.get_data_dims())) + target[(self.num_views/2 + i) * x.shape[0]:(self.num_views/2 +i+1)* x.shape[0],:] = pic[:,:,:,::-1].reshape((x.shape[0],self.get_data_dims())) + else: + pic = y[:,:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size] # just take the center for now + target[:,:] = pic.reshape((x.shape[0], self.get_data_dims())) + else: + for c in xrange(0, x.shape[0], self.crop_chunk): # loop over cases in chunks + startY, startX = nr.randint(0,self.border_size*2 + 1), nr.randint(0,self.border_size*2 + 1) + + endY, endX = startY + self.inner_size, startX + self.inner_size + c_end = min(c + self.crop_chunk, x.shape[0]) + pic = y[c:c_end,:,startY:endY,startX:endX] + if nr.randint(2) == 0: # also flip the images with 50% probability + pic = pic[:,:,:,::-1] + target[c:c_end,:] = pic.reshape((c_end-c, self.get_data_dims())) + #target[:] = n.require(target[:,nr.permutation(x.shape[1])], requirements='C') + +class JPEGCroppedFlickrCEDP(JPEGCroppedFlickrDP): + def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False): + JPEGCroppedFlickrDP.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) + + def get_labels(self, data): + return n.require(n.tile(data['labels'], (self.data_mult, 1)), requirements='C') + +class DummyConvNetCEDP(LabeledDummyDataProvider): + def __init__(self, data_dim): + LabeledDummyDataProvider.__init__(self, data_dim, num_classes=16, num_cases=16) + + def get_next_batch(self): + epoch, batchnum, dic = LabeledDummyDataProvider.get_next_batch(self) + + dic['data'] = n.require(dic['data'].T, requirements='F') + dic['labels'] = n.zeros((self.get_data_dims(idx=1), dic['data'].shape[1]), dtype=n.float32, order='F') + for c in xrange(dic['labels'].shape[1]): # loop over cases + r = nr.randint(0, dic['labels'].shape[0]) + dic['labels'][r,c] = 1 + + return epoch, batchnum, [dic['data'], dic['labels']] + + # Returns the dimensionality of the two data matrices returned by get_next_batch + def get_data_dims(self, idx=0): + return self.batch_meta['num_vis'] if idx == 0 else 16 diff --git a/convdata_jpeg.py b/convdata_jpeg.py new file mode 100755 index 0000000..8f53b1c --- /dev/null +++ b/convdata_jpeg.py @@ -0,0 +1,270 @@ + +from data import * +import numpy.random as nr +import numpy as n +import random as r +from time import time +from threading import Thread +from math import sqrt +import sys +from pylab import * +from PIL import Image +from StringIO import StringIO +from convdata import ImageNetDP + +class JPEGBatchLoaderThread(Thread): + def __init__(self, data_dir, path, data_mean, no_crop, label_offset, 
tgt, list_out): + Thread.__init__(self) + self.data_dir = data_dir + self.path = path + self.tgt = tgt + self.list_out = list_out + self.label_offset = label_offset + self.data_mean = data_mean + self.no_crop = no_crop + #print "loading %d" % self.bnum + + @staticmethod + def load_jpeg_batch((strings, orig_sizes, labels), data_mean, no_crop, label_offset, tgt): + lab_arr = n.zeros((len(strings), 1), dtype=n.single) + failed = 0 + img256 = n.zeros((256, 256, 3), dtype=n.uint8) if no_crop else None + for k,(s,l) in enumerate(zip(strings, labels)): + try: + ima = n.asarray(Image.open(StringIO(s)).convert('RGB')) + if no_crop: + off_y, off_x = (256 - ima.shape[0]) / 2, (256 - ima.shape[1]) / 2 + img256[:] = data_mean + img256[off_y:ima.shape[0]+off_y,off_x:ima.shape[1]+off_x,:] = ima + tgt[k - failed,:] = img256.swapaxes(0,2).swapaxes(1,2).flatten() + else: + tgt[k - failed,:] = ima.swapaxes(0,2).swapaxes(1,2).flatten() + # For the 2012 test set, the labels will be None + lab_arr[k - failed,0] = 0 if l[1] is None else l[1] + label_offset + except IOError: + failed += 1 + return {'data': tgt[:len(strings) - failed,:], + 'labels': lab_arr[:len(strings) - failed,:]} + + def run(self): + p = JPEGBatchLoaderThread.load_jpeg_batch(unpickle(self.path), + self.data_mean, + self.no_crop, + self.label_offset, + self.tgt) + self.list_out.append(p) + +class ColorNoiseMakerThread(Thread): + def __init__(self, pca_stdevs, pca_vecs, num_noise, list_out): + Thread.__init__(self) + self.pca_stdevs, self.pca_vecs = pca_stdevs, pca_vecs + self.num_noise = num_noise + self.list_out = list_out + + def run(self): + noise = n.dot(nr.randn(self.num_noise, 3).astype(n.single) * self.pca_stdevs.T, self.pca_vecs.T) + self.list_out.append(noise) + +class JPEGCroppedImageNetDP(ImageNetDP): + def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params=None, test=False): + ImageNetDP.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) + self.mini = dp_params['minibatch_size'] + self.border_size = dp_params['crop_border'] + self.inner_size = self.img_size - self.border_size*2 + self.multiview = dp_params['multiview_test'] and test + self.num_views = 5*2 + self.data_mult = self.num_views if self.multiview else 1 + self.crop_chunk = 32 # This many images will be cropped in the same way + self.batch_size = self.batch_meta['batch_size'] + self.label_offset = 0 if 'label_offset' not in self.batch_meta else self.batch_meta['label_offset'] + self.no_crop = False if 'no_crop' not in self.batch_meta else self.batch_meta['no_crop'] + self.scalar_mean = 'scalar_mean' in dp_params and dp_params['scalar_mean'] + # Maintain poitners to previously-returned data matrices so they don't get garbage collected. + # I've never seen this happen but it's a safety measure. 
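+ # The data/cropped_data/orig_data buffers below are double-buffered (indexed by
+ # batches_generated % 2, or by loaders_started for orig_data) so that the background
+ # loader thread can fill one copy while the previous one may still be referenced by
+ # the training code.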
+ self.data = [None, None] # These are pointers to previously-returned data matrices + # This is where I crop data into + self.cropped_data = [n.zeros((0*self.data_mult, self.get_data_dims()), dtype=n.float32) for x in xrange(2)] + # This is where I load data into (jpeg --> uint8) + self.orig_data = [n.zeros((self.batch_size, self.img_size**2*3), dtype=n.uint8) for x in xrange(1 if test else 2)] + + self.loader_thread, self.color_noise_thread = None, None + self.convnet = dp_params['convnet'] + + self.num_noise = self.batch_size + self.batches_generated, self.loaders_started = 0, 0 + self.data_mean_crop = self.data_mean.reshape((3,self.img_size,self.img_size))[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size].reshape((1,3*self.inner_size**2)) + if self.no_crop or self.scalar_mean: + self.data_mean_crop = self.data_mean.mean() + + def get_data_dims(self, idx=0): + if idx == 0: + return self.inner_size**2 * 3 + return 1 + + def start_loader(self, batch_idx): + self.load_data = [] + #print "loading %d" % self.batch_range_perm[self.batch_idx] + self.loader_thread = JPEGBatchLoaderThread(self.data_dir, + self.get_data_file_name(self.batch_range[batch_idx]), + self.data_mean_crop, + self.no_crop, + self.label_offset, + self.orig_data[self.loaders_started], + self.load_data) + self.loader_thread.start() + self.loaders_started = (self.loaders_started + 1) % 2 + + def start_color_noise_maker(self): + color_noise_list = [] + self.color_noise_thread = ColorNoiseMakerThread(self.color_stdevs, self.color_eig, self.num_noise, color_noise_list) + self.color_noise_thread.start() + return color_noise_list + + def get_labels(self, datadic): + pass + + def get_next_batch(self): + self.d_idx = self.batches_generated % 2 + if self.test: + epoch, batchnum, self.data[self.d_idx] = LabeledDataProvider.get_next_batch(self) + self.data[self.d_idx] = JPEGBatchLoaderThread.load_jpeg_batch(self.data[self.d_idx], + self.data_mean_crop, + self.no_crop, + self.label_offset, + self.orig_data[0]) + else: + epoch, batchnum = self.curr_epoch, self.curr_batchnum + + if self.loader_thread is None: + self.start_loader(self.batch_idx) + self.loader_thread.join() + self.data[self.d_idx] = self.load_data[0] + + self.start_loader(self.get_next_batch_idx()) + else: + # Set the argument to join to 0 to re-enable batch reuse + self.loader_thread.join() + if not self.loader_thread.is_alive(): + self.data[self.d_idx] = self.load_data[0] + self.start_loader(self.get_next_batch_idx()) + #else: + # print "Re-using batch" + self.advance_batch() + + cropped = self.get_cropped_data(self.data[self.d_idx]) + if self.color_noise_coeff > 0 and not self.test: + # At this point the data already has 0 mean. + # So I'm going to add noise to it, but I'm also going to scale down + # the original data. This is so that the overall scale of the training + # data doesn't become too different from the test data. 
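+ # Concretely: ColorNoiseMakerThread draws one 3-vector per image,
+ # noise = (alpha * stdevs) . eigvecs^T with alpha ~ N(0,1)^3, where eigvecs and
+ # stdevs come from batch_meta['color_pca']. Below, each image's 3-vector is
+ # broadcast over all pixels of the corresponding channel (the data is reshaped
+ # to (ncases*3, pixels) so one scalar is added per channel per image), scaled
+ # by color_noise_coeff, and the result is divided by (1 + color_noise_coeff)
+ # to keep its overall magnitude roughly comparable to the un-noised test data.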
+ s = cropped.shape + cropped_size = self.get_data_dims(0) / 3 + ncases = s[0] + + if self.color_noise_thread is None: + self.color_noise_list = self.start_color_noise_maker() + self.color_noise_thread.join() + self.color_noise = self.color_noise_list[0] + self.color_noise_list = self.start_color_noise_maker() + else: + self.color_noise_thread.join(0) + if not self.color_noise_thread.is_alive(): + self.color_noise = self.color_noise_list[0] + self.color_noise_list = self.start_color_noise_maker() + + cropped = self.cropped_data[self.d_idx] = cropped.reshape((ncases*3, cropped_size)) + self.color_noise = self.color_noise[:ncases,:].reshape((3*ncases, 1)) + cropped += self.color_noise * self.color_noise_coeff + cropped = self.cropped_data[self.d_idx] = cropped.reshape((ncases, 3* cropped_size)) + cropped /= (1.0 + self.color_noise_coeff) + + self.data[self.d_idx]['labels'] = self.get_labels(self.data[self.d_idx]) + self.data[self.d_idx]['data'] = cropped + self.batches_generated += 1 + + if False and not self.test: + idx = 111 + cropped -= cropped.min() + cropped /= cropped.max() + label = int(self.data[self.d_idx]['labels'][idx,0]) + print label + print self.batch_meta['label_names'][label] + print cropped.max(), cropped.min() + print self.data[self.d_idx]['labels'] + self.showimg(cropped[idx,:]) + + # NOTE: It would be good to add some logic here to pad irregularly-sized + # batches by duplicating training cases. + + return epoch, batchnum, [self.data[self.d_idx]['data'].T, self.data[self.d_idx]['labels'].T] + + def get_cropped_data(self, data): + cropped = self.cropped_data[self.d_idx] + if cropped.shape[0] != data['data'].shape[0] * self.data_mult: + cropped = self.cropped_data[self.d_idx] = n.zeros((data['data'].shape[0] * self.data_mult, cropped.shape[1]), dtype=n.float32) + self.__trim_borders(data['data'], cropped) + + return self.subtract_mean(cropped) + + def subtract_mean(self,data): + data -= self.data_mean_crop + return data + + # Takes as input an array returned by get_next_batch + # Returns a (numCases, imgSize, imgSize, 3) array which can be + # fed to pylab for plotting. + # This is used by shownet.py to plot test case predictions. 
+ def get_plottable_data(self, data, add_mean=True): + mean = self.data_mean_crop if data.flags.f_contiguous or self.scalar_mean else self.data_mean_crop.T + return n.require((data + (mean if add_mean else 0)).T.reshape(data.shape[1], 3, self.inner_size, self.inner_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single) + + def __trim_borders(self, x, target): + y = x.reshape(x.shape[0], 3, self.img_size, self.img_size) + + if self.test: # don't need to loop over cases + if self.multiview: + start_positions = [(0,0), (0, self.border_size*2), + (self.border_size, self.border_size), + (self.border_size*2, 0), (self.border_size*2, self.border_size*2)] + end_positions = [(sy+self.inner_size, sx+self.inner_size) for (sy,sx) in start_positions] + for i in xrange(self.num_views/2): + pic = y[:,:,start_positions[i][0]:end_positions[i][0],start_positions[i][1]:end_positions[i][1]] + target[i * x.shape[0]:(i+1)* x.shape[0],:] = pic.reshape((x.shape[0], self.get_data_dims())) + target[(self.num_views/2 + i) * x.shape[0]:(self.num_views/2 +i+1)* x.shape[0],:] = pic[:,:,:,::-1].reshape((x.shape[0],self.get_data_dims())) + else: + pic = y[:,:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size] # just take the center for now + target[:,:] = pic.reshape((x.shape[0], self.get_data_dims())) + else: + for c in xrange(0, x.shape[0], self.crop_chunk): # loop over cases in chunks + startY, startX = nr.randint(0,self.border_size*2 + 1), nr.randint(0,self.border_size*2 + 1) + + endY, endX = startY + self.inner_size, startX + self.inner_size + c_end = min(c + self.crop_chunk, x.shape[0]) + pic = y[c:c_end,:,startY:endY,startX:endX] + if nr.randint(2) == 0: # also flip the images with 50% probability + pic = pic[:,:,:,::-1] + target[c:c_end,:] = pic.reshape((c_end-c, self.get_data_dims())) + + # With 5% chance, replace this chunk with the average of this chunk and some future chunk + #if c >= self.crop_chunk and nr.rand() < 0.05: + #r = nr.randint(0, c - self.crop_chunk + 1) + #r_end = r + self.crop_chunk + #target[c:c_end,:] = 0.75 * target[c:c_end,:] + 0.25 * target[r:r_end,:] + #print "faded in past batch (%d,%d) to batch (%d,%d)" % (r, r_end, c, c_end) + #for c in xrange(0, x.shape[0]-self.crop_chunk, self.crop_chunk): # loop over cases in chunks + # if nr.rand() < 0.05: + # c_end = min(c + self.crop_chunk, x.shape[0]) + # r = nr.randint(c, x.shape[0] - self.crop_chunk+1) + # r_end = r + self.crop_chunk + # target[c:c_end,:] = 0.75 * target[c:c_end,:] + 0.25 * target[r:r_end,:] + #print "faded in past batch (%d,%d) to batch (%d,%d)" % (r, r_end, c, c_end) + + #target[:] = n.require(target[:,nr.permutation(x.shape[1])], requirements='C') + +class JPEGCroppedImageNetLogRegDP(JPEGCroppedImageNetDP): + def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False): + JPEGCroppedImageNetDP.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) + + def get_labels(self, data): + return n.require(n.tile(n.array(data['labels'], dtype=n.single).reshape((data['data'].shape[0], 1)), (self.data_mult, 1)), requirements='C') + diff --git a/convnet.py b/convnet.py new file mode 100755 index 0000000..d515d4d --- /dev/null +++ b/convnet.py @@ -0,0 +1,218 @@ +import numpy as n +import numpy.random as nr +import random as r +from util import * +from data import * +from options import * +from gpumodel import * +import sys +import math as m +import layer as lay +from convdata import * +from convdata_jpeg import 
JPEGCroppedImageNetLogRegDP +from convdata_flickr import JPEGCroppedFlickrCEDP, DummyConvNetCEDP +from convdata_cifar import CIFARDataProvider, CroppedCIFARDataProvider +from os import linesep as NL +import pylab as pl +import copy as cp + +class ConvNet(IGPUModel): + def __init__(self, op, load_dic, dp_params={}): + filename_options = [] + for v in ('color_noise', 'pca_noise', 'multiview_test', 'crop_border', 'scalar_mean', 'minibatch_size'): + dp_params[v] = op.get_value(v) + + IGPUModel.__init__(self, "ConvNet", op, load_dic, filename_options, dp_params=dp_params) + self.writing_test = False + + def import_model(self): + lib_name = "_ConvNet_k20x" if is_kepler_machine() else "_ConvNet" + print "=========================" + print "Importing %s C++ module" % lib_name + self.libmodel = __import__(lib_name) + + def init_model_lib(self): + self.libmodel.initModel(self.layers, self.device_ids, self.device_cpus, self.minibatch_size, self.wupdate_freq) + + def init_model_state(self): + ms = self.model_state + if self.load_file: + ms['layers'] = lay.LayerParser.parse_layers(self.layer_def, self.layer_params, self, ms['layers']) + else: + ms['layers'] = lay.LayerParser.parse_layers(self.layer_def, self.layer_params, self) + + # Convert convolutional layers to local + if len(self.op.get_value('conv_to_local')) > 0: + for i, layer in enumerate(ms['layers']): + if layer['type'] == 'conv' and layer['name'] in self.op.get_value('conv_to_local'): + lay.LocalLayerParser.conv_to_local(ms['layers'], i) + # Decouple weight matrices + if len(self.op.get_value('unshare_weights')) > 0: + for name_str in self.op.get_value('unshare_weights'): + if name_str: + name = lay.WeightLayerParser.get_layer_name(name_str) + if name is not None: + name, idx = name[0], name[1] + if name not in ms['layers']: + raise ModelStateException("Layer '%s' does not exist; unable to unshare" % name) + layer = ms['layers'][name] + lay.WeightLayerParser.unshare_weights(layer, ms['layers'], matrix_idx=idx) + else: + raise ModelStateException("Invalid layer name '%s'; unable to unshare." % name_str) + self.op.set_value('conv_to_local', [], parse=False) + self.op.set_value('unshare_weights', [], parse=False) + self.writing_test = False + + def get_layer_idx(self, layer_name, check_type=[]): + try: + layer_idx = [l['name'] for l in self.model_state['layers']].index(layer_name) + if check_type: + layer_type = self.model_state['layers'][layer_idx]['type'] + if layer_type not in check_type: + raise ModelStateException("Layer with name '%s' has type '%s'; should be one of %s." % (layer_name, layer_type, ",".join("'%s'" %s for s in check_type))) + return layer_idx + except ValueError: + raise ModelStateException("Layer with name '%s' not defined." 
% layer_name) + + def fill_excused_options(self): + if self.op.get_value('check_grads'): + self.op.set_value('save_path', '') + self.op.set_value('train_batch_range', '0') + self.op.set_value('test_batch_range', '0') + self.op.set_value('data_path', '') + + # Make sure the data provider returned data in proper format + def parse_batch_data(self, batch_data, train=True): + if max(d.dtype != n.single for d in batch_data[2]): + raise DataProviderException("All matrices returned by data provider must consist of single-precision floats.") + return batch_data + + def start_batch(self, batch_data, train=True): + data = batch_data[2] + self.writing_test = False + + if self.check_grads: + self.libmodel.checkGradients(data) + elif not train and self.multiview_test: + num_views = self.test_data_provider.num_views + if self.test_out != "" and self.logreg_name != "": + self.writing_test = True + self.test_file_name = os.path.join(self.test_out, 'test_preds_%d' % batch_data[1]) + self.probs = n.zeros((data[0].shape[1]/num_views, self.test_data_provider.get_num_classes()), dtype=n.single) + self.libmodel.startMultiviewTest(data, num_views, self.probs, self.logreg_name) + else: + self.libmodel.startMultiviewTest(data, num_views) + else: + num_batches_total = self.num_epochs * len(self.train_batch_range) + progress = min(1.0, max(0.0, float(self.get_num_batches_done()-1) / num_batches_total)) + self.libmodel.startBatch(data, progress, not train) + + def finish_batch(self): + ret = IGPUModel.finish_batch(self) + if self.writing_test: + if not os.path.exists(self.test_out): + os.makedirs(self.test_out) + pickle(self.test_file_name, {'data': self.probs, + 'note': 'generated from %s' % self.save_file}) + return ret + + def print_iteration(self): + print "%d.%d..." % (self.epoch, self.batchnum), + + def print_train_time(self, compute_time_py): + print "(%.3f sec)" % (compute_time_py) + + def print_costs(self, cost_outputs): + costs, num_cases = cost_outputs[0], cost_outputs[1] + for errname in costs.keys(): + costs[errname] = [(v/num_cases) for v in costs[errname]] + print "%s: " % errname, + print ", ".join("%.6f" % v for v in costs[errname]), + if sum(m.isnan(v) for v in costs[errname]) > 0 or sum(m.isinf(v) for v in costs[errname]): + print "^ got nan or inf!" + sys.exit(1) + + def print_train_results(self): + self.print_costs(self.train_outputs[-1]) + + def print_test_status(self): + pass + + def print_test_results(self): + print NL + "======================Test output======================" + self.print_costs(self.test_outputs[-1]) + print NL + "----------------------Averages-------------------------" + self.print_costs((self.aggregate_test_outputs(self.test_outputs[-len(self.test_batch_range):])[0], min(len(self.test_outputs), len(self.test_batch_range)))) + print NL + "-------------------------------------------------------", + for name in sorted(self.layers.keys()): # This is kind of hacky but will do for now. 
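+ # Prints, for every layer with weights, the mean absolute weight and (in
+ # brackets) the mean absolute weight increment. This is a quick way to judge
+ # whether the epsW learning rates are sane; the increments are typically
+ # expected to be a small fraction of the weights themselves.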
+ l = self.layers[name] + if 'weights' in l: + if type(l['weights']) == n.ndarray: + print "%sLayer '%s' weights: %e [%e]" % (NL, l['name'], n.mean(n.abs(l['weights'])), n.mean(n.abs(l['weightsInc']))), + elif type(l['weights']) == list: + print "" + print NL.join("Layer '%s' weights[%d]: %e [%e]" % (l['name'], i, n.mean(n.abs(w)), n.mean(n.abs(wi))) for i,(w,wi) in enumerate(zip(l['weights'],l['weightsInc']))), + print "%sLayer '%s' biases: %e [%e]" % (NL, l['name'], n.mean(n.abs(l['biases'])), n.mean(n.abs(l['biasesInc']))), + print "" + + def conditional_save(self): + self.save_state() + print "-------------------------------------------------------" + print "Saved checkpoint to %s" % os.path.join(self.save_path, self.save_file) + print "=======================================================", + + def aggregate_test_outputs(self, test_outputs): + test_outputs = cp.deepcopy(test_outputs) + num_cases = sum(t[1] for t in test_outputs) + for i in xrange(1 ,len(test_outputs)): + for k,v in test_outputs[i][0].items(): + for j in xrange(len(v)): + test_outputs[0][0][k][j] += test_outputs[i][0][k][j] + + return (test_outputs[0][0], num_cases) + + @classmethod + def get_options_parser(cls): + op = IGPUModel.get_options_parser() + op.add_option("mini", "minibatch_size", IntegerOptionParser, "Minibatch size", default=128) + op.add_option("layer-def", "layer_def", StringOptionParser, "Layer definition file", set_once=True) + op.add_option("layer-params", "layer_params", StringOptionParser, "Layer parameter file") + op.add_option("check-grads", "check_grads", BooleanOptionParser, "Check gradients and quit?", default=0, excuses=['data_path','save_path','train_batch_range','test_batch_range']) + op.add_option("multiview-test", "multiview_test", BooleanOptionParser, "Cropped DP: test on multiple patches?", default=0) + op.add_option("crop-border", "crop_border", IntegerOptionParser, "Cropped DP: crop border size", default=4, set_once=True) + op.add_option("conv-to-local", "conv_to_local", ListOptionParser(StringOptionParser), "Convert given conv layers to unshared local", default=[]) + op.add_option("unshare-weights", "unshare_weights", ListOptionParser(StringOptionParser), "Unshare weight matrices in given layers", default=[]) + op.add_option("conserve-mem", "conserve_mem", BooleanOptionParser, "Conserve GPU memory (slower)?", default=0) + op.add_option("color-noise", "color_noise", FloatOptionParser, "Add PCA noise to color channels with given scale", default=0.0) + op.add_option("test-out", "test_out", StringOptionParser, "Output test case predictions to given path", default="", requires=['logreg_name', 'multiview_test']) + op.add_option("logreg-name", "logreg_name", StringOptionParser, "Logreg cost layer name (for --test-out)", default="") + op.add_option("pca-noise", "pca_noise", FloatOptionParser, "Add PCA noise to pixels with given scale", default=0.0) + op.add_option("scalar-mean", "scalar_mean", FloatOptionParser, "Subtract scalar pixel mean (as opposed to vector)?", default=False) + op.add_option("wupdate-freq", "wupdate_freq", IntegerOptionParser, "Weight update (inverse) frequency, in minibatches (1 = every minibatch)", default=1) + + op.delete_option('max_test_err') + op.options["max_filesize_mb"].default = 0 + op.options["testing_freq"].default = 50 + op.options["num_epochs"].default = 50000 + op.options['dp_type'].default = None + + DataProvider.register_data_provider('dummy-lr-n', 'Dummy ConvNet logistic regression', DummyConvNetLogRegDP) + 
DataProvider.register_data_provider('inet-lr', 'ImageNet logistic regression', ImageNetLogRegDP) + DataProvider.register_data_provider('inet-lr-cropped', 'ImageNet logistic regression cropped', CroppedImageNetLogRegDP) + DataProvider.register_data_provider('inet-lr-cropped-jpeg', 'ImageNet logistic regression cropped JPEG', JPEGCroppedImageNetLogRegDP) + DataProvider.register_data_provider('inet-rs-lr-cropped', 'Random scale cropped ImageNet logistic regression', RandomScaleImageNetLogRegDP) + DataProvider.register_data_provider('flickr-ce-cropped', 'Flickr cross-entropy cropped', JPEGCroppedFlickrCEDP) + DataProvider.register_data_provider('dummy-ce-n', 'Dummy cross-entropy', DummyConvNetCEDP) + DataProvider.register_data_provider('flatmem', 'Flat memory', FlatMemoryDataProvider) + + DataProvider.register_data_provider('cifar', 'CIFAR', CIFARDataProvider) + DataProvider.register_data_provider('cifar-cropped', 'Cropped CIFAR', CroppedCIFARDataProvider) + return op + +if __name__ == "__main__": + #nr.seed(5) + op = ConvNet.get_options_parser() + + op, load_dic = IGPUModel.parse_options(op) + model = ConvNet(op, load_dic) + model.start() diff --git a/deviceQuery.txt b/deviceQuery.txt new file mode 100644 index 0000000..69bdf42 --- /dev/null +++ b/deviceQuery.txt @@ -0,0 +1,143 @@ +/u/kriz/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/deviceQuery Starting... + + CUDA Device Query (Runtime API) version (CUDART static linking) + +Found 4 CUDA Capable device(s) + +Device 0: "Tesla S2050" + CUDA Driver Version / Runtime Version 4.2 / 4.2 + CUDA Capability Major/Minor version number: 2.0 + Total amount of global memory: 3072 MBytes (3220897792 bytes) + (14) Multiprocessors x ( 32) CUDA Cores/MP: 448 CUDA Cores + GPU Clock rate: 1147 MHz (1.15 GHz) + Memory Clock rate: 1546 Mhz + Memory Bus Width: 384-bit + L2 Cache Size: 786432 bytes + Max Texture Dimension Size (x,y,z) 1D=(65536), 2D=(65536,65535), 3D=(2048,2048,2048) + Max Layered Texture Size (dim) x layers 1D=(16384) x 2048, 2D=(16384,16384) x 2048 + Total amount of constant memory: 65536 bytes + Total amount of shared memory per block: 49152 bytes + Total number of registers available per block: 32768 + Warp size: 32 + Maximum number of threads per multiprocessor: 1536 + Maximum number of threads per block: 1024 + Maximum sizes of each dimension of a block: 1024 x 1024 x 64 + Maximum sizes of each dimension of a grid: 65535 x 65535 x 65535 + Maximum memory pitch: 2147483647 bytes + Texture alignment: 512 bytes + Concurrent copy and execution: Yes with 2 copy engine(s) + Run time limit on kernels: No + Integrated GPU sharing Host Memory: No + Support host page-locked memory mapping: Yes + Concurrent kernel execution: Yes + Alignment requirement for Surfaces: Yes + Device has ECC support enabled: No + Device is using TCC driver mode: No + Device supports Unified Addressing (UVA): Yes + Device PCI Bus ID / PCI location ID: 7 / 0 + Compute Mode: + < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) > + +Device 1: "Tesla S2050" + CUDA Driver Version / Runtime Version 4.2 / 4.2 + CUDA Capability Major/Minor version number: 2.0 + Total amount of global memory: 3072 MBytes (3220897792 bytes) + (14) Multiprocessors x ( 32) CUDA Cores/MP: 448 CUDA Cores + GPU Clock rate: 1147 MHz (1.15 GHz) + Memory Clock rate: 1546 Mhz + Memory Bus Width: 384-bit + L2 Cache Size: 786432 bytes + Max Texture Dimension Size (x,y,z) 1D=(65536), 2D=(65536,65535), 3D=(2048,2048,2048) + Max Layered Texture Size (dim) x layers 1D=(16384) x 
2048, 2D=(16384,16384) x 2048 + Total amount of constant memory: 65536 bytes + Total amount of shared memory per block: 49152 bytes + Total number of registers available per block: 32768 + Warp size: 32 + Maximum number of threads per multiprocessor: 1536 + Maximum number of threads per block: 1024 + Maximum sizes of each dimension of a block: 1024 x 1024 x 64 + Maximum sizes of each dimension of a grid: 65535 x 65535 x 65535 + Maximum memory pitch: 2147483647 bytes + Texture alignment: 512 bytes + Concurrent copy and execution: Yes with 2 copy engine(s) + Run time limit on kernels: No + Integrated GPU sharing Host Memory: No + Support host page-locked memory mapping: Yes + Concurrent kernel execution: Yes + Alignment requirement for Surfaces: Yes + Device has ECC support enabled: No + Device is using TCC driver mode: No + Device supports Unified Addressing (UVA): Yes + Device PCI Bus ID / PCI location ID: 8 / 0 + Compute Mode: + < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) > + +Device 2: "Tesla S2050" + CUDA Driver Version / Runtime Version 4.2 / 4.2 + CUDA Capability Major/Minor version number: 2.0 + Total amount of global memory: 3072 MBytes (3220897792 bytes) + (14) Multiprocessors x ( 32) CUDA Cores/MP: 448 CUDA Cores + GPU Clock rate: 1147 MHz (1.15 GHz) + Memory Clock rate: 1546 Mhz + Memory Bus Width: 384-bit + L2 Cache Size: 786432 bytes + Max Texture Dimension Size (x,y,z) 1D=(65536), 2D=(65536,65535), 3D=(2048,2048,2048) + Max Layered Texture Size (dim) x layers 1D=(16384) x 2048, 2D=(16384,16384) x 2048 + Total amount of constant memory: 65536 bytes + Total amount of shared memory per block: 49152 bytes + Total number of registers available per block: 32768 + Warp size: 32 + Maximum number of threads per multiprocessor: 1536 + Maximum number of threads per block: 1024 + Maximum sizes of each dimension of a block: 1024 x 1024 x 64 + Maximum sizes of each dimension of a grid: 65535 x 65535 x 65535 + Maximum memory pitch: 2147483647 bytes + Texture alignment: 512 bytes + Concurrent copy and execution: Yes with 2 copy engine(s) + Run time limit on kernels: No + Integrated GPU sharing Host Memory: No + Support host page-locked memory mapping: Yes + Concurrent kernel execution: Yes + Alignment requirement for Surfaces: Yes + Device has ECC support enabled: No + Device is using TCC driver mode: No + Device supports Unified Addressing (UVA): Yes + Device PCI Bus ID / PCI location ID: 16 / 0 + Compute Mode: + < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) > + +Device 3: "Tesla S2050" + CUDA Driver Version / Runtime Version 4.2 / 4.2 + CUDA Capability Major/Minor version number: 2.0 + Total amount of global memory: 3072 MBytes (3220897792 bytes) + (14) Multiprocessors x ( 32) CUDA Cores/MP: 448 CUDA Cores + GPU Clock rate: 1147 MHz (1.15 GHz) + Memory Clock rate: 1546 Mhz + Memory Bus Width: 384-bit + L2 Cache Size: 786432 bytes + Max Texture Dimension Size (x,y,z) 1D=(65536), 2D=(65536,65535), 3D=(2048,2048,2048) + Max Layered Texture Size (dim) x layers 1D=(16384) x 2048, 2D=(16384,16384) x 2048 + Total amount of constant memory: 65536 bytes + Total amount of shared memory per block: 49152 bytes + Total number of registers available per block: 32768 + Warp size: 32 + Maximum number of threads per multiprocessor: 1536 + Maximum number of threads per block: 1024 + Maximum sizes of each dimension of a block: 1024 x 1024 x 64 + Maximum sizes of each dimension of a grid: 65535 x 65535 x 65535 + Maximum memory 
pitch: 2147483647 bytes + Texture alignment: 512 bytes + Concurrent copy and execution: Yes with 2 copy engine(s) + Run time limit on kernels: No + Integrated GPU sharing Host Memory: No + Support host page-locked memory mapping: Yes + Concurrent kernel execution: Yes + Alignment requirement for Surfaces: Yes + Device has ECC support enabled: No + Device is using TCC driver mode: No + Device supports Unified Addressing (UVA): Yes + Device PCI Bus ID / PCI location ID: 17 / 0 + Compute Mode: + < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) > + +deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 4.2, CUDA Runtime Version = 4.2, NumDevs = 4, Device = Tesla S2050, Device = Tesla S2050 diff --git a/example-layers/layer-params-18pct.cfg b/example-layers/layer-params-18pct.cfg new file mode 100644 index 0000000..728081d --- /dev/null +++ b/example-layers/layer-params-18pct.cfg @@ -0,0 +1,35 @@ +# 18% error on CIFAR-10 in 20 minutes - layer definition file + +# Reduce all learning rates by factor of 10 after 120 epochs. +# Then another factor of 10 after 10 more epochs. + +[conv1] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 + +[conv2] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 + +[conv3] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 + +[fc10] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=1 + +[logprob] +coeff=1 diff --git a/example-layers/layer-params-19pct.cfg b/example-layers/layer-params-19pct.cfg new file mode 100644 index 0000000..a9d730b --- /dev/null +++ b/example-layers/layer-params-19pct.cfg @@ -0,0 +1,33 @@ +# 19% error on CIFAR-10 in 20 minutes - layer parameter file +# Set wc to 0 for translations -- 14.2% + +[conv1] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 + +[conv2] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 + +[conv3] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 + +[fc10] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=3 + +[logprob] +coeff=1 diff --git a/example-layers/layer-params-80sec.cfg b/example-layers/layer-params-80sec.cfg new file mode 100644 index 0000000..241fdb7 --- /dev/null +++ b/example-layers/layer-params-80sec.cfg @@ -0,0 +1,39 @@ +# 26% error on CIFAR-10 in 80 seconds - layer parameter file + +[conv1] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 + +[conv2] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 + +[conv3] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 + +[fc64] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=.03 + +[fc10] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=.03 + +[logprob] +coeff=1 diff --git a/example-layers/layer-params-conv-local-12pct.cfg b/example-layers/layer-params-conv-local-12pct.cfg new file mode 100644 index 0000000..c7e1884 --- /dev/null +++ b/example-layers/layer-params-conv-local-12pct.cfg @@ -0,0 +1,40 @@ +# 12% error on CIFAR-10 - layer parameter file +# See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology + +[conv1] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.00 + +[conv2] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.00 + +[local3] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 + +[local4] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 + +[fc10] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 + +[logprob] +coeff=1 diff --git a/example-layers/layer-params-conv-local-13pct.cfg b/example-layers/layer-params-conv-local-13pct.cfg new file mode 100644 index 0000000..0964ec7 --- /dev/null +++ b/example-layers/layer-params-conv-local-13pct.cfg 
@@ -0,0 +1,40 @@ +# 13% error on CIFAR-10 - layer parameter file +# See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology + +[conv1] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.00 + +[conv2] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.00 + +[local3] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 + +[local4] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 + +[fc10] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 + +[logprob] +coeff=1 diff --git a/example-layers/layer-params-example.cfg b/example-layers/layer-params-example.cfg new file mode 100644 index 0000000..0e240a9 --- /dev/null +++ b/example-layers/layer-params-example.cfg @@ -0,0 +1,44 @@ +[conv32] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0 + +[local32] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0 + +[fc1024] +momW=0.9 +momB=0.9 +epsW=0.00001 +epsB=0.00002 +wc=0 + +[conv32-2] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0 + +[conv32-3] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0 + +[fc10] +epsW=0.0001,0.001 +epsB=0.002 +momW=0.5,0.9 +momB=0.9 +wc=0,0 + +[logprob] +coeff=1 diff --git a/example-layers/layer-params.gc.cfg b/example-layers/layer-params.gc.cfg new file mode 100644 index 0000000..a13df15 --- /dev/null +++ b/example-layers/layer-params.gc.cfg @@ -0,0 +1,66 @@ +[conv32a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0 + +[conv32b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0 + +[conv32c] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0 + +[fc10] +wc=0 +momB=0 +momW=0 +epsW=0.00001 +epsB=0.00002 + +[fc16a] +wc=0,0,0 +momB=0 +momW=0,0,0 +epsW=0.00001,0.1,0.1 +epsB=0.00002 + +[fc16b] +wc=0,0,0 +momB=0 +momW=0,0,0 +epsW=0.00001,0.1,0.1 +epsB=0.00002 + +[fc16c] +wc=0,0,0 +momB=0 +momW=0,0,0 +epsW=0.00001,0.1,0.1 +epsB=0.00002 + +[logreg] +coeff=1 + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=0.25 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=0.25 + +[rnorm1c] +scale=0.0001 +pow=0.75 +minDiv=0.25 diff --git a/example-layers/layers-18pct.cfg b/example-layers/layers-18pct.cfg new file mode 100644 index 0000000..582e9ea --- /dev/null +++ b/example-layers/layers-18pct.cfg @@ -0,0 +1,109 @@ +# 18% error on CIFAR-10 in 20 minutes - layer definition file + +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1] +type=conv +inputs=data +channels=3 +filters=32 +padding=2 +stride=1 +filterSize=5 +neuron=relu +initW=0.0001 +partialSum=4 +sharedBiases=1 + +[pool1] +type=pool +pool=max +inputs=conv1 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=32 + +[rnorm1] +type=rnorm +inputs=pool1 +channels=32 +sizeX=3 +scale=0.00005 +pow=.75 + +[conv2] +type=conv +inputs=rnorm1 +filters=32 +padding=2 +stride=1 +filterSize=5 +channels=32 +neuron=relu +initW=0.01 +partialSum=4 +sharedBiases=1 + +[pool2] +type=pool +pool=avg +inputs=conv2 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=32 + +[rnorm2] +type=rnorm +inputs=pool2 +channels=32 +sizeX=3 +scale=0.00005 +pow=.75 + +[conv3] +type=conv +inputs=rnorm2 +filters=64 +padding=2 +stride=1 +filterSize=5 +channels=32 +neuron=relu +initW=0.01 +partialSum=4 +sharedBiases=1 + +[pool3] +type=pool +pool=avg +inputs=conv3 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=64 + +[fc10] +type=fc +outputs=10 +inputs=pool3 +initW=0.01 +neuron=ident + +[probs] +type=softmax +inputs=fc10 + +[logprob] +type=cost.logreg +inputs=labels,probs diff --git a/example-layers/layers-19pct.cfg b/example-layers/layers-19pct.cfg new file mode 100644 index 0000000..ec29ccf --- /dev/null +++ b/example-layers/layers-19pct.cfg @@ -0,0 
+1,93 @@ +# 19% error on CIFAR-10 in 20 minutes - layer definition file + +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1] +type=conv +inputs=data +channels=3 +filters=32 +padding=2 +stride=1 +filterSize=5 +neuron=relu +initW=0.0001 +partialSum=1 +sharedBiases=1 + +[pool1] +type=pool +pool=max +inputs=conv1 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=32 + +[conv2] +type=conv +inputs=pool1 +filters=32 +padding=2 +stride=1 +filterSize=5 +channels=32 +neuron=relu +initW=0.01 +partialSum=1 +sharedBiases=1 + +[pool2] +type=pool +pool=avg +inputs=conv2 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=32 + +[conv3] +type=conv +inputs=pool2 +filters=64 +padding=2 +stride=1 +filterSize=5 +channels=32 +neuron=relu +initW=0.01 +partialSum=1 +sharedBiases=1 + +[pool3] +type=pool +pool=avg +inputs=conv3 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=64 + +[fc10] +type=fc +outputs=10 +inputs=pool3 +initW=0.01 +neuron=ident + +[probs] +type=softmax +inputs=fc10 + +[logprob] +type=cost.logreg +inputs=labels,probs diff --git a/example-layers/layers-80sec.cfg b/example-layers/layers-80sec.cfg new file mode 100644 index 0000000..e983b58 --- /dev/null +++ b/example-layers/layers-80sec.cfg @@ -0,0 +1,100 @@ +# 26% error on CIFAR-10 in 80 seconds - layer definition file + +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1] +type=conv +inputs=data +channels=3 +filters=32 +padding=2 +stride=1 +filterSize=5 +neuron=relu +initW=0.0001 +partialSum=4 +sharedBiases=1 + +[pool1] +type=pool +pool=max +inputs=conv1 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=32 + +[conv2] +type=conv +inputs=pool1 +filters=32 +padding=2 +stride=1 +filterSize=5 +channels=32 +neuron=relu +initW=0.01 +partialSum=4 +sharedBiases=1 + +[pool2] +type=pool +pool=avg +inputs=conv2 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=32 + +[conv3] +type=conv +inputs=pool2 +filters=64 +padding=2 +stride=1 +filterSize=5 +channels=32 +neuron=relu +initW=0.01 +partialSum=4 +sharedBiases=1 + +[pool3] +type=pool +pool=avg +inputs=conv3 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=64 + +[fc64] +type=fc +outputs=64 +inputs=pool3 +initW=0.1 +neuron=relu + +[fc10] +type=fc +outputs=10 +inputs=fc64 +initW=0.1 +neuron=ident + +[probs] +type=softmax +inputs=fc10 + +[logprob] +type=cost.logreg +inputs=labels,probs diff --git a/example-layers/layers-conv-local-12pct.cfg b/example-layers/layers-conv-local-12pct.cfg new file mode 100644 index 0000000..1967fff --- /dev/null +++ b/example-layers/layers-conv-local-12pct.cfg @@ -0,0 +1,92 @@ +# 19% error on CIFAR-10 in 20 minutes - layer definition file + +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1] +type=conv +inputs=data +channels=3 +filters=64 +padding=2 +stride=1 +filterSize=5 +neuron=relu +initW=0.0001 +partialSum=4 +sharedBiases=1 + +[pool1] +type=pool +pool=max +inputs=conv1 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=64 + +[conv2] +type=conv +inputs=pool1 +filters=64 +padding=2 +stride=1 +filterSize=5 +channels=64 +neuron=relu +initW=0.01 +partialSum=8 +sharedBiases=1 + +[pool2] +type=pool +pool=max +inputs=conv2 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=64 + +[local3] +type=local +inputs=pool2 +filters=32 +padding=1 +stride=1 +filterSize=3 +channels=64 +neuron=relu +initW=0.04 + +[local4] +type=local +inputs=local3 +filters=32 +padding=1 +stride=1 +filterSize=3 +channels=32 +neuron=relu +initW=0.04 + +[fc10] +type=fc +outputs=10 +inputs=local4 +initW=0.01 +neuron=ident + +[probs] +type=softmax 
+inputs=fc10 + +[logprob] +type=cost.logreg +inputs=labels,probs diff --git a/example-layers/layers-conv-local-13pct.cfg b/example-layers/layers-conv-local-13pct.cfg new file mode 100644 index 0000000..df6bcf6 --- /dev/null +++ b/example-layers/layers-conv-local-13pct.cfg @@ -0,0 +1,93 @@ +# 13% error on CIFAR-10 in 20 minutes - layer definition file +# See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology + +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1] +type=conv +inputs=data +channels=3 +filters=64 +padding=2 +stride=1 +filterSize=5 +neuron=relu +initW=0.0001 +partialSum=4 +sharedBiases=1 + +[pool1] +type=pool +pool=max +inputs=conv1 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=64 + +[conv2] +type=conv +inputs=pool1 +filters=64 +padding=2 +stride=1 +filterSize=5 +channels=64 +neuron=relu +initW=0.01 +partialSum=8 +sharedBiases=1 + +[pool2] +type=pool +pool=max +inputs=conv2 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=64 + +[local3] +type=local +inputs=pool2 +filters=32 +padding=1 +stride=1 +filterSize=3 +channels=64 +neuron=relu +initW=0.04 + +[local4] +type=local +inputs=local3 +filters=32 +padding=1 +stride=1 +filterSize=3 +channels=32 +neuron=relu +initW=0.04 + +[fc10] +type=fc +outputs=10 +inputs=local4 +initW=0.01 +neuron=ident + +[probs] +type=softmax +inputs=fc10 + +[logprob] +type=cost.logreg +inputs=labels,probs diff --git a/example-layers/layers-example.cfg b/example-layers/layers-example.cfg new file mode 100644 index 0000000..e8a1b14 --- /dev/null +++ b/example-layers/layers-example.cfg @@ -0,0 +1,115 @@ +# This is a layer configuration file that contains all the +# layer types supported by this code. It's not actually good for anything +# other than demonstrating how layers are specified and connected to one another. + +# Note: this file has gotten so big that the resultant net will not run on anything short of a 3GB GTX 580. +# But there's no particular reason to run the net specified by this file. It's not actually good. 
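+# Rough key to the fields used below: each [section] defines one layer; type=
+# picks the layer kind (data, conv, local, fc, pool, rnorm, cnorm, softmax,
+# cost.logreg, ...); inputs= names the layer(s) it reads from; channels, filters,
+# filterSize, padding and stride give the filter geometry; neuron= selects the
+# activation; initW (and initB) set the scale of the random initialization. The
+# per-layer training constants (epsW/epsB learning rates, momW/momB momenta,
+# wc weight decay) live in the companion layer-params file.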
+ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv32] +type=conv +inputs=data +channels=3 +filters=32 +padding=4 +stride=1 +filterSize=9 +neuron=logistic +initW=0.00001 +partialSum=1 +sharedBiases=true + +[local32] +type=local +inputs=conv32 +channels=32 +filters=32 +padding=4 +stride=1 +filterSize=9 +neuron=logistic +initW=0.00001 + +[fc1024] +type=fc +outputs=1024 +inputs=data +initW=0.001 +neuron=relu + +[maxpool] +type=pool +pool=max +inputs=local32 +start=0 +sizeX=4 +stride=2 +outputsX=0 +channels=32 + +[rnorm1] +type=rnorm +inputs=maxpool +channels=32 +sizeX=5 +scale=0.0000125 +pow=0.75 + +[cnorm1] +type=cnorm +inputs=rnorm1 +channels=32 +sizeX=7 +scale=0.001 +pow=0.5 + +[conv32-2] +type=conv +inputs=cnorm1 +groups=4 +channels=32 +filters=32 +padding=2 +stride=1 +filterSize=5 +neuron=relu +initW=0.0001 +partialSum=1 +sharedBiases=false + +[conv32-3] +type=conv +inputs=conv32-2 +groups=4 +channels=128 +filters=32 +padding=2 +stride=2 +filterSize=5 +neuron=relu +initW=0.0001 +partialSum=1 +randSparse=true +filterChannels=64 + +[fc10] +type=fc +outputs=10 +inputs=conv32-3,fc1024 +initW=0.0001,0.0001 +neuron=ident + +[probs] +type=softmax +inputs=fc10 + +[logprob] +type=cost.logreg +inputs=labels,probs diff --git a/example-layers/layers.gc.cfg b/example-layers/layers.gc.cfg new file mode 100644 index 0000000..f5a0c9e --- /dev/null +++ b/example-layers/layers.gc.cfg @@ -0,0 +1,112 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv32a] +type=conv +inputs=data +filters=16 +padding=0 +stride=1 +filterSize=3 +channels=3 +neuron=relu +initW=0.3 +initB=1 +partialSum=1 +sharedBiases=true +gpu=0 + +[conv32b] +type=conv +inputs=data +filters=16 +padding=0 +stride=1 +filterSize=3 +channels=3 +neuron=relu +initW=0.3 +initB=1 +partialSum=1 +sharedBiases=true +gpu=1 + +[conv32c] +type=conv +inputs=data +filters=16 +padding=0 +stride=1 +filterSize=3 +channels=3 +neuron=relu +initW=0.3 +initB=1 +partialSum=1 +sharedBiases=true +gpu=2 + +[rnorm1a] +type=cmrnorm +inputs=conv32a +channels=16 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv32b +channels=16 +size=5 + +[rnorm1c] +type=cmrnorm +inputs=conv32c +channels=16 +size=5 + +[fc16a] +type=fc +outputs=16 +inputs=rnorm1a,rnorm1b,rnorm1c +initW=0.1,0.1,0.1 +gpu=0 + +[fc16b] +type=fc +outputs=16 +inputs=rnorm1b,rnorm1c,rnorm1a +initW=0.1,0.1,0.1 +gpu=1 + +[fc16c] +type=fc +outputs=16 +inputs=rnorm1c,rnorm1a,rnorm1a +initW=0.1,0.1,0.1 +gpu=2 + +[concat] +type=concat +inputs=fc16a,fc16c,fc16b + +[fc10] +type=fc +inputs=concat +outputs=10 +initW=0.08 +gpu=0 + +[probs] +type=softmax +inputs=fc10 +gpu=0 + +[logreg] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/findsimilar.py b/findsimilar.py new file mode 100755 index 0000000..a97e46a --- /dev/null +++ b/findsimilar.py @@ -0,0 +1,78 @@ +import os +import sys +from getopt import getopt +import numpy as n +import numpy.random as nr +from time import time +from util import * +import pylab as pl +import gc + +imnet_dir = '/storage2/imnet-contest' +ftr_dir = '/storage2/imnet-features-4096' + +TEST_IMGS = 128 +TOP_IMGS = 16 +TEST_BATCH = 'data_batch_3000' + +IMG_SIZE = 256 +IMGS_PER_FIGURE = 16 + +def draw_fig(test_imgs, tops): + for f in xrange(TEST_IMGS/IMGS_PER_FIGURE): + + pl.figure(f+1, figsize=(15,15)) + pl.clf() + bigpic = n.zeros((3, (IMG_SIZE+1)*IMGS_PER_FIGURE - 1, (IMG_SIZE+1)*(1+TOP_IMGS) + 3), dtype=n.single) + for i in xrange(IMGS_PER_FIGURE): + img_idx = f * IMGS_PER_FIGURE + i + bigpic[:, (IMG_SIZE+1) * i:(IMG_SIZE+1)*i+IMG_SIZE,:IMG_SIZE] = 
test_imgs[:,img_idx].reshape(3, IMG_SIZE, IMG_SIZE) + for j in xrange(TOP_IMGS): + if tops[img_idx][j]['img'] is not None: + bigpic[:, (IMG_SIZE+1) * i:(IMG_SIZE+1)*i+IMG_SIZE,IMG_SIZE + 4 + j*(IMG_SIZE+1):IMG_SIZE + 4 + j*(IMG_SIZE+1)+IMG_SIZE] = tops[img_idx][j]['img'].reshape(3, IMG_SIZE, IMG_SIZE) + bigpic /= 255 + pl.imshow(bigpic.swapaxes(0,1).swapaxes(1,2), interpolation='lanczos') + +if __name__ == "__main__": + (options, args) = getopt(sys.argv[1:], "") + options = dict(options) + + # Take 128 images from test batch + dic = unpickle(os.path.join(ftr_dir, TEST_BATCH)) + p = nr.permutation(dic['data'].shape[0])[:TEST_IMGS] + data = dic['data'][p,:] + labels = dic['labels'][:,p] + dicimgs = unpickle(os.path.join(imnet_dir, TEST_BATCH)) + test_imgs = dicimgs['data'][:,p] + + tops = [[{'dist': n.inf, 'batch': 0, 'idx': 0, 'img': None} for i in xrange(TOP_IMGS)] for j in xrange(TEST_IMGS)] + + pl.ion() + for b in xrange(1, 1335): + dic = unpickle(os.path.join(ftr_dir, 'data_batch_%d' % b)) + dicimgs = unpickle(os.path.join(imnet_dir, 'data_batch_%d' % b)) + t = time() + dists = [n.sum((data[i,:] - dic['data'])**2, axis=1) for i in xrange(TEST_IMGS)] + minidx = [d.argmin() for d in dists] + print dists[0].shape + for i, dist, midx, top in zip(xrange(TEST_IMGS), dists, minidx, tops): + k = TOP_IMGS + while k > 0 and dist[midx] < top[k - 1]['dist']: + k -= 1 + if k < TOP_IMGS: + top.insert(k, {'dist': dist[midx], 'batch': b, 'idx': midx, 'img': dicimgs['data'][:,midx].copy()}) + top.pop() + #print top + del dic + del dicimgs + del dists + del minidx + gc.collect() + #print tops + print "Finished training batch %d (%f sec)" % (b, time() - t) + if b % 50 == 0: + draw_fig(test_imgs, tops) + pl.draw() + pl.ioff() + draw_fig(test_imgs, tops) + pl.show() diff --git a/fix-big-imgnet.py b/fix-big-imgnet.py new file mode 100755 index 0000000..c530e51 --- /dev/null +++ b/fix-big-imgnet.py @@ -0,0 +1,40 @@ +import os +import sys +from PIL import Image +from StringIO import StringIO +from util import * + +src = '/ais/gobi3/u/ilya/jpg_valid_2010_85/' +dst = '/ais/gobi3/u/kriz/lsvrc-2010-jpg/' + +BATCH_SIZE = 1024 + +def save_batch(c_strings, c_labels, c_wnids, out_b): + pickle(os.path.join(dst, 'data_batch_%d' % out_b), (c_strings, c_labels, c_wnids)) + + return out_b + 1 +if __name__ == "__main__": + c_strings = [] + c_labels = [] + c_wnids = [] + out_b = 2000 + for b in xrange(49): + failed = 0 + strings, sizes, labels = unpickle(os.path.join(src, '%s' % b)) + for s,l in zip(strings, labels): + try: + im = Image.open(StringIO(s)).convert('RGB') + c_strings += [s] + c_labels += [l[1]] + c_wnids += [l[0]] + if len(c_strings) == BATCH_SIZE: + out_b = save_batch(c_strings, c_labels, c_wnids, out_b) + c_strings = [] + c_labels = [] + c_wnids = [] + except IOError,e: + failed += 1 + print "Batch %d failed: %d" % (b, failed) + + if len(c_strings) > 0: + save_batch(c_strings, c_labels, c_wnids, out_b) diff --git a/fix-flickr.py b/fix-flickr.py new file mode 100755 index 0000000..8354425 --- /dev/null +++ b/fix-flickr.py @@ -0,0 +1,41 @@ +import os +import sys +from PIL import Image +from StringIO import StringIO +from util import * + +src = '/ais/gobi3/u/ilya/flickr_85/' +dst = '/ais/gobi3/u/kriz/flickr-85-1024/' + +BATCH_SIZE = 2048 + +def save_batch(c_strings, c_sizes, c_labels, out_b): + pickle(os.path.join(dst, 'data_batch_%d' % out_b), (c_strings, c_sizes, c_labels)) + + return out_b + 1 +if __name__ == "__main__": + c_strings = [] + c_sizes = [] + c_labels = [] + out_b = 1 + for b in xrange(977): + 
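+ # The source directory appears to hold 977 pickled chunks named 0..976; each
+ # image is test-decoded with PIL and re-packed into batches of BATCH_SIZE,
+ # with unreadable JPEGs (IOError) counted and skipped.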
failed = 0 + strings, sizes, labels = unpickle(os.path.join(src, '%s' % b)) + for s,z,l in zip(strings, sizes, labels): + try: + im = Image.open(StringIO(s)).convert('RGB') + c_strings += [s] + c_sizes += [z] + c_labels += [l] + + if len(c_strings) == BATCH_SIZE: + out_b = save_batch(c_strings, c_sizes, c_labels, out_b) + c_strings = [] + c_sizes = [] + c_labels = [] + except IOError,e: + failed += 1 + print "Batch %d failed: %d" % (b, failed) + + if len(c_strings) > 0: + save_batch(c_strings, c_sizes, c_labels, out_b) diff --git a/gen-py-interface.py b/gen-py-interface.py new file mode 100755 index 0000000..f6407e3 --- /dev/null +++ b/gen-py-interface.py @@ -0,0 +1,65 @@ +import sys +import re +import os + +MODEL_CONSTRUCTOR = """ConvNet::ConvNet(PyListObject* layerParams, int minibatchSize, int deviceID)""" + +pytype_mappings = {"float": "", + "int": "", + "bool":"", + "PyListObject": "PyList_Type"} +argstring_mappings = {"float": "d", + "bool":"i", + "int": "i"} +init_type_mappings = {"float": "double", + "int": "int", + "bool":"int", + "PyListObject": "PyListObject*"} + +if __name__ == "__main__": + m = re.match(r"^(\w+)::\w+\((.*)\)$", MODEL_CONSTRUCTOR, re.MULTILINE | re.DOTALL) + model_name = m.group(1) + model_params = m.group(2).split(',') + + template = "" + with open('./pyInterface.cutemp', 'r') as f: + template = ''.join(line for line in f) + template = template.replace("${MODEL_NAME}", model_name) + template = template.replace("${MODEL_NAME_LOWER}", model_name.lower()) + + init_vars = "" + init_parse = "" + arg_string = "" + model_preamble = "" + model_start = " model = new %s(" % model_name + space_padding = len(model_start) + numVectors = 0 + for i,p in enumerate(model_params): + param = p.strip().split(' ') + ptype = re.match("^([\w<>\*]+)", param[0]).group(1).strip('*') + pname = param[1].strip('*') + pname = "py" + pname[0].upper() + pname[1:] + if ptype not in pytype_mappings: + print "Unknown type: %s" % ptype + sys.exit(1) + mapping = pytype_mappings[ptype] + if mapping == "": + arg_string += argstring_mappings[ptype] + init_parse += " &%s" % pname + else: + arg_string += "O!" + init_parse += " &%s, &%s" % (mapping, pname) + + model_start += "%*s%s" % (space_padding * (i>0), "", pname) + + if i < len(model_params) - 1: + init_parse += ",\n" + model_start += ",\n" + init_vars += " %s %s;\n" % (init_type_mappings[ptype], pname) + model_start += ");\n" + template = template.replace("${INIT_VARS}", init_vars) + template = template.replace("${INIT_PARSE}", init_parse) + template = template.replace("${ARG_STRING}", arg_string) + template = template.replace("${MODEL_START}", model_preamble + model_start) + + print template diff --git a/include/convnet.cuh b/include/convnet.cuh new file mode 100644 index 0000000..5e9c96c --- /dev/null +++ b/include/convnet.cuh @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CONVNET3 +#define CONVNET3 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "layer.cuh" +#include "data.cuh" +#include "worker.cuh" +#include "weights.cuh" +#include "hostmem.cuh" + +class Worker; +class WorkResult; +class Layer; +class DataLayer; +class CostLayer; +class ConvNetGPU; + +class ConvNet : public Thread { +protected: + std::map _layerMap; + std::vector _dataLayers; + std::vector _convNetThreads; // List of convnet threads + DataProvider* _dp; + CPUData* _data; + ThreadSynchronizer* _sync; + PipeDispenser* _pd; + intv* _deviceIDs; + std::vector* _deviceCPUs; + + Queue _workerQueue; + Queue _resultQueue; + Queue _msgQueue; + + int _numFwdTerminal, _numBwdTerminal; + int _weightUpdateFreq, _numBwdMiniPasses; + // For gradient checking + int _numFailures; + int _numTests; + // Training progress (between 0 and 1). + // Used to determine learning rate based on LearningRateSchedule. 
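+    // Set via setTrainingProgress() and read back through getTrainingProgress().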
+ double _trainingProgress; + double _baseErr; + + void waitForTerminals(int numMsgs, MESSAGES msg); + void sendMessage(MESSAGES msg, bool sync); + void findBwdTerminal(Layer& l, std::set& visited, std::set &terminal); + void* run(); +public: + ConvNet(PyObject* layerParams, intv& deviceIDs, std::vector& deviceCPUs, int minibatchSize, int weightUpdateFreq); + + Queue& getMessageQueue(); + Queue& getWorkerQueue(); + Queue& getResultQueue(); + DataProvider& getDataProvider(); + + Layer& operator[](string& name); + Layer& getLayer(string& name); + void copyToCPU(); + void copyToGPU(); + void updateWeights(); + void reset(); + + void bprop(PASS_TYPE passType); + void fprop(PASS_TYPE passType); + void fprop(int miniIdx, PASS_TYPE passType); + void fprop(CPUData& data, PASS_TYPE passType); + + void setTrainingProgress(double progress); + double getTrainingProgress() const; + + bool checkGradient(const std::string& name, float eps, Weights& weights); + void checkGradients(); + Cost& getCost(); + Cost& getCost(Cost& cost); + double getCostValue(); + int getDeviceID(int gpuIdx); + intv& getDeviceIDs(); + ThreadSynchronizer& getSync(); + void syncWithChildren(); + int getWeightUpdateFreq(); + int getNumBwdMiniPasses(); + int getMinibatchSize(); + PipeDispenser& getPipeDispenser(); +}; + +class ConvNetGPU : public Thread { +protected: + std::map _layerMap; + std::vector _costs; + ConvNet* _convNet; + int _deviceID; + Queue _msgQueue; + + void initCuda(); + virtual void initLayer(PyObject* paramsDict); + void* run(); + + void copyToCPU(); + void copyToGPU(); + void updateWeights(); + void reset(); +public: + ConvNetGPU(PyObject* layerList, int deviceID, intv& deviceCPUs, ConvNet* convNet); + + std::map& getLayerMap(); + + void bprop(PASS_TYPE passType); + void fprop(PASS_TYPE passType); + void fprop(int miniIdx, PASS_TYPE passType); + int getDeviceID(); + + ConvNet& getConvNet(); + + void enqueueMessage(Message* msg); + Queue& getMessageQueue(); + std::vector& getCostLayers(); + + Cost& getCost(int numCases); + Layer& operator[](string& name); + Layer& getLayer(string& name); +}; + +#endif /* CONVNET */ + diff --git a/include/cost.cuh b/include/cost.cuh new file mode 100644 index 0000000..b6eae9e --- /dev/null +++ b/include/cost.cuh @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef COST_CUH +#define COST_CUH + +#include +#include +#include + +#include "layer.cuh" +#include "util.cuh" + +class CostLayer; + +/* + * Wrapper for dictionary mapping cost name to vector of returned values. + */ +class Cost { +private: + int _numCases; + CostMap _costMap; + CostCoeffMap _costCoeffMap; +public: + Cost(int numCases); + Cost(int numCases, std::vector& costs); + doublev& operator [](const std::string s); + CostMap& getCostMap(); + CostCoeffMap& getCostCoeffMap(); + int getNumCases(); + /* + * Returns sum of first values returned by all the costs, weighted by the cost coefficients. + */ + double getValue(); + Cost& operator += (Cost& er); + Cost& operator |= (Cost& er); + Cost& operator /= (const double v); + virtual ~Cost(); +}; + + +#endif /* COST_CUH */ + diff --git a/include/cpuCNN.cuh b/include/cpuCNN.cuh new file mode 100644 index 0000000..c01c6ed --- /dev/null +++ b/include/cpuCNN.cuh @@ -0,0 +1,31 @@ +/* + * File: cpuFuncs.h + * Author: Alex Krizhevsky + * + * Created on September 10, 2012, 5:05 PM + */ + +#ifndef CPUFUNCS_H +#define CPUFUNCS_H +#include +#include +/* + * weights: (numNodes, numFeatures) + * nodes: numNodesAtDepth-length array of ushort2 + * where x coordinate gives node idx and y coordinate gives parent idx + * targets: (numNodes, numFeatures) + * + */ +void cpuSoftmaxTreeFwd(float* weights, float* targets, const int numFeatures, SoftmaxTree& tree); + +/* + * grads: (numNodes, numFeatures) + * + */ +void cpuSoftmaxTreeBwd(float* grads, const int numFeatures, SoftmaxTree& tree); + +void cpuSoftmaxTreeUpdateWeights(float* weights, float* weightsInc, float* weightsGrad, + const int numFeatures, float eps, const float mom, float wc, SoftmaxTree& tree); + +#endif /* CPUFUNCS_H */ + diff --git a/include/data.cuh b/include/data.cuh new file mode 100644 index 0000000..a87b981 --- /dev/null +++ b/include/data.cuh @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DATA_CUH +#define DATA_CUH + +#include +#include +#include "util.cuh" + +class Data { +protected: + MatrixV* _data; + void assertDimensions() { + assert(_data->size() > 0); + for (int i = 1; i < _data->size(); i++) { + assert(_data->at(i-1)->getNumCols() == _data->at(i)->getNumCols()); + assert(_data->at(i-1)->isTrans() == _data->at(i)->isTrans()); + } + assert(_data->at(0)->getNumCols() > 0); + } +public: + typedef typename MatrixV::iterator T_iter; + // Cases in columns, but array may be transposed + // (so in memory they can really be in rows -- in which case the array is transposed + // during the copy to GPU). + Data(PyObject* pyData) { + _data = getMatrixV(pyData); + assertDimensions(); + } + + Data(MatrixV* data) : _data(data) { + assertDimensions(); + } + + ~Data() { + for (T_iter it = _data->begin(); it != _data->end(); ++it) { + delete *it; + } + delete _data; + } + + Matrix& operator [](int idx) const { + return *_data->at(idx); + } + + int getSize() const { + return _data->size(); + } + + MatrixV& getData() const { + return *_data; + } + + Matrix& getData(int i) const { + return *_data->at(i); + } + + bool isTrans() const { + return _data->at(0)->isTrans(); + } + + int getNumCases() const { + return _data->at(0)->getNumCols(); + } +}; + +typedef Data CPUData; + +class DataProvider { +protected: + CPUData* _hData; + NVMatrixV _data; + int _minibatchSize; +public: + DataProvider(int minibatchSize); + void setData(CPUData&); + void clearData(); + CPUData& getMinibatch(int idx); + CPUData& getDataSlice(int startCase, int endCase); + int getNumMinibatches(); + int getMinibatchSize(); + int getNumCases(); + int getNumCasesInMinibatch(int idx); +}; + +#endif /* DATA_CUH */ + diff --git a/include/hostmem.cuh b/include/hostmem.cuh new file mode 100644 index 0000000..986867a --- /dev/null +++ b/include/hostmem.cuh @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HOSTMEM_CUH +#define HOSTMEM_CUH + +#include +#include +#include + +/* + * A utility class for transferring untyped memory from CPU to GPU and vice versa. + */ +class PinnedHostMem { +protected: + uint _numBytes; + void* _data; +public: + PinnedHostMem(); + ~PinnedHostMem(); + void resize(uint bytes); + void copyFrom(void* src, uint bytes); + void copyTo(void* dst); + void* getData(); +}; + +#endif /* HOSTMEM_CUH */ + diff --git a/include/layer.cuh b/include/layer.cuh new file mode 100644 index 0000000..e3a00e2 --- /dev/null +++ b/include/layer.cuh @@ -0,0 +1,654 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LAYER_CUH +#define LAYER_CUH + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "convnet.cuh" +#include "cost.cuh" +#include "weights.cuh" +#include "neuron.cuh" +#include "data.cuh" +#include "layer_kernels.cuh" +#include "hostmem.cuh" +#include "softmaxtree.cuh" +#include "pipedispenser.cuh" + +class Cost; +class ConvNet; +class ConvNetGPU; +class CostLayer; +class DataLayer; +//class Message; +//class FpropMessage; + +// The input matrix here is the squared norm. +// This replaces the squared norm with: +// 1 if it is below the threshold given by norm2 +// norm/sqrt(a) otherwise -- i.e. the desired norm (not squared) +class WeightConstraintOperator { +private: + float _norm, _norm2; +public: + WeightConstraintOperator(float norm) : _norm(norm), _norm2(norm*norm) { + } + __device__ inline float operator()(const float a) const { + return a > _norm2 ? 
__fdividef(_norm, sqrtf(a)) : 1.0f; + } +}; + +class WeightContrastNormOperator { +private: + float _min, _max, _scale; +public: + WeightContrastNormOperator(float min, float max, float scale) : _min(min), _max(max), _scale(scale) { + } + __device__ inline float operator()(float a) const { + a = sqrtf(a) * _scale; + return a < _min ? __fdividef(_min, a) : a > _max ? __fdividef(_max, a) : 1.0f; + } +}; + +/* + * Abstract layer. + */ +class Layer { +protected: + ConvNetGPU* _convNetGPU; + std::vector _prev, _next; + int _rcvdFInputs; + std::map _rcvdBInputs; + int _rcvdBInputMsgs; + int _numOutputs; + NVMatrixV _inputs; + std::map _outputs; + std::map _actsGrad; // Layer activity gradients + bool _gradConsumer, _foundGradConsumers, _trans; + bool _conserveMem; + bool _bwdTerminal; + int _numGradProducersNext; + int _actsTarget, _actsGradTarget; + std::string _name, _type; + int _deviceID; + intv _nextDeviceIDs; + HostNVMatrix _hostMemFwd, _hostMemBwd; + Quantizer* _fwdQuantizer, *_bwdQuantizer; + + virtual void fpropNext(PASS_TYPE passType); + virtual void truncBwdActs(); + virtual void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) = 0; + + virtual void bpropCommon(NVMatrix& v, PASS_TYPE passType) { + // Do nothing by default + } + virtual void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + assert(!isGradProducer()); // Only do nothing if not grad producer + } + void shuffle(intv& v); +public: + static bool _saveActsGrad, _saveActs; + + Layer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool trans); + + virtual void fprop(PASS_TYPE passType); + void fprop(NVMatrix& v, PASS_TYPE passType); + virtual void fprop(NVMatrixV& v, PASS_TYPE passType); + virtual void bprop(PASS_TYPE passType); + virtual void bprop(NVMatrix& v, PASS_TYPE passType); + virtual void reset(); + int getNumCases(NVMatrix& v); + int incRcvdBInputs(int deviceID); + int getRcvdFInputs(); + int getRcvdBInputs(int deviceID); + int incRcvdBInputMsgs(); + bool isGradConsumer(); + bool hasGradProducerNext(std::string& layerName); + // Does this layer produce a gradient for any layer? + virtual bool isGradProducer(); + // Does this layer produce a gradient for layer of given name? 
+ virtual bool isGradProducer(std::string& layerName); + std::string& getName(); + std::string& getType(); + void addNext(Layer* l); + void addPrev(Layer* l); + std::vector& getPrev(); + std::vector& getNext(); + virtual NVMatrix& getActs(); + virtual NVMatrix& getActs(int deviceID); + virtual NVMatrix& getActsGrad(int deviceID); + virtual NVMatrix& getActsGrad(); + virtual void postInit(); + int getDeviceID(); + ConvNetGPU& getConvNetGPU(); + ConvNet& getConvNet(); + PipeDispenser& getPipeDispenser(); + void setBwdTerminal(bool t); + // Do nothing if this layer has no weights + virtual bool updateWeights() { + return false; + } + virtual void checkGradients() { + } + virtual void copyToCPU() { + } + virtual void copyToGPU() { + } +}; + +class NeuronLayer : public Layer { +protected: + Neuron* _neuron; + string _neuronType; + + virtual void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + virtual void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); +public: + NeuronLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); + std::string& getNeuronType(); +}; + +class WeightLayer : public Layer { +protected: + WeightList _weights; + Weights *_biases; + float _wStep, _bStep; + bool _gradComputed; + + void bpropCommon(NVMatrix& v, PASS_TYPE passType); + virtual void bpropBiases(NVMatrix& v, PASS_TYPE passType) = 0; + virtual void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType) = 0; + virtual void constrainWeights() = 0; +public: + WeightLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool trans, bool useGrad, bool initWeights); + virtual bool updateWeights(); + virtual void copyToCPU(); + virtual void copyToGPU(); + virtual void checkGradients(); + Weights& getWeights(int idx); +}; + +class FCLayer : public WeightLayer { +protected: + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropBiases(NVMatrix& v, PASS_TYPE passType); + void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType); + virtual void constrainWeights(); +public: + FCLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool useGrad, bool initWeights); + FCLayer(); +}; + +class TreeFCLayer : public FCLayer { +protected: + TreeWeights* _treeWeights; + static void makeTree(PyObject* pyTree, SoftmaxNode& rootNode); + void constrainWeights(); +public: + TreeFCLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); + void checkGradients(); +}; + +class SoftmaxLayer : public Layer { +protected: + bool _doLogregGrad; + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); +public: + SoftmaxLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); + void setDoLogregGrad(bool b); +}; + +class ConcatenationLayer : public Layer { +protected: + intv* _copyOffsets; + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); +public: + ConcatenationLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); + void setDoLogregGrad(bool b); +}; + +class EltwiseSumLayer : public Layer { +protected: + floatv* _coeffs; + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE 
passType); +public: + EltwiseSumLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class EltwiseMaxLayer : public Layer { +protected: + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); +public: + EltwiseMaxLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class DataLayer : public Layer { +protected: + bool _useBuffer; + int _dataIdx; + int _bufferMinibatchIdx; + std::map _outputs2; // Buffer for copying data during computation + CPUData* _bufferData; + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void postInit(); + void copyData(CPUData& data, bool other); + void fpropNext(PASS_TYPE passType); +public: + DataLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); + NVMatrix& getActs(int deviceID); + NVMatrix& getActs(int deviceID, bool other); + bool isGradProducer(); + void fprop(PASS_TYPE passType); + void fprop(NVMatrixV& data, PASS_TYPE passType); + void setBuffer(CPUData& data, int minibatchIdx); + void startFprop(CPUData& data, PASS_TYPE passType); + void startFpropFromBuffer(PASS_TYPE passType); + int getBufferMinibatchIdx(); + CPUData* getBufferData(); +}; + +class LocalLayer : public WeightLayer { +protected: + struct FilterConns { + int* hFilterConns; + int* dFilterConns; + }; + vector* _filterConns; + + intv* _padding, *_stride, *_filterSize, *_channels, *_imgSize, *_groups; + intv* _imgPixels, *_filterPixels, *_filterChannels, *_overSample, *_randSparse; + int _modulesX, _modules, _numFilters; + + void copyToGPU(); + +public: + LocalLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool useGrad); +}; + +class ConvLayer : public LocalLayer { +protected: + int _partialSum; + bool _sharedBiases; + floatv* _weightContrastNormMin, *_weightContrastNormMax; + NVMatrix _weightGradTmp, _actGradTmp; + + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropBiases(NVMatrix& v, PASS_TYPE passType); + void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType); + void truncBwdActs(); + void constrainWeights(); + +public: + ConvLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class LocalUnsharedLayer : public LocalLayer { +protected: + NVMatrix _sexMask; + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropBiases(NVMatrix& v, PASS_TYPE passType); + void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType); + void constrainWeights(); +public: + LocalUnsharedLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class PoolLayer : public Layer { +protected: + int _channels, _sizeX, _start, _stride, _outputsX; + int _imgSize; + string _pool; +public: + PoolLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool trans); + + static PoolLayer& makePoolLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class AvgPoolLayer : public PoolLayer { +protected: + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); +public: + AvgPoolLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class MaxPoolLayer : public PoolLayer { +protected: + bool _abs; + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); +public: + 
MaxPoolLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool abs); +}; + +class RandomPoolLayer : public PoolLayer { +protected: + bool _doMax; + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); +public: + RandomPoolLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class RandomScaleLayer : public Layer { +protected: + int _channels, _imgSize, _tgtSize, _minScaledSize; + float _maxScale; // should be >= 1 + NVMatrix _rescaledActs; + std::vector _scaleProbs; +public: + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); + + RandomScaleLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class NailbedLayer : public Layer { +protected: + int _channels, _start, _stride, _outputsX; + int _imgSize; +public: + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); + + NailbedLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class GaussianBlurLayer : public Layer { +protected: + int _channels; + Matrix* _hFilter; + NVMatrix _filter; + NVMatrix _actGradsTmp; +public: + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); + void copyToGPU(); + + GaussianBlurLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class HorizontalReflectionLayer : public Layer { +protected: + int _channels, _imgSize; +public: + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); + + HorizontalReflectionLayer(ConvNetGPU* convNet, PyObject* paramsDict); +}; + +class ResizeLayer : public Layer { +protected: + int _channels; + float _scale; + int _imgSize, _tgtSize; +public: + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); + + ResizeLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class HiddenSexLayer : public Layer { +protected: + bool _enable; + float _keep; + NVMatrix _sexMask; +public: + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); + void truncBwdActs(); + HiddenSexLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class RGBToYUVLayer : public Layer { +public: + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); + + RGBToYUVLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class RGBToLABLayer : public Layer { +protected: + bool _center; +public: + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); + + RGBToLABLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class ResponseNormLayer : public Layer { +protected: + int _channels, _size; + float _scale, _pow; + NVMatrix _denoms; + + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); + void truncBwdActs(); +public: + ResponseNormLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class 
CrossMapResponseNormLayer : public ResponseNormLayer { +protected: + bool _blocked; + float _minDiv; + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); +public: + CrossMapResponseNormLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class ContrastNormLayer : public ResponseNormLayer { +protected: + int _imgSize; + NVMatrix _meanDiffs; + + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); + void truncBwdActs(); +public: + ContrastNormLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class CostLayer : public Layer { +protected: + float _coeff; + doublev _costv; +public: + CostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool trans); + void bprop(NVMatrix& v, PASS_TYPE passType); +// void bprop(PASS_TYPE passType); // Pure idiocy... it won't compile without this useless definition. + void fprop(PASS_TYPE passType); + + virtual doublev& getCost(); + float getCoeff(); + bool isGradProducer(); + void setSendTerminalMessages(bool send); + + static CostLayer& makeCostLayer(ConvNetGPU* convNetGPU, string& type, PyObject* paramsDict); +}; + +/* + * Input 0: labels + * Input 1: softmax outputs + */ +class CrossEntCostLayer : public CostLayer { +protected: + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); +public: + CrossEntCostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +/* + * Input 0: labels + * Input 1: softmax outputs + */ +class LogregCostLayer : public CostLayer { +protected: + NVMatrix _correctProbs, _topkProbs; + NVMatrix _probsAccum; + int _numAccumed; + int _topk; + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); +public: + LogregCostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); + NVMatrix& getProbsAccum(); +}; + +/* + * Input 0: labels + * Input 1: logistic outputs + */ +class CrossEnt2CostLayer : public CostLayer { +protected: + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); +public: + CrossEnt2CostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); + class CrossEntOperator { + public: + __device__ inline float operator()(const float t, const float y) const { + return t * safelog(y) + (1.0f - t) * safelog(1.0f - y); + } + }; + // Only for use with non-logistic units + class CrossEntGradientOperator { + private: + float _coeff; + public: + CrossEntGradientOperator(float coeff) : _coeff(coeff) { + + } + __device__ inline float operator()(const float t, const float y) const { + return _coeff * (__fdividef(t, y) + __fdividef(1.0f - t, 1.0f - y)); + } + }; +}; + +/* + * Input 0: labels + * Input 1: logistic outputs + */ +class RobustFlickrCost : public CostLayer { +protected: + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); +public: + RobustFlickrCost(ConvNetGPU* convNetGPU, PyObject* paramsDict); + class RobustFlickrCostOperator { + public: + __device__ inline float operator()(const float t, const float y) const { + const float d = (y-t) * (y-t); + return __logf(1 + d);// - (t * safelog(y)); + } + }; + // Only for use with non-logistic 
units + class RobustFlickrCostGradientOperator { + private: + float _coeff; + public: + RobustFlickrCostGradientOperator(float coeff) : _coeff(coeff) { + } + __device__ inline float operator()(const float t, const float y) const { + const float d = y - t; + return -_coeff * (__fdividef(2.0f * d, 1.0f + d*d) /*- __fdividef(t, y)*/); + } + }; +}; + +class SumOfSquaresCostLayer : public CostLayer { +protected: + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); +public: + SumOfSquaresCostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +/* + * Input 0: labels + * Input 1: energies + */ +class MultiSoftmaxCostLayer : public CostLayer { +protected: + NVMatrix _probsT; + Matrix _cpuProbs, _cpuLabels, _energies_T_CPU; + std::vector B; + int _setSize, _numOut, _threads; + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); +public: + MultiSoftmaxCostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); + void computeCost(bool useEnergies); +}; + +/* + * input 0: gates + * input 1: what to sum and square + */ +class GatedSumOfSquaresCostLayer : public CostLayer { +protected: + NVMatrix _ungated; + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); +public: + GatedSumOfSquaresCostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +class TICACostLayer : public CostLayer { +protected: + int _sizeX, _channels; + void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); + void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); +public: + TICACostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict); +}; + +#endif /* LAYER_CUH */ + diff --git a/include/layer_kernels.cuh b/include/layer_kernels.cuh new file mode 100644 index 0000000..df4e5a6 --- /dev/null +++ b/include/layer_kernels.cuh @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef LAYER_KERNELS_CUH +#define LAYER_KERNELS_CUH + +#include +#include +#include + +#define LOGREG_GRAD_THREADS_X 32 +#define LOGREG_GRAD_THREADS_Y 4 + +#define LOGREG_ERR_THREADS_X 128 +#define LOGREG_ERR_THREADS_Y 1 + +__device__ inline float safelog(const float x) { + return x > 0.0f ? __logf(x) : -50.0f; +} + +void computeCrossEntCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& labelLogProbs_out, NVMatrix& correctProbs_out); +void computeCrossEntGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff); +void computeSoftmaxGrad(NVMatrix& acts, NVMatrix& actsGrad, NVMatrix& target, bool add); + +void computeLogregCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& labelLogProbs_out, NVMatrix& correctProbs_out); +void computeLogregGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff); + + +// Numerical stability optimization: this routine combines computeLogregGrad with computeSoftmaxGrad +// to avoid dividing and then multiplying by quantities that may be near zero. +void computeCrossEntSoftmaxGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff); +void computeLogregSoftmaxGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff); +void computeEltwiseMaxGrad(NVMatrix& actGrad, NVMatrix& input, NVMatrix& output, NVMatrix& target, bool add); + +void MSMBackward(NVMatrix& energies, NVMatrix& bLattice, int setSize); +void MultiSoftmaxCPU(Matrix& elts, Matrix& B, Matrix& probs, int size, int fixed); +void MultiSoftmaxCPU_T(Matrix& elts, Matrix& B, Matrix& probs, Matrix& fixed, int size); + +void computeMultiSoftmaxCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& energies, NVMatrix& labelLogProbs_out, + NVMatrix& correctProbs_out, NVMatrix& top5Probs_out, int setSize, bool useEnergies); +#endif /* LAYER_KERNELS_CUH */ + diff --git a/include/lr.cuh b/include/lr.cuh new file mode 100644 index 0000000..20182a6 --- /dev/null +++ b/include/lr.cuh @@ -0,0 +1,77 @@ +#ifndef LR_CUH +#define LR_CUH + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * The maximum learning rate is _baseRate. + * The minimum learning rate is _baseRate / _tgtFactor. + * + * These classes define annealing schedules that interpolate between these + * two extrema.
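+ * For example, a schedule constructed with base = 0.01 and tgtFactor = 100 anneals
+ * the learning rate from 0.01 down to 0.0001 as the training progress passed to
+ * getRate() goes from 0 to 1.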
+ */ +class LearningRateSchedule { +protected: + double _baseRate, _noiseStdev, _randnSpare; + bool _haveRandnSpare; + virtual double _getRate(double progress); + double randn(); + double rand() const; + double abs(double x) const; +public: + LearningRateSchedule(double base); + LearningRateSchedule(double base, double noiseStdev); + double getRate(double progress); + double getBaseRate() const; + virtual ~LearningRateSchedule(); + + static LearningRateSchedule& make(PyObject* lrsDict, double base); +}; + +class LinearLRS : public LearningRateSchedule { +protected: + double _finalRate; +public: + LinearLRS(double base, double tgtFactor, double noiseStdev); + virtual double _getRate(double progress); +}; + +class ExpLRS : public LearningRateSchedule { +protected: + double _pow; +public: + ExpLRS(double baseRate, double tgtFactor, double noiseStdev); + virtual double _getRate(double progress); +}; + +class TanhLRS : public LearningRateSchedule { +protected: + double _alpha, _beta; +public: + TanhLRS(double baseRate, double tgtFactor, double noiseStdev); + virtual double _getRate(double progress); +}; + +class DiscreteExpLRS : public LearningRateSchedule { +protected: + std::vector _rates; +public: + DiscreteExpLRS(double baseRate, double tgtFactor, double noiseStdev, int numSteps); + virtual double _getRate(double progress); +}; + +class JumpyDiscreteExpLRS : public DiscreteExpLRS { +public: + JumpyDiscreteExpLRS(double baseRate, double tgtFactor, double noiseStdev, int numSteps); + virtual double _getRate(double progress); +}; + +#endif /* LR_CUH */ diff --git a/include/messages.cuh b/include/messages.cuh new file mode 100644 index 0000000..dbdaab9 --- /dev/null +++ b/include/messages.cuh @@ -0,0 +1,133 @@ +/* + * messages.cuh + * + * Created on: 2013-02-25 + * Author: spoon + */ + +#ifndef MESSAGES_CUH_ +#define MESSAGES_CUH_ + +#include + +enum MESSAGES { FPROP_TERMINAL, + BPROP_TERMINAL, + BPROP_READY, + FPROP_READY, + SYNC, + COPY_TO_CPU, + COPY_TO_GPU, + UPDATE_WEIGHTS, + RESET, + COST_COMPUTED, + BPROP_START, +// COPY, +// DEQUANTIZE, + RUNME}; + +class Message { +protected: + MESSAGES _messageType; +public: + MESSAGES getMessageType() { + return _messageType; + } + Message(MESSAGES messageType) : _messageType(messageType) { + } + virtual ~Message() { + } +}; + +/* + * A message that performs some simple function in its run method. 
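+ * The subclasses below illustrate this: CopyMessage copies a source NVMatrix into a
+ * target matrix, and DequantizeMessage asks a Quantizer to dequantize into a target
+ * matrix.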
+ */ +class RunMeMessage : public Message { +public: + RunMeMessage() : Message(RUNME) { + } + virtual void run() = 0; + + virtual ~RunMeMessage() { + } +}; + +class CopyMessage : public RunMeMessage { +protected: + NVMatrix* _src, *_tgt; +public: + CopyMessage(NVMatrix* src, NVMatrix* tgt) : _src(src), _tgt(tgt), RunMeMessage() { + } + void run() { + _src->copy(*_tgt); + } + ~CopyMessage() { + assert(_src->isView()); + delete _src; + } +}; + +class DequantizeMessage : public RunMeMessage { +protected: + Quantizer* _q; + NVMatrix *_tgt; +public: + DequantizeMessage(Quantizer* q, NVMatrix* tgt) : _q(q), _tgt(tgt), RunMeMessage() { + } + void run() { + _q->dequantize(*_tgt); + } + ~DequantizeMessage() { + } +}; + +class PropMessage : public Message { +protected: + std::string _fromLayer, _toLayer; + PASS_TYPE _passType; +public: + std::string& getFromLayer() { + return _fromLayer; + } + + std::string& getToLayer() { + return _toLayer; + } + + PASS_TYPE getPassType() { + return _passType; + } + PropMessage(std::string fromLayer, std::string toLayer, PASS_TYPE passType, MESSAGES msgType) + : _fromLayer(fromLayer), _toLayer(toLayer), _passType(passType), Message(msgType) { + } +}; + +class FpropMessage : public PropMessage { +public: + FpropMessage(std::string fromLayer, std::string toLayer, PASS_TYPE passType) + : PropMessage(fromLayer, toLayer, passType, FPROP_READY) { + } +}; + +class BpropMessage : public PropMessage { +public: + BpropMessage(std::string fromLayer, std::string toLayer, PASS_TYPE passType) + : PropMessage(fromLayer, toLayer, passType, BPROP_READY) { + } +}; + +class BpropStartMessage : public Message { +protected: + PASS_TYPE _passType; +public: + PASS_TYPE getPassType() { + return _passType; + } + + BpropStartMessage(PASS_TYPE passType) + : _passType(passType), Message(BPROP_START) { + } +}; + + + +#endif /* MESSAGES_CUH_ */ diff --git a/include/multisoftmax.h b/include/multisoftmax.h new file mode 100644 index 0000000..3effad4 --- /dev/null +++ b/include/multisoftmax.h @@ -0,0 +1,38 @@ +/* + * File: multisoftmax.h + * Author: Alex Krizhevsky + * + * Created on May 9, 2012, 5:36 PM + */ + +#ifndef MULTISOFTMAX_H +#define MULTISOFTMAX_H + +#include +#include +#include +#include + +#ifndef DIVUP +#define DIVUP(x, y) (((x) + (y) - 1) / (y)) +#endif + +#define EXP exp +#define LOG log +#define INF 1e35f + +class MultiSoftmaxWorker : public Thread { +protected: + Matrix* _elts, *_B, *_probs, *_fixed; + int _size; + bool _nofix; + void* run(); +public: + MultiSoftmaxWorker(Matrix* elts, Matrix* B, Matrix* probs, Matrix* _fixed, int size, bool nofix); + virtual ~MultiSoftmaxWorker(); +}; + +void MultiSoftmaxCPU_T_parallel(Matrix& elts, std::vector& B, Matrix& probs, Matrix& fixed, int size, bool nofix); + +#endif /* MULTISOFTMAX_H */ + diff --git a/include/neuron.cuh b/include/neuron.cuh new file mode 100644 index 0000000..5729a96 --- /dev/null +++ b/include/neuron.cuh @@ -0,0 +1,529 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef NEURONS_CUH +#define NEURONS_CUH + +#include +#include +#include +#include + +template +class AddGradientBinaryOperator { + GradientOp _op; +public: + AddGradientBinaryOperator(GradientOp op) : _op(op) { + } + __device__ inline float operator()(const float unitActGrad, const float unitAct, const float target) const { + return _op(unitActGrad, unitAct) + target; + } +}; + +template +class AddGradientOperator { + GradientOp _op; +public: + AddGradientOperator(GradientOp op) : _op(op) { + } + __device__ inline float operator()(const float unitActGrad, const float target) const { + return target + _op(unitActGrad); + } +}; + +/* ======================= + * Neuron + * ----------------------- + * + * f(x) = x + * ======================= + */ +class Neuron { +protected: + bool _activated; + // Inputs and outputs potentially point to the same matrix, depending on the neuron + NVMatrix* _inputs, *_outputs; + virtual void _activate() { + if (_inputs != _outputs) { + _inputs->copy(*_outputs); + } + } + virtual void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + if (&target != &actsGrad) { + actsGrad.copy(target); + } + } + virtual void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + if (&target != &actsGrad) { + target.add(actsGrad); + } + } +public: + Neuron() : _activated(false), _inputs(NULL), _outputs(NULL) { + } + virtual void activate(NVMatrix& inputs, NVMatrix& outputs) { + _activated = true; + _inputs = &inputs; + _outputs = &outputs; + _activate(); + } + + virtual void computeInputGrad(NVMatrix& actsGrad, NVMatrix& target, bool add) { + assert(_activated); + if (!add) { + target.resize(actsGrad); + _computeInputGrad(actsGrad, target); + } else { + _addInputGrad(actsGrad, target); + } + } + + static Neuron& makeNeuron(PyObject* neuronDict); +}; + +/* ======================= + * LogisticNeuron + * ----------------------- + * + * f(x) = 1 / (1 + e^-x) + * ======================= + */ +class LogisticNeuron : public Neuron { +protected: + void _activate() { + _inputs->apply(NVMatrixOps::Logistic(), *_outputs); + } + + void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyBinary(LogisticGradientOperator(), *_outputs, target); + } + + void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyTernary(AddGradientBinaryOperator(LogisticGradientOperator()), *_outputs, target, target); + } +public: + class LogisticGradientOperator { + public: + __device__ inline float operator()(float unitActGrad, float unitAct) const { + 
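+            // Chain rule with the logistic derivative written in terms of the activation: f'(x) = f(x) * (1 - f(x)).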
return unitActGrad * unitAct * (1.0f - unitAct); + } + }; + + LogisticNeuron() : Neuron() { + } +}; + +/* ======================= + * ReluNeuron + * ----------------------- + * + * f(x) = max(0, x) + * ======================= + */ +class ReluNeuron : public Neuron { +protected: + virtual void _activate() { + _inputs->apply(ReluOperator(), *_outputs); + } + + void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyBinary(ReluGradientOperator(), *_outputs, target); + } + + void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyTernary(AddGradientBinaryOperator(ReluGradientOperator()), *_outputs, target, target); + } +public: + class ReluOperator { + public: + __device__ inline float operator()(float x) const { + return x < 0.0f ? 0.0f : x; + } + }; + + class ReluGradientOperator { + public: + __device__ inline float operator()(float unitActGrad, float unitAct) const { + return unitActGrad * (unitAct > 0.0f); + } + }; + + ReluNeuron() : Neuron() { + } +}; + +/* ======================= + * NoisyReluNeuron + * ----------------------- + * + * f(x) = max(0, max(0, x) + gaussian noise with variance equal to max(0, x)) + * ======================= + */ +class NoisyReluNeuron : public ReluNeuron { +protected: + void _activate() { + ReluNeuron::_activate(); + _outputs->addGaussianNoise(*_outputs, false); + _outputs->apply(ReluOperator()); + } +public: + NoisyReluNeuron() : ReluNeuron() { + } +}; + +/* ======================= + * BoundedReluNeuron + * ----------------------- + * + * f(x) = min(a, max(0, x)) + * ======================= + */ +class BoundedReluNeuron : public Neuron { +protected: + float _a; + + void _activate() { + _inputs->apply(BoundedReluOperator(_a), *_outputs); + } + + void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyBinary(BoundedReluGradientOperator(_a), *_outputs, target); + } + + void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyTernary(AddGradientBinaryOperator(BoundedReluGradientOperator(_a)), *_outputs, target, target); + } +public: + class BoundedReluOperator { + private: + float _a; + public: + BoundedReluOperator(float a) : _a(a) { + } + __device__ inline float operator()(float x) const { + return x < 0.0f ? 0.0f : x > _a ? _a : x; + } + }; + + class BoundedReluGradientOperator { + private: + float _a; + public: + BoundedReluGradientOperator(float a) : _a(a) { + } + __device__ inline float operator()(float unitActGrad, float unitAct) const { + return unitActGrad * (unitAct > 0.0f) * (unitAct < _a); + } + }; + + BoundedReluNeuron(float a) : Neuron(), _a(a) { + } +}; + +/* ======================= + * AbsNeuron + * ----------------------- + * + * f(x) = abs(x) + * ======================= + */ +class AbsNeuron : public Neuron { +protected: + void _activate() { + assert(_inputs != _outputs); + _inputs->apply(NVMatrixOps::Abs(), *_outputs); + } + + void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyBinary(AbsGradientOperator(), *_inputs, target); + } + + void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyTernary(AddGradientBinaryOperator(AbsGradientOperator()), *_inputs, target, target); + } +public: + class AbsGradientOperator { + public: + __device__ inline float operator()(float unitActGrad, float unitInput) const { + return unitActGrad * (unitInput > 0.0f ? 
1.0f : -1.0f); + } + }; + + AbsNeuron() : Neuron() { + } +}; + +/* ======================= + * TanhNeuron + * ----------------------- + * + * f(x) = a*tanh(b*x) + * ======================= + */ +class TanhNeuron : public Neuron { +protected: + float _a, _b; + + void _activate() { + _inputs->apply(TanhOperator(_a, _b), *_outputs); + } + + void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyBinary(TanhGradientOperator(_a, _b), *_outputs, target); + } + + void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyTernary(AddGradientBinaryOperator(TanhGradientOperator(_a, _b)), *_outputs, target, target); + } +public: + class TanhOperator { + private: + float _a, _n2b; + public: + TanhOperator(float a, float b) : _a(a), _n2b(-2*b) { + } + virtual __device__ inline float operator()(float x) const { + return _a * (__fdividef(2.0f, 1.0f + __expf(x * _n2b)) - 1.0f); + } + }; + + class TanhGradientOperator { + private: + float _b, _a; + public: + TanhGradientOperator(float a, float b) : _b(b), _a(a) { + } + __device__ inline float operator()(float unitActGrad, float unitAct) const { +// const float t = (1.0f - __fdividef(unitAct, _a)) / 2.0f; +// return unitActGrad * _n4ab * (t * (t - 1.0f)); + return unitActGrad * _b * (_a - __fdividef(unitAct * unitAct, _a)); + } + }; + + TanhNeuron(float a, float b) : Neuron(), _a(a), _b(b) { + } +}; + +/* ======================= + * DoubleReluNeuron + * ----------------------- + * + * f(x) = x - a*tanh(x/a) + * ======================= + */ +class DoubleReluNeuron : public Neuron { +protected: + float _a; + + void _activate() { + assert(_inputs != _outputs); + _inputs->apply(DoubleReluOperator(_a), *_outputs); + } + + void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyBinary(DoubleReluGradientOperator(_a), *_inputs, target); + } + + void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyTernary(AddGradientBinaryOperator(DoubleReluGradientOperator(_a)), *_inputs, target, target); + } +public: + class DoubleReluOperator { + private: + float _a, _n2a; + public: + DoubleReluOperator(float a) : _a(a), _n2a(-2.0f / a) { + } + virtual __device__ inline float operator()(float x) const { + return x - _a * (__fdividef(2.0f, 1.0f + __expf(_n2a * x)) - 1.0f); + } + }; + + class DoubleReluGradientOperator { + private: + float _n2a; + public: + DoubleReluGradientOperator(float a) : _n2a(-2.0f / a) { + } + __device__ inline float operator()(float unitActGrad, float unitInput) const { + const float tanh = __fdividef(2.0f, 1.0f + __expf(_n2a * unitInput)) - 1.0f; + return unitActGrad * (tanh*tanh); + } + }; + + DoubleReluNeuron(float a) : Neuron(), _a(a) { + } +}; + +/* ======================= + * SoftReluNeuron + * ----------------------- + * + * f(x) = log(1 + e^x) + * ======================= + */ +class SoftReluNeuron : public Neuron { +protected: + void _activate() { + assert(_inputs != _outputs); + _inputs->apply(SoftReluOperator(), *_outputs); + } + + void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyBinary(SoftReluGradientOperator(), *_inputs, target); + } + + void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyTernary(AddGradientBinaryOperator(SoftReluGradientOperator()), *_inputs, target, target); + } +public: + class SoftReluOperator { + public: + __device__ inline float operator()(float x) const { + // This piece-wise implementation has better numerical stability than + // simply computing log(1 + e^x). + return x > 4.0f ? 
x : __logf(1.0f + __expf(x)); + } + }; + + class SoftReluGradientOperator { + public: + __device__ inline float operator()(float unitActGrad, float unitInput) const { + if (unitInput > 4.0f) { + return unitActGrad; + } + const float f = __expf(unitInput); + return unitActGrad * __fdividef(f, 1.0f + f); + } + }; + + SoftReluNeuron() : Neuron() { + } +}; + +/* ======================= + * SquareNeuron + * ----------------------- + * + * f(x) = x^2 + * ======================= + */ +class SquareNeuron : public Neuron { +protected: + void _activate() { + assert(_inputs != _outputs); + _inputs->apply(NVMatrixOps::Square(), *_outputs); + } + + void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyBinary(SquareGradientOperator(), *_inputs, target); + } + + void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyTernary(AddGradientBinaryOperator(SquareGradientOperator()), *_inputs, target, target); + } +public: + class SquareGradientOperator { + public: + __device__ inline float operator()(float unitActGrad, float unitInput) const { + return unitActGrad * 2.0f * unitInput; + } + }; + + SquareNeuron() : Neuron() { + } +}; + +/* ======================= + * SqrtNeuron + * ----------------------- + * + * f(x) = sqrt(x) + * ======================= + */ +class SqrtNeuron : public Neuron { +protected: + void _activate() { + _inputs->apply(NVMatrixOps::Sqrt(), *_outputs); + } + + void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyBinary(SqrtGradientOperator(), *_outputs, target); + } + + void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyTernary(AddGradientBinaryOperator(SqrtGradientOperator()), *_outputs, target, target); + } +public: + class SqrtGradientOperator { + public: + __device__ inline float operator()(float unitActGrad, float unitAct) const { + return __fdividef(unitActGrad, 2.0f * unitAct); + } + }; + + SqrtNeuron() : Neuron() { + } +}; + +/* ======================= + * LinearNeuron + * ----------------------- + * + * f(x) = a*x + b + * ======================= + */ +class LinearNeuron : public Neuron { +protected: + float _a, _b; + void _activate() { + _inputs->apply(LinearOperator(_a, _b), *_outputs); + } + + void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.scale(_a, target); + } + + void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) { + actsGrad.applyBinary(AddGradientOperator(NVMatrixOps::MultByScalar(_a)), target, target); + } +public: + class LinearOperator { + protected: + float _a, _b; + public: + __device__ inline float operator()(float x) const { + return _a * x + _b; + } + LinearOperator(float a, float b) : _a(a), _b(b) { + } + }; + + LinearNeuron(float a, float b) : Neuron(), _a(a), _b(b) { + } +}; +#endif /* NEURONS_CUH */ + diff --git a/include/pipedispenser.cuh b/include/pipedispenser.cuh new file mode 100644 index 0000000..809d1ba --- /dev/null +++ b/include/pipedispenser.cuh @@ -0,0 +1,139 @@ +/* + * pipedispenser.cuh + * + * Created on: 2013-03-01 + * Author: spoon + */ + +#ifndef PIPEDISPENSER_CUH_ +#define PIPEDISPENSER_CUH_ + +#include +#include +#include +#include +#include + +class PipeDispenser { +protected: + int _numPipes; + seti _pipes; + pthread_mutex_t *_mutex; + void lock() { + pthread_mutex_lock(_mutex); + } + + void unlock() { + pthread_mutex_unlock(_mutex); + } +public: + PipeDispenser(const seti& pipes) { + _pipes.insert(pipes.begin(), pipes.end()); + _mutex = (pthread_mutex_t*)(malloc(sizeof (pthread_mutex_t))); + 
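+        // The heap-allocated mutex guards the shared pipe set; subclasses take it in getPipe()/freePipe().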
pthread_mutex_init(_mutex, NULL); + } + + virtual ~PipeDispenser() { + pthread_mutex_destroy(_mutex); + free(_mutex); + } + + virtual int getPipe(const seti& interested) = 0; + int getPipe(int interested) { + seti tmp; + tmp.insert(interested); + return getPipe(tmp); + } + virtual void freePipe(int pipe) = 0; +}; + +/* + * This one blocks until there is a free pipe to return. + */ +class PipeDispenserBlocking : public PipeDispenser { +protected: + pthread_cond_t *_cv; + + void wait() { + pthread_cond_wait(_cv, _mutex); + } + + void broadcast() { + pthread_cond_broadcast(_cv); + } + + int getAvailablePipes(const seti& interested, intv& available) { + available.clear(); + std::set_intersection(_pipes.begin(), _pipes.end(), interested.begin(), interested.end(), std::back_inserter(available)); + return available.size(); + } +public: + PipeDispenserBlocking(const seti& pipes) : PipeDispenser(pipes) { + _cv = (pthread_cond_t*)(malloc(sizeof (pthread_cond_t))); + pthread_cond_init(_cv, NULL); + } + + ~PipeDispenserBlocking() { + pthread_cond_destroy(_cv); + free(_cv); + } + + int getPipe(const seti& interested) { + lock(); + intv avail; + while (getAvailablePipes(interested, avail) == 0) { + wait(); + } + int pipe = avail[0]; + _pipes.erase(pipe); + unlock(); + return pipe; + } + + void freePipe(int pipe) { + lock(); + _pipes.insert(pipe); + broadcast(); + unlock(); + } +}; + +/* + * This one returns the least-occupied pipe. + */ +class PipeDispenserNonBlocking : public PipeDispenser { +protected: + std::map _pipeUsers; + +public: + PipeDispenserNonBlocking(const seti& pipes) : PipeDispenser(pipes) { + for (seti::iterator it = pipes.begin(); it != pipes.end(); ++it) { + _pipeUsers[*it] = 0; + } + } + + int getPipe(const seti& interested) { + lock(); + int pipe = -1, users = 1 << 30; + for (seti::iterator it = _pipes.begin(); it != _pipes.end(); ++it) { + if (interested.count(*it) > 0 && _pipeUsers[*it] < users) { + pipe = *it; + users = _pipeUsers[*it]; + } + } + if (pipe >= 0) { + _pipeUsers[pipe]++; + } + unlock(); + return pipe; + } + + void freePipe(int pipe) { + lock(); + _pipeUsers[pipe]--; + unlock(); + } +}; + + +#endif /* PIPEDISPENSER_CUH_ */ diff --git a/include/pyconvnet.cuh b/include/pyconvnet.cuh new file mode 100644 index 0000000..5c5fc67 --- /dev/null +++ b/include/pyconvnet.cuh @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
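/*
 * Illustrative sketch (not part of the original sources): how a worker thread
 * might use the blocking dispenser defined above. A "pipe" here is just an
 * integer resource id (for instance a device index); the function name and the
 * particular pipe ids are invented for the example.
 */
void examplePipeUse(PipeDispenserBlocking& dispenser) {
    seti interested;                            // seti is the std::set<int> typedef from util.cuh
    interested.insert(0);
    interested.insert(1);                       // this worker can run on pipe 0 or pipe 1
    int pipe = dispenser.getPipe(interested);   // blocks until one of them becomes free
    // ... do work on 'pipe' ...
    dispenser.freePipe(pipe);                   // return it and wake anyone blocked in getPipe()
}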
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef PYCONVNET3_CUH +#define PYCONVNET3_CUH + +#define _QUOTEME(x) #x +#define QUOTEME(x) _QUOTEME(x) + +extern "C" void INITNAME(); + +PyObject* initModel(PyObject *self, PyObject *args); +PyObject* startBatch(PyObject *self, PyObject *args); +PyObject* finishBatch(PyObject *self, PyObject *args); +PyObject* checkGradients(PyObject *self, PyObject *args); +PyObject* syncWithHost(PyObject *self, PyObject *args); +PyObject* startMultiviewTest(PyObject *self, PyObject *args); +PyObject* startFeatureWriter(PyObject *self, PyObject *args); +PyObject* startDataGrad(PyObject *self, PyObject *args); +#endif diff --git a/include/quantizer.cuh b/include/quantizer.cuh new file mode 100644 index 0000000..e4f2251 --- /dev/null +++ b/include/quantizer.cuh @@ -0,0 +1,43 @@ +/* + * quantizer.cuh + * + * Created on: 2013-02-15 + * Author: spoon + */ + +#ifndef QUANTIZER_CUH_ +#define QUANTIZER_CUH_ + +#include +#include +#include +#include +#include + +class Quantizer { +protected: + NVMatrix* _quantized; + int _numRows, _numCols; + bool _trans; + virtual void _quantize(NVMatrix& src, NVMatrix& tgt); + virtual void _dequantize(NVMatrix& tgt, float scaleTarget, float scaleOutput); +public: + Quantizer(); + virtual ~Quantizer(); + void quantize(NVMatrix& src, NVMatrix& tgt); + void dequantize(NVMatrix& tgt); + void dequantize(NVMatrix& tgt, float scaleTarget, float scaleOutput); + + static Quantizer& make(PyObject* qDict); +}; + +class HalfQuantizer : public Quantizer { +protected: + void _quantize(NVMatrix& src, NVMatrix& tgt); + void _dequantize(NVMatrix& tgt, float scaleTarget, float scaleOutput); +public: + HalfQuantizer(); +}; + + +#endif /* QUANTIZER_CUH_ */ diff --git a/include/softmaxtree.cuh b/include/softmaxtree.cuh new file mode 100644 index 0000000..5b1a97d --- /dev/null +++ b/include/softmaxtree.cuh @@ -0,0 +1,144 @@ +/* + * File: softmaxtree.h + * Author: Alex Krizhevsky + * + * Created on September 9, 2012, 5:50 PM + */ + +#ifndef SOFTMAXTREE_H +#define SOFTMAXTREE_H + +#include +#include +#include +#include +#include +#include + +#include +#include + +class SoftmaxNode; +class SoftmaxTree; +typedef std::vector SoftmaxNodeV; + +class SoftmaxNode { + friend class SoftmaxTree; +protected: + SoftmaxNodeV _children; + SoftmaxNode* _parent; + int _depth, _height, _size; + int _label; + + /* + * Computes height for entire subtree rooted at this node and populates + * given height->nodes map. + */ + int setDistances(std::map& nodeHeights, + std::map& nodeDepths); + + void setNodeCounts(int &nodes, int& leaves); + /* + * Compute the number of leaves in this subtree, which is a good estimate + * of the number of training cases it represents. 
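     * For example (illustrative numbers, not taken from the sources): if the
     * root has children A and B, where A is a leaf and B has two leaf children,
     * setSizes records size 1 for A, 2 for B and 3 for the root, so nodes higher
     * up the tree stand for proportionally more training cases.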
+ */ + int setSizes(ushort* nodeSizes); + +public: + SoftmaxNode(SoftmaxNode* parent, int label); + ~SoftmaxNode(); + SoftmaxNode& addChild(int label); + + int getDepth() const; + int getHeight() const; + int getLabel() const; + int getSize() const; + SoftmaxNode* getParent(); // Might be null, so must be pointer + SoftmaxNodeV& getChildren(); +}; + + +/* + * numLabels: the number of leaves in the tree (normally 1000) + * numNodes: the total number of nodes in the tree + */ +class SoftmaxTree { + friend class SoftmaxNode; +protected: + SoftmaxNode* _root; + std::map _nodeHeights, _nodeDepths; + /* + * Map from depth --> ushort2[] + * where each ushort2 gives the index and parent index + * of a node at the given depth. + */ + std::map _nodeFwdMeta; + /* + * Map from height --> ushort2[] + * where each ushort2 gives the index and number of children + * of a node at the given height. + */ + std::map _nodeBwdMeta; + /* + * Map from height --> ushort[][] + * where each ushort[] gives children of a given node at a given height. + */ + std::map _nodeChildMeta; + + /* + * An array of length numNodes with index i storing the number + * of leaves in subtree rooted at node with label i. + */ + ushort* _nodeSizes; + int _numNodes, _numLeaves; + void setDistances(); + void setNodeCounts(); + void setNodeSizes(); + void setFwdMeta(); + void setBwdMeta(); + void preprocess(NVMatrix& inp); + void postprocess(NVMatrix& inp); +public: + SoftmaxTree(int rootLabel); + ~SoftmaxTree(); + + void finalize(); + + SoftmaxNode& getRoot(); + SoftmaxNodeV& getNodesAtHeight(int height); + SoftmaxNodeV& getNodesAtDepth(int depth); + int getHeight() const; + int getDepth() const; + int getNumLeaves() const; + int getNumNodes() const; + + /* + * offsets: (numNodes, numFeatures) + * targets: (numNodes, numFeatures) + */ + void makeWeights(NVMatrix& offsets, NVMatrix& targets); + + /* + * grads: (numNodes, numFeatures) + * + * The idea is that grads contains gradients for the leaves + * (i.e. the first numLabels rows), so this routine will + * distribute them up the tree. + */ + void distributeGradients(NVMatrix& grads); + + /* + * inc := mom * inc - wc * epsW * weight + epsW * grad + * weight := weight + inc + * + * weights: (numNodes, numFeatures) + * incs: (numNodes, numFeatures) + * grads: (numNodes , numFeatures) + */ + void updateWeights(NVMatrix& weights, NVMatrix& incs, NVMatrix& grads, float epsWBase, float mom, float wcBase); + + +}; + +#endif /* SOFTMAXTREE_H */ + diff --git a/include/util.cuh b/include/util.cuh new file mode 100644 index 0000000..7b92959 --- /dev/null +++ b/include/util.cuh @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
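/*
 * Illustrative sketch (not part of the original sources): building a toy label
 * tree with the SoftmaxNode/SoftmaxTree interface declared above. The labels
 * are invented, and finalize() is assumed to be the point at which depths,
 * heights and node sizes are computed before the weight routines are used.
 */
void exampleBuildTree() {
    SoftmaxTree tree(1000);                      // hypothetical root label
    SoftmaxNode& root = tree.getRoot();
    SoftmaxNode& animals = root.addChild(1001);  // an internal node
    animals.addChild(0);                         // leaf labels 0 and 1
    animals.addChild(1);
    root.addChild(2);                            // a leaf directly under the root
    tree.finalize();
    // For this toy tree: getNumLeaves() == 3 and getNumNodes() == 5.
}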
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UTIL_H +#define UTIL_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * The types of passes that the convnet supports. Used in the fprop and bprop functions in + * ConvNet class. Most of the layers ignore the pass type, but some make use of it. + */ +//enum PASS_TYPE {PASS_TRAIN, +// PASS_TEST, +// PASS_GC, +// PASS_MULTIVIEW_TEST, +// PASS_MULTIVIEW_TEST_START, +// PASS_MULTIVIEW_TEST_END, +// PASS_FEATURE_GEN}; + +#define PASS_TYPE uint +#define PASS_TRAIN 0x1 +#define PASS_TEST 0x2 +#define PASS_GC 0x4 +#define PASS_MULTIVIEW_TEST (PASS_TEST | 0x8) +#define PASS_MULTIVIEW_TEST_START (PASS_MULTIVIEW_TEST | 0x10) +#define PASS_MULTIVIEW_TEST_END (PASS_MULTIVIEW_TEST | 0x20) +#define PASS_FEATURE_GEN 0x40 + +#define HAS_FLAG(f, x) (((x) & (f)) == (f)) +#define IS_MULTIVIEW_TEST(x) HAS_FLAG(PASS_MULTIVIEW_TEST, x) +#define IS_MULTIVIEW_TEST_START(x) HAS_FLAG(PASS_MULTIVIEW_TEST_START, x) +#define IS_MULTIVIEW_TEST_END(x) HAS_FLAG(PASS_MULTIVIEW_TEST_END, x) + +// For gradient checking +#define GC_SUPPRESS_PASSES false +#define GC_REL_ERR_THRESH 0.02 + +/* + * Generates a random floating point number in the range 0-1. + */ +#define randf ((float)rand() / RAND_MAX) + +typedef std::vector MatrixV; +typedef std::vector NVMatrixV; +typedef std::map*> CostMap; +typedef std::map CostCoeffMap; +typedef std::vector doublev; +typedef std::vector floatv; +typedef std::vector intv; +typedef std::vector stringv; +typedef std::set seti; + +stringv* getStringV(PyObject* pyList); +floatv* getFloatV(PyObject* pyList); +intv* getIntV(PyObject* pyList); +MatrixV* getMatrixV(PyObject* pyList); +MatrixV* getMatrixV(PyObject* pyList, int len); +int* getIntA(PyObject* pyList); + +int pyDictGetInt(PyObject* dict, const char* key); +intv* pyDictGetIntV(PyObject* dict, const char* key); +std::string pyDictGetString(PyObject* dict, const char* key); +float pyDictGetFloat(PyObject* dict, const char* key); +floatv* pyDictGetFloatV(PyObject* dict, const char* key); +Matrix* pyDictGetMatrix(PyObject* dict, const char* key); +MatrixV* pyDictGetMatrixV(PyObject* dict, const char* key); +int* pyDictGetIntA(PyObject* dict, const char* key); +stringv* pyDictGetStringV(PyObject* dict, const char* key); + +template +std::string tostr(T n) { + std::ostringstream result; + result << n; + return result.str(); +} + +#endif /* UTIL_H */ + diff --git a/include/weights.cuh b/include/weights.cuh new file mode 100644 index 0000000..593aaf8 --- /dev/null +++ b/include/weights.cuh @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
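/*
 * Illustrative sketch (not part of the original sources): how the PASS_* bit
 * flags defined in util.cuh compose. The multiview-test passes include the
 * PASS_TEST bit, so code that only checks for PASS_TEST also treats multiview
 * test passes as test passes.
 */
#include <cassert>

void examplePassFlags() {
    PASS_TYPE p = PASS_MULTIVIEW_TEST_START;   // == PASS_TEST | 0x8 | 0x10
    assert(HAS_FLAG(PASS_TEST, p));            // it counts as a test pass
    assert(IS_MULTIVIEW_TEST(p));              // and as a multiview test pass
    assert(IS_MULTIVIEW_TEST_START(p));        // specifically the first view
    assert(!IS_MULTIVIEW_TEST_END(p));         // but not the last one
}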
+ * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef WEIGHTS_CUH +#define WEIGHTS_CUH + +#include +#include +#include +#include +#include +#include +#include +#include "util.cuh" +#include "softmaxtree.cuh" +#include + +using namespace std; + +class Weights { +protected: + Matrix* _hWeights, *_hWeightsInc; + NVMatrix* _weights, *_weightsInc, *_weightsGrad; + NVMatrix* _weightsGradAvg, *_weightsGrad2Avg; + + LearningRateSchedule* _lrs; + + float _wc, _mom, _wball, _superEps; + bool _onGPU, _useGrad, _cleanup; + int _numUpdates; + + // Non-NULL if these weights are really shared from some other layer + Weights* _srcWeights; +public: + + class Grad2AvgOperator { + private: + float _mom; + public: + Grad2AvgOperator(float mom) : _mom(mom) { + } + __device__ inline float operator()(const float G2, const float g) const { + return _mom * G2 + (1.0f - _mom) * g * g; + } + }; + + NVMatrix& operator*() const; + + Weights(Weights& srcWeights, LearningRateSchedule& lrs); + Weights(Matrix& hWeights, Matrix& hWeightsInc, LearningRateSchedule& lrs, float wc, float wball, float mom, float superEps, bool useGrad, bool cleanup=true); + + virtual ~Weights(); + + virtual NVMatrix& getW() const; + virtual NVMatrix& getInc() const; + virtual NVMatrix& getGrad() const; + virtual Matrix& getCPUW() const; + virtual Matrix& getCPUWInc() const; + virtual LearningRateSchedule& getLearningRateSchedule() const; + virtual int getNumRows() const; + virtual int getNumCols() const; + virtual void copyToCPU(); + + // This function is assumed to be called in the order in which the layers + // were defined + virtual void copyToGPU(); + + virtual void update(float progress); + int incNumUpdates(); + + // Returns the number of times a gradient has been computed for this + // weight matrix during the current pass (interval between two calls of update()) + // through the net. This number will only be greater than 1 if this weight matrix + // is *shared* by multiple layers in the net. 
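/*
 * Illustrative sketch (not part of the original file): the Grad2AvgOperator
 * above maintains an exponential moving average of squared gradients,
 * G2 := mom * G2 + (1 - mom) * g * g. The functor itself is a __device__
 * operator meant for element-wise application; the same update written
 * element-by-element on the host looks like this.
 */
void exampleGrad2Avg(float* grad2Avg, const float* grad, int n, float mom) {
    for (int i = 0; i < n; ++i) {
        grad2Avg[i] = mom * grad2Avg[i] + (1.0f - mom) * grad[i] * grad[i];
    }
}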
+ int getNumUpdates() const; + float getEps(float progress) const; + float getMom() const; + float getWC() const; + float getWBall() const; + bool isUseGrad() const; + bool isOwner() const; + float getSuperEps() const; +}; + +class TreeWeights : public Weights { +protected: + NVMatrix _effWeights; + NVMatrix* _leafWeights, *_leafGrad, *_leafInc; + SoftmaxTree* _tree; + +public: + void copyToGPU(); + void update(float progress); + NVMatrix& getW() const; + NVMatrix& getInc() const; + NVMatrix& getGrad() const; + NVMatrix& getAllW() const; + NVMatrix& getAllInc() const; + NVMatrix& getAllGrad() const; + int getNumRows() const; + + void makeWeights(); + void distributeGradients(); + TreeWeights(SoftmaxTree& tree, Matrix& hWeights, Matrix& hWeightsInc, LearningRateSchedule& lrs, float wcBase, float mom); +}; + +class DummyWeights : public Weights { +public: + DummyWeights(Matrix& hWeights, Matrix& hWeightsInc, NVMatrix& weights, NVMatrix& incs, NVMatrix& grads); +}; + +class WeightList { +private: + std::vector _weightList; + +public: + Weights& operator[](const int idx) const; + ~WeightList(); + WeightList(); + void addWeights(Weights& w); + void update(float progress); + void copyToCPU(); + void copyToGPU(); + int getSize() const; +}; + +#endif /* WEIGHTS_CUH */ diff --git a/include/worker.cuh b/include/worker.cuh new file mode 100644 index 0000000..98424ab --- /dev/null +++ b/include/worker.cuh @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
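/*
 * Illustrative sketch (not part of the original sources): driving the
 * WeightList declared above for one pass. The surrounding training loop and
 * gradient computation are stand-ins.
 */
void exampleWeightListUpdate(WeightList& weights, float progress) {
    weights.copyToGPU();                          // push host-side matrices to the device
    for (int i = 0; i < weights.getSize(); ++i) {
        NVMatrix& grad = weights[i].getGrad();    // gradient buffer a layer would fill (when gradients are in use)
        (void)grad;
    }
    weights.update(progress);                     // momentum / weight-decay step at the given training progress
    weights.copyToCPU();                          // pull weights back, e.g. for checkpointing
}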
+ */ + +#ifndef WORKER_CUH +#define WORKER_CUH + +#include "convnet.cuh" +#include "cost.cuh" +#include "data.cuh" + +class ConvNet; +class Cost; + +class WorkResult { +public: + enum RESULTS {BATCH_DONE, SYNC_DONE}; +protected: + WorkResult::RESULTS _resultType; + Cost* _results; +public: + WorkResult(WorkResult::RESULTS resultType, Cost& results); + WorkResult(WorkResult::RESULTS resultType); + virtual ~WorkResult(); + Cost& getResults() const; + WorkResult::RESULTS getResultType() const; +}; + +class Worker { +protected: + ConvNet* _convNet; +public: + Worker(ConvNet& convNet); + virtual void run() = 0; +}; + +class DataWorker : public Worker { +protected: + CPUData* _data; + DataProvider* _dp; +public: + DataWorker(ConvNet& convNet, CPUData& data); + virtual ~DataWorker(); +}; + +class TrainingWorker : public DataWorker { +protected: + bool _test; + double _progress; +public: + TrainingWorker(ConvNet& convNet, CPUData& data, double progress, bool test); + void run(); +}; + +class SyncWorker : public Worker { +public: + SyncWorker(ConvNet& convNet); + void run(); +}; + +class GradCheckWorker : public DataWorker { +public: + GradCheckWorker(ConvNet& convNet, CPUData& data); + void run(); +}; + +class MultiviewTestWorker : public DataWorker { +protected: + int _numViews; + Matrix* _cpuProbs; + std::string _logregName; +public: + MultiviewTestWorker(ConvNet& convNet, CPUData& data, int numViews, Matrix& cpuProbs, const char* softmaxName); + MultiviewTestWorker(ConvNet& convNet, CPUData& data, int numViews); + ~MultiviewTestWorker(); + virtual void run(); +}; + +class FeatureWorker : public DataWorker { +protected: + MatrixV *_ftrs; + stringv *_layerNames; +public: + FeatureWorker(ConvNet& convNet, CPUData& data, MatrixV& ftrs, stringv& layerNames); + ~FeatureWorker(); + void run(); +}; + +class DataGradWorker : public DataWorker { +protected: + Matrix* _dataGrads; + int _dataLayerIdx, _softmaxLayerIdx; +public: + DataGradWorker(ConvNet& convNet, CPUData& data, Matrix& dataGrads, int dataLayerIdx, int softmaxLayerIdx); + ~DataGradWorker(); + void run(); +}; + +#endif /* WORKER_CUH */ + diff --git a/initw.py b/initw.py new file mode 100755 index 0000000..4373ac1 --- /dev/null +++ b/initw.py @@ -0,0 +1,21 @@ +from gpumodel import * +import numpy as n +import numpy.random as nr + +def get_src(): + src = IGPUModel.load_checkpoint('/nobackup/kriz/tmp/ConvNet__2012-09-19_23.29.04') + return src['model_state']['layers'] + +def makew(name, idx, shapes, params): + src, src_layer = get_src(), params[0] + if name == 'localcombine' and idx == 2: + return n.array(0.01 * nr.randn(shapes[0], shapes[1]), dtype=n.single, order='C') + return src[src_layer]['weights'][idx] + +def makeb(name, shapes, params): + src, src_layer = get_src(), params[0] + return src[src_layer]['biases'] + +def makec(name, idx, shapes, params): + src, src_layer = get_src(), params[0] + return src[src_layer]['filterConns'][idx] diff --git a/layer.py b/layer.py new file mode 100755 index 0000000..205298e --- /dev/null +++ b/layer.py @@ -0,0 +1,1418 @@ +from math import exp +import sys +import ConfigParser as cfg +import os +import numpy as n +import numpy.random as nr +from math import ceil, floor +from ordereddict import OrderedDict +from os import linesep as NL +from options import OptionsParser +import re + +class LayerParsingError(Exception): + pass + +# A neuron that doesn't take parameters +class NeuronParser: + def __init__(self, type, func_str, uses_acts=True, uses_inputs=True): + self.type = type + self.func_str = func_str + 
self.uses_acts = uses_acts + self.uses_inputs = uses_inputs + + def parse(self, type): + if type == self.type: + return {'type': self.type, + 'params': {}, + 'usesActs': self.uses_acts, + 'usesInputs': self.uses_inputs} + return None + +# A neuron that takes parameters +class ParamNeuronParser(NeuronParser): + neuron_regex = re.compile(r'^\s*(\w+)\s*\[\s*(\w+(\s*,\w+)*)\s*\]\s*$') + def __init__(self, type, func_str, uses_acts=True, uses_inputs=True): + NeuronParser.__init__(self, type, func_str, uses_acts, uses_inputs) + m = self.neuron_regex.match(type) + self.base_type = m.group(1) + self.param_names = m.group(2).split(',') + assert len(set(self.param_names)) == len(self.param_names) + + def parse(self, type): + m = re.match(r'^%s\s*\[([\d,\.\s\-]*)\]\s*$' % self.base_type, type) + if m: + try: + param_vals = [float(v.strip()) for v in m.group(1).split(',')] + if len(param_vals) == len(self.param_names): + return {'type': self.base_type, + 'params': dict(zip(self.param_names, param_vals)), + 'usesActs': self.uses_acts, + 'usesInputs': self.uses_inputs} + except TypeError: + pass + return None + +class AbsTanhNeuronParser(ParamNeuronParser): + def __init__(self): + ParamNeuronParser.__init__(self, 'abstanh[a,b]', 'f(x) = a * |tanh(b * x)|') + + def parse(self, type): + dic = ParamNeuronParser.parse(self, type) + # Make b positive, since abs(tanh(bx)) = abs(tanh(-bx)) and the C++ code + # assumes b is positive. + if dic: + dic['params']['b'] = abs(dic['params']['b']) + return dic + +class ParamParser: + lrs_regex = re.compile(r'^\s*(\w+)\s*(?:\[\s*(\w+(\s*,\w+)*)\s*\])?\s*$') + param_converters = {'i': int, + 'f': float} + def __init__(self, type): + m = self.lrs_regex.match(type) + self.base_type = m.group(1) + param_names_with_type = m.group(2).split(',') if m.group(2) is not None else [] + self.param_names = [p[1:] for p in param_names_with_type] + self.param_types = [self.param_converters[p[0]] for p in param_names_with_type] + assert len(set(self.param_names)) == len(self.param_names) + + def parse(self, type): + param_regex_inner = ",".join([('\s*%s\s*=\s*[^,\s=]+\s*' % p) for p in self.param_names]) + regex_str = ('^%s\s*(?:\[(' + param_regex_inner + ')\])?\s*$') % self.base_type + m = re.match(regex_str, type, flags=re.IGNORECASE) + if m: + try: + param_vals = [ptype(v.split('=')[1].strip()) for ptype,v in zip(self.param_types, m.group(1).split(','))] if m.group(1) is not None else [] + if len(param_vals) == len(self.param_names): + return {'type': self.base_type, + 'params': dict(zip(self.param_names, param_vals))} + except TypeError: + pass + return None + +# Subclass that throws more convnet-specific exceptions than the default +class MyConfigParser(cfg.SafeConfigParser): + def safe_get(self, section, option, f=cfg.SafeConfigParser.get, typestr=None, default=None): + try: + return f(self, section, option) + except cfg.NoOptionError, e: + if default is not None: + return default + raise LayerParsingError("Layer '%s': required parameter '%s' missing" % (section, option)) + except ValueError, e: + if typestr is None: + raise e + raise LayerParsingError("Layer '%s': parameter '%s' must be %s" % (section, option, typestr)) + + def safe_get_list(self, section, option, f=str, typestr='strings', default=None): + v = self.safe_get(section, option, default=default) + if type(v) == list: + return v + try: + return [f(x.strip()) for x in v.split(',')] + except: + raise LayerParsingError("Layer '%s': parameter '%s' must be ','-delimited list of %s" % (section, option, typestr)) + + def 
safe_get_int(self, section, option, default=None): + return self.safe_get(section, option, f=cfg.SafeConfigParser.getint, typestr='int', default=default) + + def safe_get_float(self, section, option, default=None): + return self.safe_get(section, option, f=cfg.SafeConfigParser.getfloat, typestr='float', default=default) + + def safe_get_bool(self, section, option, default=None): + return self.safe_get(section, option, f=cfg.SafeConfigParser.getboolean, typestr='bool', default=default) + + def safe_get_float_list(self, section, option, default=None): + return self.safe_get_list(section, option, float, typestr='floats', default=default) + + def safe_get_int_list(self, section, option, default=None): + return self.safe_get_list(section, option, int, typestr='ints', default=default) + + def safe_get_bool_list(self, section, option, default=None): + return self.safe_get_list(section, option, lambda x: x.lower() in ('true', '1'), typestr='bools', default=default) + +# A class that implements part of the interface of MyConfigParser +class FakeConfigParser(object): + def __init__(self, dic): + self.dic = dic + + def safe_get(self, section, option, default=None): + if option in self.dic: + return self.dic[option] + return default + + def safe_get_int(self, section, option, default=None): + return int(self.safe_get(section, option, default)) + +class LayerParser: + def __init__(self): + self.dic = {} + self.set_defaults() + + # Post-processing step -- this is called after all layers have been initialized + def optimize(self, layers): + self.dic['actsTarget'] = -1 + self.dic['actsGradTarget'] = -1 + + def parse_params(self, mcp, parsers, param_name, default, human_name): + dic, name = self.dic, self.dic['name'] + vals = default + if mcp.has_section(name): # A layer with defined parameters in the parameter file + vals = mcp.safe_get(name, param_name, default='default') + elif 'src_layer' in dic and mcp.has_section(dic['src_layer']): # A detached layer + vals = mcp.safe_get(dic['src_layer'], param_name, default='default') + else: # A layer with no parameters defined in parameter file + pass + + for p in parsers: + parsed = p.parse(vals) + if parsed: + return parsed + raise LayerParsingError("Layer '%s': unable to parse %s %s=%s." % (name, human_name, param_name, vals)) + + # Add parameters from layer parameter file + def add_params(self, mcp): + dic, name = self.dic, self.dic['name'] + dic['quantF'] = self.parse_params(mcp, quant_parsers, 'quantF', 'default', 'forward quantization scheme') + dic['quantB'] = self.parse_params(mcp, quant_parsers, 'quantB', 'default', 'backward quantization scheme') +# print name +# print dic['quantF'], dic['quantB'] + + def init(self, dic): + self.dic = dic + return self + + def set_defaults(self): + self.dic['outputs'] = 0 + self.dic['parser'] = self + self.dic['requiresParams'] = False + # Does this layer use its own activity matrix + # for some purpose other than computing its output? + # Usually, this will only be true for layers that require their + # own activity matrix for gradient computations. For example, layers + # with logistic units must compute the gradient y * (1 - y), where y is + # the activity matrix. + # + # Layers that do not not use their own activity matrix should advertise + # this, since this will enable memory-saving matrix re-use optimizations. + # + # The default value of this property is True, for safety purposes. + # If a layer advertises that it does not use its own activity matrix when + # in fact it does, bad things will happen. 
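        # Illustrative aside (not part of the original file): the distinction in
        # practice, using a logistic and a square unit as examples --
        #
        #     y = 1 / (1 + exp(-x))         # logistic activation
        #     dE/dx = dE/dy * y * (1 - y)   # needs only the activity matrix y  -> usesActs
        #     dE/dx = dE/dy * 2 * x         # square unit f(x) = x^2 needs the input x -> usesInputs
        #
        # Layers for which both flags are False leave those matrices free to be
        # re-used for something else.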
+ self.dic['usesActs'] = True + + # Does this layer use the activity matrices of its input layers + # for some purpose other than computing its output? + # + # Again true by default for safety + self.dic['usesInputs'] = True + + # Force this layer to use its own activity gradient matrix, + # instead of borrowing one from one of its inputs. + # + # This should be true for layers where the mapping from output + # gradient to input gradient is non-elementwise. + self.dic['forceOwnActs'] = True + + # Does this layer need the gradient at all? + # Should only be true for layers with parameters (weights). + self.dic['gradConsumer'] = False + + # The gpu index on which this layer runs + self.dic['gpu'] = -1 + + def parse(self, name, mcp, prev_layers, model=None): + self.prev_layers = prev_layers + self.dic['name'] = name + self.dic['type'] = mcp.safe_get(name, 'type') + + return self.dic + + def verify_float_range(self, v, param_name, _min, _max): + self.verify_num_range(v, param_name, _min, _max, strconv=lambda x: '%.3f' % x) + + def verify_num_range(self, v, param_name, _min, _max, strconv=lambda x:'%d' % x): + if type(v) == list: + for i,vv in enumerate(v): + self._verify_num_range(vv, param_name, _min, _max, i, strconv=strconv) + else: + self._verify_num_range(v, param_name, _min, _max, strconv=strconv) + + def _verify_num_range(self, v, param_name, _min, _max, input=-1, strconv=lambda x:'%d' % x): + layer_name = self.dic['name'] if input < 0 else '%s[%d]' % (self.dic['name'], input) + if _min is not None and _max is not None and (v < _min or v > _max): + raise LayerParsingError("Layer '%s': parameter '%s' must be in the range %s-%s" % (layer_name, param_name, strconv(_min), strconv(_max))) + elif _min is not None and v < _min: + raise LayerParsingError("Layer '%s': parameter '%s' must be greater than or equal to %s" % (layer_name, param_name, strconv(_min))) + elif _max is not None and v > _max: + raise LayerParsingError("Layer '%s': parameter '%s' must be smaller than or equal to %s" % (layer_name, param_name, strconv(_max))) + + def verify_divisible(self, value, div, value_name, div_name=None, input_idx=0): + layer_name = self.dic['name'] if len(self.dic['inputs']) == 0 else '%s[%d]' % (self.dic['name'], input_idx) + if value % div != 0: + raise LayerParsingError("Layer '%s': parameter '%s' must be divisible by %s" % (layer_name, value_name, str(div) if div_name is None else "'%s'" % div_name)) + + def verify_str_in(self, value, param_name, lst): + if value not in lst: + raise LayerParsingError("Layer '%s': parameter '%s' must be one of %s" % (self.dic['name'], param_name, ", ".join("'%s'" % s for s in lst))) + + def verify_int_in(self, value, param_name, lst): + if value not in lst: + raise LayerParsingError("Layer '%s': parameter '%s' must be one of %s" % (self.dic['name'], param_name, ", ".join("'%d'" % s for s in lst))) + + def verify_input_dims(self, dims): + for i,d in enumerate(dims): + if d is not None and self.dic['numInputs'][i] != d: # first input must be labels + raise LayerParsingError("Layer '%s': dimensionality of input %d must be %d" % (self.dic['name'], i, d)) + + # This looks for neuron=x arguments in various layers, and creates + # separate layer definitions for them. 
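    # Illustrative example (not part of the original file): a definition such as
    #
    #     [fc10]
    #     type=fc
    #     inputs=fc4096
    #     neuron=relu
    #
    # is effectively split into an 'fc10' layer without a neuron plus a new
    # 'fc10_neuron' layer of type 'neuron' whose input is 'fc10'; any layer that
    # listed 'fc10' among its inputs is re-pointed at 'fc10_neuron'. (The layer
    # names here are invented; the renaming follows detach_neuron_layer below.)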
+ @staticmethod + def detach_neuron_layers(layers): + for name,l in layers.items(): + if l['type'] != 'neuron' and 'neuron' in l and l['neuron']: + NeuronLayerParser().detach_neuron_layer(name, layers) + + @staticmethod + def parse_layers(layer_cfg_path, param_cfg_path, model, layers={}): + try: + if not os.path.exists(layer_cfg_path): + raise LayerParsingError("Layer definition file '%s' does not exist" % layer_cfg_path) + if not os.path.exists(param_cfg_path): + raise LayerParsingError("Layer parameter file '%s' does not exist" % param_cfg_path) + if len(layers) == 0: + mcp = MyConfigParser(dict_type=OrderedDict) + mcp.read([layer_cfg_path]) + for name in mcp.sections(): + if not mcp.has_option(name, 'type'): + raise LayerParsingError("Layer '%s': no type given" % name) + ltype = mcp.safe_get(name, 'type') + if ltype not in layer_parsers: + raise LayerParsingError("Layer '%s': Unknown layer type: '%s'" % (name, ltype)) + layers[name] = layer_parsers[ltype]().parse(name, mcp, layers, model) + + LayerParser.detach_neuron_layers(layers) + for l in layers.values(): + lp = layer_parsers[l['type']]() + l['parser'].optimize(layers) + del l['parser'] + + for name,l in layers.items(): + if not l['type'].startswith('cost.'): + found = max(name in l2['inputs'] for l2 in layers.values() if 'inputs' in l2) + if not found: + raise LayerParsingError("Layer '%s' of type '%s' is unused" % (name, l['type'])) + + mcp = MyConfigParser(dict_type=OrderedDict) + mcp.read([param_cfg_path]) + + for name,l in layers.items(): + if not mcp.has_section(name) and l['requiresParams']: + raise LayerParsingError("Layer '%s' of type '%s' requires extra parameters, but none given in file '%s'." % (name, l['type'], param_cfg_path)) + lp = layer_parsers[l['type']]().init(l) + lp.add_params(mcp) + lp.dic['conserveMem'] = model.op.get_value('conserve_mem') + except LayerParsingError, e: + print e + sys.exit(1) + return layers + + @staticmethod + def register_layer_parser(ltype, cls): + if ltype in layer_parsers: + raise LayerParsingError("Layer type '%s' already registered" % ltype) + layer_parsers[ltype] = cls + +# Any layer that takes an input (i.e. non-data layer) +class LayerWithInputParser(LayerParser): + def __init__(self, num_inputs=-1): + LayerParser.__init__(self) + self.num_inputs = num_inputs + + def verify_num_params(self, params): + for param in params: + if len(self.dic[param]) != len(self.dic['inputs']): + raise LayerParsingError("Layer '%s': %s list length does not match number of inputs" % (self.dic['name'], param)) + + def optimize(self, layers): + LayerParser.optimize(self, layers) + dic = self.dic + # Check if I have an input that no one else uses. + if not dic['forceOwnActs']: + for i, inp in enumerate(dic['inputLayers']): +# l = layers[inp] + if inp['outputs'] == dic['outputs'] and sum('inputs' in ll and inp in ll['inputs'] for ll in layers) == 1: + # I can share my activity matrix with this layer + # if it does not use its activity matrix, and I + # do not need to remember my inputs. + if not inp['usesActs'] and not dic['usesInputs']: + dic['actsTarget'] = i +# print "Layer '%s' sharing activity matrix with layer '%s'" % (dic['name'], l['name']) + # I can share my gradient matrix with this layer if we're on the same GPU. 
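                    # Illustrative note (not from the original sources): e.g. a 'neuron'
                    # layer with usesInputs=False, whose single input layer produces output
                    # of the same dimensionality, feeds no other layer and does not need its
                    # own activations (usesActs=False), can set actsTarget=0 and overwrite
                    # that input's activity matrix instead of allocating one. actsGradTarget
                    # plays the same role for gradient buffers, but sharing those additionally
                    # requires both layers to sit on the same GPU (checked just below).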
+ if dic['gpu'] == inp['gpu']: + dic['actsGradTarget'] = i +# print "Layer '%s' sharing activity gradient matrix with layer '%s'" % (dic['name'], l['name']) + + def parse(self, name, mcp, prev_layers, model=None): + dic = LayerParser.parse(self, name, mcp, prev_layers, model) + + dic['inputs'] = [inp.strip() for inp in mcp.safe_get(name, 'inputs').split(',')] + + for inp in dic['inputs']: + if inp not in prev_layers: + raise LayerParsingError("Layer '%s': input layer '%s' not defined" % (name, inp)) + + dic['inputLayers'] = [prev_layers[inp] for inp in dic['inputs']] + for inp in dic['inputLayers']: + if inp['outputs'] == 0: + raise LayerParsingError("Layer '%s': input layer '%s' does not produce any output" % (name, inp['name'])) + dic['numInputs'] = [inp['outputs'] for inp in dic['inputLayers']] + + # Layers can declare a neuron activation function to apply to their output, as a shortcut + # to avoid declaring a separate neuron layer above themselves. + dic['neuron'] = mcp.safe_get(name, 'neuron', default="") + if self.num_inputs > 0 and len(dic['numInputs']) != self.num_inputs: + raise LayerParsingError("Layer '%s': number of inputs must be %d", name, self.num_inputs) + + dic['gpu'] = mcp.safe_get_int(name, 'gpu', default=dic['inputLayers'][0]['gpu']) +# if dic['gpu'] < 0: +# print dic['inputLayers'][0]['name'], dic['inputLayers'][0]['gpu'] + if model: + self.verify_int_in(dic['gpu'], 'gpu', range(0, model.op.get_value('num_gpus'))) +# input_layers = [prev_layers[i] for i in dic['inputs']] +# dic['gradConsumer'] = any(l['gradConsumer'] for l in dic['inputLayers']) +# dic['usesActs'] = dic['gradConsumer'] # A conservative setting by default for layers with input + return dic + + def verify_img_size(self): + dic = self.dic + if dic['numInputs'][0] % dic['imgPixels'] != 0 or dic['imgSize'] * dic['imgSize'] != dic['imgPixels']: + raise LayerParsingError("Layer '%s': has %-d dimensional input, not interpretable as %d-channel images" % (dic['name'], dic['numInputs'][0], dic['channels'])) + + @staticmethod + def grad_consumers_below(dic): + if dic['gradConsumer']: + return True + if 'inputLayers' in dic: + return any(LayerWithInputParser.grad_consumers_below(l) for l in dic['inputLayers']) + + def verify_no_grads(self): + if LayerWithInputParser.grad_consumers_below(self.dic): + raise LayerParsingError("Layer '%s': layers of type '%s' cannot propagate gradient and must not be placed over layers with parameters." 
% (self.dic['name'], self.dic['type'])) + +class NailbedLayerParser(LayerWithInputParser): + def __init__(self): + LayerWithInputParser.__init__(self, num_inputs=1) + + def parse(self, name, mcp, prev_layers, model=None): + dic = LayerWithInputParser.parse(self, name, mcp, prev_layers, model) + dic['forceOwnActs'] = False + dic['usesActs'] = False + dic['usesInputs'] = False + + dic['channels'] = mcp.safe_get_int(name, 'channels') + dic['stride'] = mcp.safe_get_int(name, 'stride') + + self.verify_num_range(dic['channels'], 'channels', 1, None) + + # Computed values + dic['imgPixels'] = dic['numInputs'][0] / dic['channels'] + dic['imgSize'] = int(n.sqrt(dic['imgPixels'])) + dic['outputsX'] = (dic['imgSize'] + dic['stride'] - 1) / dic['stride'] + dic['start'] = (dic['imgSize'] - dic['stride'] * (dic['outputsX'] - 1)) / 2 + dic['outputs'] = dic['channels'] * dic['outputsX']**2 + + self.verify_num_range(dic['outputsX'], 'outputsX', 0, None) + + self.verify_img_size() + + print "Initialized bed-of-nails layer '%s' on GPU %d, producing %dx%d %d-channel output" % (name, dic['gpu'], dic['outputsX'], dic['outputsX'], dic['channels']) + return dic + +class GaussianBlurLayerParser(LayerWithInputParser): + def __init__(self): + LayerWithInputParser.__init__(self, num_inputs=1) + + def parse(self, name, mcp, prev_layers, model=None): + dic = LayerWithInputParser.parse(self, name, mcp, prev_layers, model) + dic['forceOwnActs'] = False + dic['usesActs'] = False + dic['usesInputs'] = False + dic['outputs'] = dic['numInputs'][0] + + dic['channels'] = mcp.safe_get_int(name, 'channels') + dic['filterSize'] = mcp.safe_get_int(name, 'filterSize') + dic['stdev'] = mcp.safe_get_float(name, 'stdev') + + self.verify_num_range(dic['channels'], 'channels', 1, None) + self.verify_int_in(dic['filterSize'], 'filterSize', [3, 5, 7, 9]) + + # Computed values + dic['imgPixels'] = dic['numInputs'][0] / dic['channels'] + dic['imgSize'] = int(n.sqrt(dic['imgPixels'])) + dic['filter'] = n.array([exp(-(dic['filterSize']/2 - i)**2 / float(2 * dic['stdev']**2)) + for i in xrange(dic['filterSize'])], dtype=n.float32).reshape(1, dic['filterSize']) + dic['filter'] /= dic['filter'].sum() + self.verify_img_size() + + if dic['filterSize'] > dic['imgSize']: + raise LayerParsingError("Later '%s': filter size (%d) must be smaller than image size (%d)." 
% (dic['name'], dic['filterSize'], dic['imgSize'])) + + print "Initialized Gaussian blur layer '%s', producing %dx%d %d-channel output" % (name, dic['imgSize'], dic['imgSize'], dic['channels']) + + return dic + +class HorizontalReflectionLayerParser(LayerWithInputParser): + def __init__(self): + LayerWithInputParser.__init__(self, num_inputs=1) + + def parse(self, name, mcp, prev_layers, model=None): + dic = LayerWithInputParser.parse(self, name, mcp, prev_layers, model) + dic['outputs'] = dic['numInputs'][0] + dic['channels'] = mcp.safe_get_int(name, 'channels') + + self.verify_num_range(dic['channels'], 'channels', 1, 3) + + # Computed values + dic['imgPixels'] = dic['numInputs'][0] / dic['channels'] + dic['imgSize'] = int(n.sqrt(dic['imgPixels'])) + self.verify_img_size() + + print "Initialized horizontal reflection layer '%s', producing %dx%d %d-channel output" % (name, dic['imgSize'], dic['imgSize'], dic['channels']) + + return dic + +class ResizeLayerParser(LayerWithInputParser): + def __init__(self): + LayerWithInputParser.__init__(self, num_inputs=1) + + def parse(self, name, mcp, prev_layers, model=None): + dic = LayerWithInputParser.parse(self, name, mcp, prev_layers, model) + dic['forceOwnActs'] = False + dic['usesActs'] = False + dic['usesInputs'] = False + + dic['channels'] = mcp.safe_get_int(name, 'channels') + dic['imgPixels'] = dic['numInputs'][0] / dic['channels'] + dic['imgSize'] = int(n.sqrt(dic['imgPixels'])) + + dic['scale'] = mcp.safe_get_float(name, 'scale') + dic['tgtSize'] = int(floor(dic['imgSize'] / dic['scale'])) + dic['tgtPixels'] = dic['tgtSize']**2 + self.verify_num_range(dic['channels'], 'channels', 1, None) + # Really not recommended to use this for such severe scalings + self.verify_float_range(dic['scale'], 'scale', 0.5, 2) + + dic['outputs'] = dic['channels'] * dic['tgtPixels'] + + self.verify_img_size() + self.verify_no_grads() + + print "Initialized resize layer '%s', producing %dx%d %d-channel output" % (name, dic['tgtSize'], dic['tgtSize'], dic['channels']) + + return dic + +class RandomScaleLayerParser(LayerWithInputParser): + def __init__(self): + LayerWithInputParser.__init__(self, num_inputs=1) + + def parse(self, name, mcp, prev_layers, model=None): + dic = LayerWithInputParser.parse(self, name, mcp, prev_layers, model) + dic['forceOwnActs'] = False + dic['usesActs'] = False + dic['usesInputs'] = False + + dic['channels'] = mcp.safe_get_int(name, 'channels') + self.verify_num_range(dic['channels'], 'channels', 1, None) + + # Computed values + dic['imgPixels'] = dic['numInputs'][0] / dic['channels'] + dic['imgSize'] = int(n.sqrt(dic['imgPixels'])) + + dic['maxScale'] = mcp.safe_get_float(name, 'maxScale') + dic['tgtSize'] = mcp.safe_get_int(name, 'tgtSize') + min_size = int(floor(dic['imgSize'] / dic['maxScale'])) + max_size = dic['imgSize'] #int(floor(dic['imgSize'] * dic['maxScale'])) + if dic['tgtSize'] < min_size: + raise LayerParsingError("Layer '%s': target size must be greater than minimum image size after rescaling (%d)" % (name, min_size)) + if dic['tgtSize'] > max_size: + raise LayerParsingError("Layer '%s': target size must be smaller than maximum image size after rescaling (%d)" % (name, max_size)) + dic['tgtPixels'] = dic['tgtSize']**2 + + self.verify_float_range(dic['maxScale'], 'maxScale', 1, 2) + + dic['outputs'] = dic['channels'] * dic['tgtPixels'] + + self.verify_img_size() + self.verify_no_grads() + + print "Initialized random scale layer '%s', producing %dx%d %d-channel output" % (name, dic['tgtSize'], dic['tgtSize'], 
dic['channels']) + + return dic + +class ColorTransformLayerParser(LayerWithInputParser): + def __init__(self): + LayerWithInputParser.__init__(self, num_inputs=1) + + def parse(self, name, mcp, prev_layers, model=None): + dic = LayerWithInputParser.parse(self, name, mcp, prev_layers, model) + dic['forceOwnActs'] = False + dic['usesActs'] = False + dic['usesInputs'] = False + + # Computed values + dic['imgPixels'] = dic['numInputs'][0] / 3 + dic['imgSize'] = int(n.sqrt(dic['imgPixels'])) + dic['channels'] = 3 + dic['outputs'] = dic['numInputs'][0] + + self.verify_img_size() + self.verify_no_grads() + + return dic + +class RGBToYUVLayerParser(ColorTransformLayerParser): + def __init__(self): + ColorTransformLayerParser.__init__(self) + + def parse(self, name, mcp, prev_layers, model=None): + dic = ColorTransformLayerParser.parse(self, name, mcp, prev_layers, model) + print "Initialized RGB --> YUV layer '%s', producing %dx%d %d-channel output" % (name, dic['imgSize'], dic['imgSize'], dic['channels']) + return dic + +class RGBToLABLayerParser(ColorTransformLayerParser): + def __init__(self): + ColorTransformLayerParser.__init__(self) + + def parse(self, name, mcp, prev_layers, model=None): + dic = ColorTransformLayerParser.parse(self, name, mcp, prev_layers, model) + dic['center'] = mcp.safe_get_bool(name, 'center', default=False) + print "Initialized RGB --> LAB layer '%s', producing %dx%d %d-channel output" % (name, dic['imgSize'], dic['imgSize'], dic['channels']) + return dic + +class NeuronLayerParser(LayerWithInputParser): + def __init__(self): + LayerWithInputParser.__init__(self, num_inputs=1) + + @staticmethod + def get_unused_layer_name(layers, wish): + if wish not in layers: + return wish + for i in xrange(1, 100): + name = '%s.%d' % (wish, i) + if name not in layers: + return name + raise LayerParsingError("This is @#$%&!.") + + def parse_neuron(self, neuron_str): + for n in neuron_parsers: + p = n.parse(neuron_str) + if p: # Successfully parsed neuron, return it + self.dic['neuron'] = p + self.dic['usesActs'] = self.dic['neuron']['usesActs'] + self.dic['usesInputs'] = self.dic['neuron']['usesInputs'] + + return + # Could not parse neuron + # Print available neuron types + colnames = ['Neuron type', 'Function'] + m = max(len(colnames[0]), OptionsParser._longest_value(neuron_parsers, key=lambda x:x.type)) + 2 + ntypes = [OptionsParser._bold(colnames[0].ljust(m))] + [n.type.ljust(m) for n in neuron_parsers] + fnames = [OptionsParser._bold(colnames[1])] + [n.func_str for n in neuron_parsers] + usage_lines = NL.join(ntype + fname for ntype,fname in zip(ntypes, fnames)) + + raise LayerParsingError("Layer '%s': unable to parse neuron type '%s'. Valid neuron types: %sWhere neurons have parameters, they must be floats." 
% (self.dic['name'], neuron_str, NL + usage_lines + NL)) + + def detach_neuron_layer(self, src_name, layers): + dic = self.dic +# self.set_defaults() + dic['name'] = NeuronLayerParser.get_unused_layer_name(layers, '%s_neuron' % src_name) + dic['type'] = 'neuron' + dic['inputs'] = src_name + dic['neuron'] = layers[src_name]['neuron'] + dic['gpu'] = layers[src_name]['gpu'] + + # Yes it's not entirely correct to pass all of layers as prev_layers, but it's harmless + dic = self.parse(dic['name'], FakeConfigParser(dic), layers) + dic['src_layer'] = src_name + + # Link upper layers to this new one + for l in layers.values(): + if 'inputs' in l: + l['inputs'] = [inp if inp != src_name else dic['name'] for inp in l['inputs']] + l['inputLayers'] = [inp if inp['name'] != src_name else dic for inp in l['inputLayers']] + layers[dic['name']] = dic + + def parse(self, name, mcp, prev_layers, model=None): + dic = LayerWithInputParser.parse(self, name, mcp, prev_layers, model) + dic['outputs'] = dic['numInputs'][0] + self.parse_neuron(dic['neuron']) + dic['forceOwnActs'] = False + print "Initialized neuron layer '%s' on GPU %d, producing %d outputs" % (name, dic['gpu'], dic['outputs']) + return dic + +class EltwiseSumLayerParser(LayerWithInputParser): + def __init__(self): + LayerWithInputParser.__init__(self) + + def parse(self, name, mcp, prev_layers, model): + dic = LayerWithInputParser.parse(self, name, mcp, prev_layers, model) + + if len(set(dic['numInputs'])) != 1: + raise LayerParsingError("Layer '%s': all inputs must have the same dimensionality. Got dimensionalities: %s" % (name, ", ".join(str(s) for s in dic['numInputs']))) + dic['outputs'] = dic['numInputs'][0] + dic['usesInputs'] = False + dic['usesActs'] = False + dic['forceOwnActs'] = False + + dic['coeffs'] = mcp.safe_get_float_list(name, 'coeffs', default=[1.0] * len(dic['inputs'])) + + print "Initialized elementwise sum layer '%s' on GPU %d, producing %d outputs" % (name, dic['gpu'], dic['outputs']) + return dic + +class EltwiseMaxLayerParser(LayerWithInputParser): + def __init__(self): + LayerWithInputParser.__init__(self) + + def parse(self, name, mcp, prev_layers, model): + dic = LayerWithInputParser.parse(self, name, mcp, prev_layers, model) + if len(dic['inputs']) < 2: + raise LayerParsingError("Layer '%s': elementwise max layer must have at least 2 inputs, got %d." % (name, len(dic['inputs']))) + if len(set(dic['numInputs'])) != 1: + raise LayerParsingError("Layer '%s': all inputs must have the same dimensionality. 
Got dimensionalities: %s" % (name, ", ".join(str(s) for s in dic['numInputs']))) + dic['outputs'] = dic['numInputs'][0] + + print "Initialized elementwise max layer '%s' on GPU %d, producing %d outputs" % (name, dic['gpu'], dic['outputs']) + return dic + +class HiddenSexLayerParser(LayerWithInputParser): + def __init__(self): + LayerWithInputParser.__init__(self, num_inputs=1) + + def add_params(self, mcp): + LayerWithInputParser.add_params(self, mcp) + dic, name = self.dic, self.dic['name'] + dic['enable'] = mcp.safe_get_bool(name, 'enable') + + + def parse(self, name, mcp, prev_layers, model): + dic = LayerWithInputParser.parse(self, name, mcp, prev_layers, model) + dic['requiresParams'] = True + dic['usesInputs'] = False + dic['usesActs'] = False + dic['forceOwnActs'] = False + dic['outputs'] = dic['numInputs'][0] + dic['keep'] = mcp.safe_get_float(name, 'keep') + + print "Initialized hidden sex layer '%s' on GPU %d, producing %d outputs" % (name, dic['gpu'], dic['outputs']) + return dic + +class WeightLayerParser(LayerWithInputParser): + LAYER_PAT = re.compile(r'^\s*([^\s\[]+)(?:\[(\d+)\])?\s*$') # matches things like layername[5], etc + + def __init__(self, num_inputs=-1): + LayerWithInputParser.__init__(self, num_inputs=num_inputs) + + @staticmethod + def get_layer_name(name_str): + m = WeightLayerParser.LAYER_PAT.match(name_str) + if not m: + return None + return m.group(1), m.group(2) + + + def add_params(self, mcp): + LayerWithInputParser.add_params(self, mcp) + dic, name = self.dic, self.dic['name'] + dic['epsW'] = mcp.safe_get_float_list(name, 'epsW') + dic['epsB'] = mcp.safe_get_float(name, 'epsB') + dic['momW'] = mcp.safe_get_float_list(name, 'momW') + dic['momB'] = mcp.safe_get_float(name, 'momB') + dic['superEps'] = mcp.safe_get_float(name, 'superEps', default=0.0) + dic['superMom'] = mcp.safe_get_float(name, 'superMom', default=0.0) + dic['wc'] = mcp.safe_get_float_list(name, 'wc', default=[0.0] * len(dic['inputs'])) + dic['wball'] = mcp.safe_get_float_list(name, 'wball', default=[0.0] * len(dic['inputs'])) + dic['wballNormed'] = [wball * nweights for wball,nweights in zip(dic['wball'], dic['weightsPerFilter'])] + self.verify_num_params(['epsW', 'momW', 'wc', 'wball']) + + dic['schedW'] = self.parse_params(mcp, lrs_parsers, 'schedW', 'default', 'learning rate schedule') + dic['schedB'] = self.parse_params(mcp, lrs_parsers, 'schedB', 'default', 'learning rate schedule') + #print name + #print dic['schedW'] + + dic['gradConsumer'] = dic['epsB'] > 0 or any(w > 0 for w in dic['epsW']) + + @staticmethod + def unshare_weights(layer, layers, matrix_idx=None): + def unshare(layer, layers, indices): + for i in indices: + if layer['weightSourceLayers'][i] >= 0: + src_matrix_idx = layer['weightSourceMatrixIndices'][i] + layer['weightSourceLayers'][i] = "" + layer['weightSourceMatrixIndices'][i] = -1 + layer['weights'][i] = layer['weights'][i].copy() + layer['weightsInc'][i] = n.zeros_like(layer['weights'][i]) + print "Unshared weight matrix %s[%d] from %s[%d]." % (layer['name'], i, layer['weightSourceLayers'][i], src_matrix_idx) + else: + print "Weight matrix %s[%d] already unshared." 
% (layer['name'], i) + if 'weightSourceLayers' in layer: + unshare(layer, layers, range(len(layer['inputs'])) if matrix_idx is None else [matrix_idx]) + + # Load weight/biases initialization module + def call_init_func(self, param_name, shapes, input_idx=-1): + dic = self.dic + func_pat = re.compile('^([^\.]+)\.([^\(\)]+)\s*(?:\(([^,]+(?:,[^,]+)*)\))?$') + m = func_pat.match(dic[param_name]) + if not m: + raise LayerParsingError("Layer '%s': '%s' parameter must have format 'moduleName.functionName(param1,param2,...)'; got: %s." % (dic['name'], param_name, dic['initWFunc'])) + module, func = m.group(1), m.group(2) + params = m.group(3).split(',') if m.group(3) is not None else [] + try: + mod = __import__(module) + return getattr(mod, func)(dic['name'], input_idx, shapes, params=params) if input_idx >= 0 else getattr(mod, func)(dic['name'], shapes, params=params) + except (ImportError, AttributeError, TypeError), e: + raise LayerParsingError("Layer '%s': %s." % (dic['name'], e)) + + def make_weights(self, initW, rows, cols, order='C'): + dic = self.dic + dic['weights'], dic['weightsInc'] = [], [] + if dic['initWFunc']: # Initialize weights from user-supplied python function + # Initialization function is supplied in the format + # module.func + for i in xrange(len(dic['inputs'])): + dic['weights'] += [self.call_init_func('initWFunc', (rows[i], cols[i]), input_idx=i)] + + if type(dic['weights'][i]) != n.ndarray: + raise LayerParsingError("Layer '%s[%d]': weight initialization function %s must return numpy.ndarray object. Got: %s." % (dic['name'], i, dic['initWFunc'], type(dic['weights'][i]))) + if dic['weights'][i].dtype != n.float32: + raise LayerParsingError("Layer '%s[%d]': weight initialization function %s must weight matrices consisting of single-precision floats. Got: %s." % (dic['name'], i, dic['initWFunc'], dic['weights'][i].dtype)) + if dic['weights'][i].shape != (rows[i], cols[i]): + raise LayerParsingError("Layer '%s[%d]': weight matrix returned by weight initialization function %s has wrong shape. Should be: %s; got: %s." % (dic['name'], i, dic['initWFunc'], (rows[i], cols[i]), dic['weights'][i].shape)) + # Convert to desired order + dic['weights'][i] = n.require(dic['weights'][i], requirements=order) + dic['weightsInc'] += [n.zeros_like(dic['weights'][i])] + print "Layer '%s[%d]' initialized weight matrices from function %s" % (dic['name'], i, dic['initWFunc']) + else: + for i in xrange(len(dic['inputs'])): + if dic['weightSourceLayers'][i] != '': # Shared weight matrix + src_layer = self.prev_layers[dic['weightSourceLayers'][i]] if dic['weightSourceLayers'][i] != dic['name'] else dic + dic['weights'] += [src_layer['weights'][dic['weightSourceMatrixIndices'][i]]] + dic['weightsInc'] += [src_layer['weightsInc'][dic['weightSourceMatrixIndices'][i]]] + if dic['weights'][i].shape != (rows[i], cols[i]): + raise LayerParsingError("Layer '%s': weight sharing source matrix '%s' has shape %dx%d; should be %dx%d." 
+ % (dic['name'], dic['weightSource'][i], dic['weights'][i].shape[0], dic['weights'][i].shape[1], rows[i], cols[i])) + print "Layer '%s' initialized weight matrix %d from %s" % (dic['name'], i, dic['weightSource'][i]) + else: + dic['weights'] += [n.array(initW[i] * nr.randn(rows[i], cols[i]), dtype=n.single, order=order)] + dic['weightsInc'] += [n.zeros_like(dic['weights'][i])] + + def make_biases(self, rows, cols, order='C'): + dic = self.dic + if dic['initBFunc']: + dic['biases'] = self.call_init_func('initBFunc', (rows, cols)) + if type(dic['biases']) != n.ndarray: + raise LayerParsingError("Layer '%s': bias initialization function %s must return numpy.ndarray object. Got: %s." % (dic['name'], dic['initBFunc'], type(dic['biases']))) + if dic['biases'].dtype != n.float32: + raise LayerParsingError("Layer '%s': bias initialization function %s must return numpy.ndarray object consisting of single-precision floats. Got: %s." % (dic['name'], dic['initBFunc'], dic['biases'].dtype)) + if dic['biases'].shape != (rows, cols): + raise LayerParsingError("Layer '%s': bias vector returned by bias initialization function %s has wrong shape. Should be: %s; got: %s." % (dic['name'], dic['initBFunc'], (rows, cols), dic['biases'].shape)) + + dic['biases'] = n.require(dic['biases'], requirements=order) + print "Layer '%s' initialized bias vector from function %s" % (dic['name'], dic['initBFunc']) + else: + dic['biases'] = dic['initB'] * n.ones((rows, cols), order=order, dtype=n.single) + dic['biasesInc'] = n.zeros_like(dic['biases']) + + def parse(self, name, mcp, prev_layers, model): + dic = LayerWithInputParser.parse(self, name, mcp, prev_layers, model) + dic['requiresParams'] = True + dic['gradConsumer'] = True + dic['usesActs'] = False + dic['initW'] = mcp.safe_get_float_list(name, 'initW', default=0.01) + dic['initB'] = mcp.safe_get_float(name, 'initB', default=0) + dic['initWFunc'] = mcp.safe_get(name, 'initWFunc', default="") + dic['initBFunc'] = mcp.safe_get(name, 'initBFunc', default="") + # Find shared weight matrices + + dic['weightSource'] = mcp.safe_get_list(name, 'weightSource', default=[''] * len(dic['inputs'])) + self.verify_num_params(['initW', 'weightSource']) + + dic['weightSourceLayers'] = [] + dic['weightSourceMatrixIndices'] = [] + + for i, src_name in enumerate(dic['weightSource']): + src_layer_matrix_idx = -1 + src_layer_name = '' + if src_name != '': + src_layer_match = WeightLayerParser.get_layer_name(src_name) + if src_layer_match is None: + raise LayerParsingError("Layer '%s': unable to parse weight sharing source '%s'. Format is layer[idx] or just layer, in which case idx=0 is used." % (name, src_name)) + src_layer_name = src_layer_match[0] + src_layer_matrix_idx = int(src_layer_match[1]) if src_layer_match[1] is not None else 0 + + if src_layer_name not in prev_layers and src_layer_name != name: + raise LayerParsingError("Layer '%s': weight sharing source layer '%s' does not exist." % (name, src_layer_name)) + +# src_layer_idx = prev_names.index(src_layer_name) if src_layer_name != name else len(prev_names) + src_layer = prev_layers[src_layer_name] if src_layer_name != name else dic + if src_layer['gpu'] != dic['gpu']: + raise LayerParsingError("Layer '%s': weight sharing source layer '%s' runs on GPU %d, while '%s' runs on GPU %d." % (name, src_layer_name, src_layer['gpu'], name, dic['gpu'])) + if src_layer['type'] != dic['type']: + raise LayerParsingError("Layer '%s': weight sharing source layer '%s' is of type '%s'; should be '%s'." 
% (name, src_layer_name, src_layer['type'], dic['type'])) + if src_layer_name != name and len(src_layer['weights']) <= src_layer_matrix_idx: + raise LayerParsingError("Layer '%s': weight sharing source layer '%s' has %d weight matrices, but '%s[%d]' requested." % (name, src_layer_name, len(src_layer['weights']), src_name, src_layer_matrix_idx)) + if src_layer_name == name and src_layer_matrix_idx >= i: + raise LayerParsingError("Layer '%s': weight sharing source '%s[%d]' not defined yet." % (name, name, src_layer_matrix_idx)) + + dic['weightSourceLayers'] += [src_layer_name] + dic['weightSourceMatrixIndices'] += [src_layer_matrix_idx] + + return dic + +class FCLayerParser(WeightLayerParser): + def __init__(self): + WeightLayerParser.__init__(self) + + def parse(self, name, mcp, prev_layers, model): + dic = WeightLayerParser.parse(self, name, mcp, prev_layers, model) + + dic['outputs'] = mcp.safe_get_int(name, 'outputs') + dic['weightsPerFilter'] = dic['numInputs'] + self.verify_num_range(dic['outputs'], 'outputs', 1, None) + self.make_weights(dic['initW'], dic['numInputs'], [dic['outputs']] * len(dic['numInputs']), order='F') + self.make_biases(1, dic['outputs'], order='F') + print "Initialized fully-connected layer '%s' on GPU %d, producing %d outputs" % (name, dic['gpu'], dic['outputs']) + return dic + +class TreeFCLayerParser(WeightLayerParser): + def __init__(self): + WeightLayerParser.__init__(self, num_inputs=1) + + def parse(self, name, mcp, prev_layers, model): + dic = WeightLayerParser.parse(self, name, mcp, prev_layers, model) + meta = model.train_data_provider.batch_meta + num_classes = model.train_data_provider.get_num_classes() + dic['tree'] = [meta['tree'][i] for i in xrange(len(meta['tree']))] + dic['rootLabel'] = meta['all_wnids']['gproot'] + if len(set(dic['weightSourceLayers'])) > 1 or dic['weightSourceLayers'][0] != '': + raise LayerParsingError("Layer '%s': weight sharing not allowed in tree-fc layers." 
% (name)) + dic['outputs'] = num_classes + dic['weightsPerFilter'] = dic['numInputs'] + + self.make_weights(dic['initW'], dic['numInputs'], [len(dic['tree'])], order='F') + dic['weights'][0][:,num_classes:] = 0 # Zero out non-leaf weight vectors + self.make_biases(1, dic['outputs'], order='F') + + print "Initialized tree-fc layer '%s' on GPU %d, producing %d outputs" % (name, dic['gpu'], dic['outputs']) + return dic + +class LocalLayerParser(WeightLayerParser): + def __init__(self): + WeightLayerParser.__init__(self) + + # Convert convolutional layer to unshared, locally-connected layer + @staticmethod + def conv_to_local(layers, idx): + layer = layers[idx] + if layer['type'] == 'conv': + layer['type'] = 'local' + for inp in xrange(len(layer['inputs'])): + src_layer_name = layer['weightSourceLayers'][inp] + if src_layer_name != '': + src_layer_idx = [l['name'] for l in layers].index(src_layer_name) + src_layer = layers[src_layer_idx] + src_matrix_idx = layer['weightSourceMatrixIndices'][inp] + LocalLayerParser.conv_to_local(layers, src_layer_idx) + for w in ('weights', 'weightsInc'): + layer[w][inp] = src_layer[w][src_matrix_idx] + else: + layer['weights'][inp] = n.require(n.reshape(n.tile(n.reshape(layer['weights'][inp], (1, n.prod(layer['weights'][inp].shape))), (layer['modules'], 1)), + (layer['modules'] * layer['filterChannels'][inp] * layer['filterPixels'][inp], layer['filters'])), + requirements='C') + layer['weightsInc'][inp] = n.zeros_like(layer['weights'][inp]) + if layer['sharedBiases']: + layer['biases'] = n.require(n.repeat(layer['biases'], layer['modules'], axis=0), requirements='C') + layer['biasesInc'] = n.zeros_like(layer['biases']) + + print "Converted layer '%s' from convolutional to unshared, locally-connected" % layer['name'] + + # Also call this function on any layers sharing my weights + for i, l in enumerate(layers): + if 'weightSourceLayers' in l and layer['name'] in l['weightSourceLayers']: + LocalLayerParser.conv_to_local(layers, i) + return layer + + # Returns (groups, filterChannels) array that represents the set + # of image channels to which each group is connected + def gen_rand_conns(self, groups, channels, filterChannels, inputIdx): + dic = self.dic + overSample = groups * filterChannels / channels + filterConns = [x for i in xrange(overSample) for x in nr.permutation(range(channels))] + + if dic['initCFunc']: # Initialize connectivity from outside source + filterConns = self.call_init_func('initCFunc', (groups, channels, filterChannels), input_idx=inputIdx) + if len(filterConns) != overSample * channels: + raise LayerParsingError("Layer '%s[%d]': random connectivity initialization function %s must return list of length * = %d; got: %d" % (dic['name'], inputIdx, dic['initCFunc'], len(filterConns))) + if any(c not in range(channels) for c in filterConns): + raise LayerParsingError("Layer '%s[%d]': random connectivity initialization function %s must return list of channel indices in the range 0- = 0-%d." % (dic['name'], inputIdx, dic['initCFunc'], channels-1)) + # Every "channels" sub-slice should be a permutation of range(channels) + if any(len(set(c)) != len(c) for c in [filterConns[o*channels:(o+1)*channels] for o in xrange(overSample)]): + raise LayerParsingError("Layer '%s[%d]': random connectivity initialization function %s must return list of channel indices such that every non-overlapping sub-list of = %d elements is a permutation of the integers 0- = 0-%d." 
% (dic['name'], inputIdx, dic['initCFunc'], channels, channels-1)) + + elif dic['weightSourceLayers'][inputIdx] != '': # Shared weight matrix + + src_layer = self.prev_layers[dic['weightSourceLayers'][inputIdx]] if dic['weightSourceLayers'][inputIdx] != dic['name'] else dic + src_inp = dic['weightSourceMatrixIndices'][inputIdx] + if 'randSparse' not in src_layer or not src_layer['randSparse']: + raise LayerParsingError("Layer '%s[%d]': randSparse is true in this layer but false in weight sharing source layer '%s[%d]'." % (dic['name'], inputIdx, src_layer['name'], src_inp)) + if (groups, channels, filterChannels) != (src_layer['groups'][src_inp], src_layer['channels'][src_inp], src_layer['filterChannels'][src_inp]): + raise LayerParsingError("Layer '%s[%d]': groups, channels, filterChannels set to %d, %d, %d, respectively. Does not match setting in weight sharing source layer '%s[%d]': %d, %d, %d." % (dic['name'], inputIdx, groups, channels, filterChannels, src_layer['name'], src_inp, src_layer['groups'][src_inp], src_layer['channels'][src_inp], src_layer['filterChannels'][src_inp])) + filterConns = src_layer['filterConns'][src_inp] + return filterConns + + def parse(self, name, mcp, prev_layers, model): + dic = WeightLayerParser.parse(self, name, mcp, prev_layers, model) + dic['requiresParams'] = True + dic['usesActs'] = False + # Supplied values + dic['channels'] = mcp.safe_get_int_list(name, 'channels') + dic['padding'] = mcp.safe_get_int_list(name, 'padding', default=[0]*len(dic['inputs'])) + dic['stride'] = mcp.safe_get_int_list(name, 'stride', default=[1]*len(dic['inputs'])) + dic['filterSize'] = mcp.safe_get_int_list(name, 'filterSize') + dic['filters'] = mcp.safe_get_int_list(name, 'filters') + dic['groups'] = mcp.safe_get_int_list(name, 'groups', default=[1]*len(dic['inputs'])) + dic['randSparse'] = mcp.safe_get_bool_list(name, 'randSparse', default=[False]*len(dic['inputs'])) + dic['initW'] = mcp.safe_get_float_list(name, 'initW') + dic['initCFunc'] = mcp.safe_get(name, 'initCFunc', default='') + + self.verify_num_params(['channels', 'padding', 'stride', 'filterSize', \ + 'filters', 'groups', 'randSparse', 'initW']) + + self.verify_num_range(dic['stride'], 'stride', 1, None) + self.verify_num_range(dic['filterSize'],'filterSize', 1, None) + self.verify_num_range(dic['padding'], 'padding', 0, None) + self.verify_num_range(dic['channels'], 'channels', 1, None) + self.verify_num_range(dic['groups'], 'groups', 1, None) + + # Computed values + dic['imgPixels'] = [numInputs/channels for numInputs,channels in zip(dic['numInputs'], dic['channels'])] + dic['imgSize'] = [int(n.sqrt(imgPixels)) for imgPixels in dic['imgPixels']] + self.verify_num_range(dic['imgSize'], 'imgSize', 1, None) + dic['filters'] = [filters*groups for filters,groups in zip(dic['filters'], dic['groups'])] + dic['filterPixels'] = [filterSize**2 for filterSize in dic['filterSize']] + dic['modulesX'] = [1 + int(ceil((2 * padding + imgSize - filterSize) / float(stride))) for padding,imgSize,filterSize,stride in zip(dic['padding'], dic['imgSize'], dic['filterSize'], dic['stride'])] + + dic['filterChannels'] = [channels/groups for channels,groups in zip(dic['channels'], dic['groups'])] + if max(dic['randSparse']): # When randSparse is turned on for any input, filterChannels must be given for all of them + dic['filterChannels'] = mcp.safe_get_int_list(name, 'filterChannels', default=dic['filterChannels']) + self.verify_num_params(['filterChannels']) + + if len(set(dic['modulesX'])) != 1 or len(set(dic['filters'])) != 1: + 
raise LayerParsingError("Layer '%s': all inputs must produce equally-dimensioned output. Dimensions are: %s." % (name, ", ".join("%dx%dx%d" % (filters, modulesX, modulesX) for filters,modulesX in zip(dic['filters'], dic['modulesX'])))) + + dic['modulesX'] = dic['modulesX'][0] + dic['modules'] = dic['modulesX']**2 + dic['filters'] = dic['filters'][0] + dic['outputs'] = dic['modules'] * dic['filters'] + dic['filterConns'] = [[]] * len(dic['inputs']) + for i in xrange(len(dic['inputs'])): + if dic['numInputs'][i] % dic['imgPixels'][i] != 0 or dic['imgSize'][i] * dic['imgSize'][i] != dic['imgPixels'][i]: + raise LayerParsingError("Layer '%s[%d]': has %-d dimensional input, not interpretable as square %d-channel images" % (name, i, dic['numInputs'][i], dic['channels'][i])) + if dic['channels'][i] > 3 and dic['channels'][i] % 4 != 0: + raise LayerParsingError("Layer '%s[%d]': number of channels must be smaller than 4 or divisible by 4" % (name, i)) + if dic['filterSize'][i] > 2 * dic['padding'][i] + dic['imgSize'][i]: + raise LayerParsingError("Layer '%s[%d]': filter size (%d) greater than image size + 2 * padding (%d)" % (name, i, dic['filterSize'][i], 2 * dic['padding'][i] + dic['imgSize'][i])) + + if dic['randSparse'][i]: # Random sparse connectivity requires some extra checks + if dic['groups'][i] == 1: + raise LayerParsingError("Layer '%s[%d]': number of groups must be greater than 1 when using random sparse connectivity" % (name, i)) + self.verify_divisible(dic['channels'][i], dic['filterChannels'][i], 'channels', 'filterChannels', input_idx=i) + self.verify_divisible(dic['filterChannels'][i], 4, 'filterChannels', input_idx=i) + self.verify_divisible( dic['groups'][i]*dic['filterChannels'][i], dic['channels'][i], 'groups * filterChannels', 'channels', input_idx=i) + dic['filterConns'][i] = self.gen_rand_conns(dic['groups'][i], dic['channels'][i], dic['filterChannels'][i], i) + else: + if dic['groups'][i] > 1: + self.verify_divisible(dic['channels'][i], 4*dic['groups'][i], 'channels', '4 * groups', input_idx=i) + self.verify_divisible(dic['channels'][i], dic['groups'][i], 'channels', 'groups', input_idx=i) + + self.verify_divisible(dic['filters'], 16*dic['groups'][i], 'filters * groups', input_idx=i) + + dic['padding'][i] = -dic['padding'][i] + dic['overSample'] = [groups*filterChannels/channels for groups,filterChannels,channels in zip(dic['groups'], dic['filterChannels'], dic['channels'])] + dic['weightsPerFilter'] = [fc * (fz**2) for fc, fz in zip(dic['filterChannels'], dic['filterSize'])] + + return dic + +class ConvLayerParser(LocalLayerParser): + def __init__(self): + LocalLayerParser.__init__(self) + + def add_params(self, mcp): + LocalLayerParser.add_params(self, mcp) + self.dic['wcNormMax'] = mcp.safe_get_float_list(self.dic['name'], 'wcNormMax', default=[0.0] * len(self.dic['inputs'])) + self.dic['wcNormMin'] = mcp.safe_get_float_list(self.dic['name'], 'wcNormMin', default=[0.0] * len(self.dic['inputs'])) + self.verify_num_params(['wcNormMax', 'wcNormMin']) + for min,max in zip(self.dic['wcNormMin'], self.dic['wcNormMax']): + if min > max: + raise LayerParsingError("Layer '%s': wcNormMin must be <= wcNormMax." 
% (self.dic['name'])) + + def parse(self, name, mcp, prev_layers, model): + dic = LocalLayerParser.parse(self, name, mcp, prev_layers, model) + + dic['partialSum'] = mcp.safe_get_int(name, 'partialSum') + dic['sharedBiases'] = mcp.safe_get_bool(name, 'sharedBiases', default=True) + + if dic['partialSum'] != 0 and dic['modules'] % dic['partialSum'] != 0: + raise LayerParsingError("Layer '%s': convolutional layer produces %dx%d=%d outputs per filter, but given partialSum parameter (%d) does not divide this number" % (name, dic['modulesX'], dic['modulesX'], dic['modules'], dic['partialSum'])) + + num_biases = dic['filters'] if dic['sharedBiases'] else dic['modules']*dic['filters'] + + eltmult = lambda list1, list2: [l1 * l2 for l1,l2 in zip(list1, list2)] + self.make_weights(dic['initW'], eltmult(dic['filterPixels'], dic['filterChannels']), [dic['filters']] * len(dic['inputs']), order='C') + self.make_biases(num_biases, 1, order='C') + + print "Initialized convolutional layer '%s' on GPU %d, producing %dx%d %d-channel output" % (name, dic['gpu'], dic['modulesX'], dic['modulesX'], dic['filters']) + return dic + +class LocalUnsharedLayerParser(LocalLayerParser): + def __init__(self): + LocalLayerParser.__init__(self) + + def parse(self, name, mcp, prev_layers, model): + dic = LocalLayerParser.parse(self, name, mcp, prev_layers, model) + eltmult = lambda list1, list2: [l1 * l2 for l1,l2 in zip(list1, list2)] + scmult = lambda x, lst: [x * l for l in lst] + self.make_weights(dic['initW'], scmult(dic['modules'], eltmult(dic['filterPixels'], dic['filterChannels'])), [dic['filters']] * len(dic['inputs']), order='C') + self.make_biases(dic['modules'] * dic['filters'], 1, order='C') + + print "Initialized locally-connected layer '%s' on GPU %d, producing %dx%d %d-channel output" % (name, dic['gpu'], dic['modulesX'], dic['modulesX'], dic['filters']) + return dic + +class DataLayerParser(LayerParser): + def __init__(self): + LayerParser.__init__(self) + + def parse(self, name, mcp, prev_layers, model): + dic = LayerParser.parse(self, name, mcp, prev_layers, model) + dic['dataIdx'] = mcp.safe_get_int(name, 'dataIdx') + dic['outputs'] = model.train_data_provider.get_data_dims(idx=dic['dataIdx']) + + print "Initialized data layer '%s', producing %d outputs" % (name, dic['outputs']) + return dic + +class SoftmaxLayerParser(LayerWithInputParser): + def __init__(self): + LayerWithInputParser.__init__(self, num_inputs=1) + + def parse(self, name, mcp, prev_layers, model): + dic = LayerWithInputParser.parse(self, name, mcp, prev_layers, model) + dic['outputs'] = dic['inputLayers'][0]['outputs'] + print "Initialized softmax layer '%s' on GPU %d, producing %d outputs" % (name, dic['gpu'], dic['outputs']) + return dic + +class ConcatentionLayerParser(LayerWithInputParser): + def __init__(self): + LayerWithInputParser.__init__(self) + + def parse(self, name, mcp, prev_layers, model): + dic = LayerWithInputParser.parse(self, name, mcp, prev_layers, model) + dic['outputs'] = sum(l['outputs'] for l in dic['inputLayers']) + dic['copyOffsets'] = [sum(dic['inputLayers'][j]['outputs'] for j in xrange(i)) for i in xrange(len(dic['inputLayers']))] + print "Initialized concatenation layer '%s' on GPU %d, producing %d outputs" % (name, dic['gpu'], dic['outputs']) + return dic + +class PoolLayerParser(LayerWithInputParser): + def __init__(self): + LayerWithInputParser.__init__(self, num_inputs=1) + + def add_params(self, mcp): + LayerWithInputParser.add_params(self, mcp) + dic, name = self.dic, self.dic['name'] + if 
dic['pool'] == 'rand': + dic['doMax'] = mcp.safe_get_bool(name, 'doMax', default=False) + + def parse(self, name, mcp, prev_layers, model): + dic = LayerWithInputParser.parse(self, name, mcp, prev_layers, model) + dic['channels'] = mcp.safe_get_int(name, 'channels') + dic['sizeX'] = mcp.safe_get_int(name, 'sizeX') + dic['start'] = mcp.safe_get_int(name, 'start', default=0) + dic['stride'] = mcp.safe_get_int(name, 'stride') + dic['outputsX'] = mcp.safe_get_int(name, 'outputsX', default=0) + dic['pool'] = mcp.safe_get(name, 'pool') + dic['requiresParams'] = dic['pool'] == 'rand' + + # Avg pooler does not use its acts or inputs + dic['usesActs'] = 'pool' != 'avg' + dic['usesInputs'] = 'pool' != 'avg' + + dic['imgPixels'] = dic['numInputs'][0] / dic['channels'] + dic['imgSize'] = int(n.sqrt(dic['imgPixels'])) + + self.verify_num_range(dic['sizeX'], 'sizeX', 1, dic['imgSize']) + self.verify_num_range(dic['stride'], 'stride', 1, dic['sizeX']) + self.verify_num_range(dic['outputsX'], 'outputsX', 0, None) + self.verify_num_range(dic['channels'], 'channels', 1, None) + + if LayerWithInputParser.grad_consumers_below(dic): + self.verify_divisible(dic['channels'], 16, 'channels') + self.verify_str_in(dic['pool'], 'pool', ['max', 'maxabs', 'avg', 'rand']) + + self.verify_img_size() + + if dic['outputsX'] <= 0: + dic['outputsX'] = int(ceil((dic['imgSize'] - dic['start'] - dic['sizeX']) / float(dic['stride']))) + 1; + dic['outputs'] = dic['outputsX']**2 * dic['channels'] + + print "Initialized %s-pooling layer '%s' on GPU %d, producing %dx%d %d-channel output" % (dic['pool'], name, dic['gpu'], dic['outputsX'], dic['outputsX'], dic['channels']) + return dic + +class NormLayerParser(LayerWithInputParser): + RESPONSE_NORM = 'response' + CONTRAST_NORM = 'contrast' + CROSSMAP_RESPONSE_NORM = 'cross-map response' + + def __init__(self, norm_type): + LayerWithInputParser.__init__(self, num_inputs=1) + self.norm_type = norm_type + + def add_params(self, mcp): + LayerWithInputParser.add_params(self, mcp) + dic, name = self.dic, self.dic['name'] + dic['scale'] = mcp.safe_get_float(name, 'scale') + dic['scale'] /= dic['size'] if self.norm_type == self.CROSSMAP_RESPONSE_NORM else dic['size']**2 + dic['pow'] = mcp.safe_get_float(name, 'pow') + if self.norm_type == self.CROSSMAP_RESPONSE_NORM: + dic['minDiv'] = mcp.safe_get_float(name, 'minDiv', default=1.0) + + def parse(self, name, mcp, prev_layers, model): + dic = LayerWithInputParser.parse(self, name, mcp, prev_layers, model) + dic['requiresParams'] = True + dic['channels'] = mcp.safe_get_int(name, 'channels') + dic['size'] = mcp.safe_get_int(name, 'size') + dic['blocked'] = mcp.safe_get_bool(name, 'blocked', default=False) + + dic['imgPixels'] = dic['numInputs'][0] / dic['channels'] + dic['imgSize'] = int(n.sqrt(dic['imgPixels'])) + + # Contrast normalization layer does not use its inputs + dic['usesInputs'] = self.norm_type != self.CONTRAST_NORM + + self.verify_num_range(dic['channels'], 'channels', 1, None) + if self.norm_type == self.CROSSMAP_RESPONSE_NORM: + self.verify_num_range(dic['size'], 'size', 2, dic['channels']) + if dic['channels'] % 16 != 0: + raise LayerParsingError("Layer '%s': number of channels must be divisible by 16 when using crossMap" % name) + else: + self.verify_num_range(dic['size'], 'size', 1, dic['imgSize']) + + if self.norm_type != self.CROSSMAP_RESPONSE_NORM and dic['channels'] > 3 and dic['channels'] % 4 != 0: + raise LayerParsingError("Layer '%s': number of channels must be smaller than 4 or divisible by 4" % name) + + 
self.verify_img_size() + + dic['outputs'] = dic['imgPixels'] * dic['channels'] + print "Initialized %s-normalization layer '%s' on GPU %d, producing %dx%d %d-channel output" % (self.norm_type, name, dic['gpu'], dic['imgSize'], dic['imgSize'], dic['channels']) + return dic + +class CostParser(LayerWithInputParser): + def __init__(self, num_inputs=-1): + LayerWithInputParser.__init__(self, num_inputs=num_inputs) + + def parse(self, name, mcp, prev_layers, model): + dic = LayerWithInputParser.parse(self, name, mcp, prev_layers, model) + dic['requiresParams'] = True + del dic['neuron'] + return dic + + def add_params(self, mcp): + LayerWithInputParser.add_params(self, mcp) + dic, name = self.dic, self.dic['name'] + dic['coeff'] = mcp.safe_get_float(name, 'coeff') + +class CrossEntCostParser(CostParser): + def __init__(self): + CostParser.__init__(self, num_inputs=2) + + def parse(self, name, mcp, prev_layers, model): + dic = CostParser.parse(self, name, mcp, prev_layers, model) + if dic['numInputs'][0] != model.train_data_provider.get_num_classes(): # first input must be labels + raise LayerParsingError("Layer '%s': Dimensionality of first input must be equal to number of labels" % name) + if dic['inputLayers'][1]['type'] != 'softmax': + raise LayerParsingError("Layer '%s': Second input must be softmax layer" % name) + if dic['numInputs'][1] != model.train_data_provider.get_num_classes(): + raise LayerParsingError("Layer '%s': Softmax input '%s' must produce %d outputs, because that is the number of classes in the dataset" \ + % (name, dic['inputs'][1], model.train_data_provider.get_num_classes())) + + print "Initialized cross-entropy cost '%s' on GPU %d" % (name, dic['gpu']) + return dic + +class LogregCostParser(CostParser): + def __init__(self): + CostParser.__init__(self, num_inputs=2) + + def add_params(self, mcp): + CostParser.add_params(self, mcp) + dic, name = self.dic, self.dic['name'] + dic['topk'] = mcp.safe_get_int(name, 'topk', default=1) + + def parse(self, name, mcp, prev_layers, model): + dic = CostParser.parse(self, name, mcp, prev_layers, model) + dic['requiresParams'] = True + if dic['numInputs'][0] != 1: # first input must be labels + raise LayerParsingError("Layer '%s': dimensionality of first input must be 1" % name) + if dic['inputLayers'][1]['type'] != 'softmax': + raise LayerParsingError("Layer '%s': second input must be softmax layer" % name) + if dic['numInputs'][1] != model.train_data_provider.get_num_classes(): + raise LayerParsingError("Layer '%s': softmax input '%s' must produce %d outputs, because that is the number of classes in the dataset" \ + % (name, dic['inputs'][1], model.train_data_provider.get_num_classes())) + + print "Initialized logistic regression cost '%s' on GPU %d" % (name, dic['gpu']) + return dic + +class FlickrBaseCost(CostParser): + def __init__(self, cost_name): + CostParser.__init__(self, num_inputs=2) + self.cost_name = cost_name + + def parse(self, name, mcp, prev_layers, model): + dic = CostParser.parse(self, name, mcp, prev_layers, model) + for i in xrange(2): + if dic['numInputs'][i] != model.train_data_provider.get_num_classes(): + raise LayerParsingError("Layer '%s': input '%s' must produce %d outputs, because that is the number of classes in the dataset" \ + % (name, dic['inputs'][i], model.train_data_provider.get_num_classes())) + if 'neuron' not in dic['inputLayers'][1] or dic['inputLayers'][1]['neuron'] != 'logistic': + print "WARNING: Layer '%s': input '%s' is not logistic, results may not be what you intend." 
% (dic['name'], dic['inputs'][1]) + print "Initialized %s cost '%s' on GPU %d" % (self.cost_name, name, dic['gpu']) + return dic + +class CrossEnt2CostParser(FlickrBaseCost): + def __init__(self): + FlickrBaseCost.__init__(self, "elementwise cross-entropy") + +class RobustFlickrCost(FlickrBaseCost): + def __init__(self): + FlickrBaseCost.__init__(self, "robust Flickr") + + +class MultiSoftmaxCostParser(CostParser): + def __init__(self): + CostParser.__init__(self, num_inputs=2) + + def add_params(self, mcp): + CostParser.add_params(self, mcp) + dic, name = self.dic, self.dic['name'] + dic['setSize'] = mcp.safe_get_int(name, 'setSize') + dic['threads'] = mcp.safe_get_int(name, 'threads') + self.verify_num_range(dic['setSize'], 'setSize', 1, dic['numOut'] - 1, '%d') + self.verify_num_range(dic['threads'], 'threads', 1, 32, '%d') + + def parse(self, name, mcp, prev_layers, model): + dic = CostParser.parse(self, name, mcp, prev_layers, model) + if dic['numInputs'][0] != 1: # first input must be labels + raise LayerParsingError("Layer '%s': dimensionality of first input must be 1" % name) + if dic['numInputs'][1] != model.train_data_provider.get_num_classes(): + raise LayerParsingError("Layer '%s': input '%s' must produce %d outputs, because that is the number of classes in the dataset" \ + % (name, dic['inputs'][1], model.train_data_provider.get_num_classes())) + + dic['numOut'] = dic['numInputs'][1] + + print "Initialized multi-softmax cost '%s' on GPU %d" % (name, dic['gpu']) + return dic + +class SumOfSquaresCostParser(CostParser): + def __init__(self): + CostParser.__init__(self, num_inputs=1) + + def parse(self, name, mcp, prev_layers, model): + dic = CostParser.parse(self, name, mcp, prev_layers, model) + print "Initialized sum-of-squares cost '%s' on GPU %d" % (name, dic['gpu']) + return dic + +class GatedSumOfSquaresCostParser(CostParser): + def __init__(self): + CostParser.__init__(self, num_inputs=2) + + def parse(self, name, mcp, prev_layers, model): + dic = CostParser.parse(self, name, mcp, prev_layers, model) + + self.verify_input_dims([1, None]) # First input is gate + + print "Initialized gated sum-of-squares cost '%s' on GPU %d" % (name, dic['gpu']) + return dic + +class TICACostParser(CostParser): + def __init__(self): + CostParser.__init__(self, num_inputs=1) + + def parse(self, name, mcp, prev_layers, model): + dic = CostParser.parse(self, name, mcp, prev_layers, model) + + dic['channels'] = mcp.safe_get_int(name, 'channels') + dic['sizeX'] = mcp.safe_get_int(name, 'sizeX') + + dic['imgPixels'] = dic['numInputs'][0] / dic['channels'] + dic['imgSize'] = int(n.sqrt(dic['imgPixels'])) + + self.verify_img_size() + + print "Initialized TICA cost '%s' on GPU %d" % (name, dic['gpu']) + return dic + +# All the layer parsers +layer_parsers = {'data': lambda : DataLayerParser(), + 'fc': lambda : FCLayerParser(), + 'treefc': lambda : TreeFCLayerParser(), + 'conv': lambda : ConvLayerParser(), + 'local': lambda : LocalUnsharedLayerParser(), + 'softmax': lambda : SoftmaxLayerParser(), + 'eltsum': lambda : EltwiseSumLayerParser(), + 'eltmax': lambda : EltwiseMaxLayerParser(), + 'neuron': lambda : NeuronLayerParser(), + 'pool': lambda : PoolLayerParser(), + 'rnorm': lambda : NormLayerParser(NormLayerParser.RESPONSE_NORM), + 'cnorm': lambda : NormLayerParser(NormLayerParser.CONTRAST_NORM), + 'cmrnorm': lambda : NormLayerParser(NormLayerParser.CROSSMAP_RESPONSE_NORM), + 'nailbed': lambda : NailbedLayerParser(), + 'blur': lambda : GaussianBlurLayerParser(), + 'href': lambda : 
HorizontalReflectionLayerParser(), + 'resize': lambda : ResizeLayerParser(), + 'rgb2yuv': lambda : RGBToYUVLayerParser(), + 'rgb2lab': lambda : RGBToLABLayerParser(), + 'rscale': lambda : RandomScaleLayerParser(), + 'concat': lambda : ConcatentionLayerParser(), + 'hs': lambda : HiddenSexLayerParser(), + 'cost.logreg': lambda : LogregCostParser(), + 'cost.msm': lambda : MultiSoftmaxCostParser(), + 'cost.crossent': lambda : CrossEntCostParser(), + 'cost.crossent2': lambda : CrossEnt2CostParser(), + 'cost.sum2': lambda : SumOfSquaresCostParser(), + 'cost.gsum2': lambda : GatedSumOfSquaresCostParser(), + 'cost.tica': lambda : TICACostParser(), + 'cost.rflickr': lambda : RobustFlickrCost()} + +# All the neuron parsers +# This isn't a name --> parser mapping as the layer parsers above because neurons don't have fixed names. +# A user may write tanh[0.5,0.25], etc. +neuron_parsers = sorted([NeuronParser('ident', 'f(x) = x', uses_acts=False, uses_inputs=False), + NeuronParser('logistic', 'f(x) = 1 / (1 + e^-x)', uses_acts=True, uses_inputs=False), + NeuronParser('abs', 'f(x) = |x|', uses_acts=False, uses_inputs=True), + NeuronParser('relu', 'f(x) = max(0, x)', uses_acts=True, uses_inputs=False), + NeuronParser('nrelu', 'f(x) = max(0, x) + noise', uses_acts=True, uses_inputs=False), + NeuronParser('softrelu', 'f(x) = log(1 + e^x)', uses_acts=True, uses_inputs=False), + NeuronParser('square', 'f(x) = x^2', uses_acts=False, uses_inputs=True), + NeuronParser('sqrt', 'f(x) = sqrt(x)', uses_acts=True, uses_inputs=False), + ParamNeuronParser('tanh[a,b]', 'f(x) = a * tanh(b * x)', uses_acts=True, uses_inputs=False), + ParamNeuronParser('brelu[a]', 'f(x) = min(a, max(0, x))', uses_acts=True, uses_inputs=False), + ParamNeuronParser('linear[a,b]', 'f(x) = a * x + b', uses_acts=True, uses_inputs=False), + ParamNeuronParser('drelu[a]', 'f(x) = x - a * tanh(x / a)', uses_acts=False, uses_inputs=True)], + key=lambda x:x.type) + +lrs_parsers = sorted([ParamParser('default'), + ParamParser('linear[ftgtFactor,fnoiseStdev]'), + ParamParser('exp[ftgtFactor,fnoiseStdev]'), + ParamParser('dexp[ftgtFactor,fnoiseStdev,inumSteps]'), + ParamParser('jdexp[ftgtFactor,fnoiseStdev,inumSteps]')]) + +quant_parsers = sorted([ParamParser('default'), + ParamParser('half')]) diff --git a/layers-cifar/layer-params-18pct-noisylr.cfg b/layers-cifar/layer-params-18pct-noisylr.cfg new file mode 100644 index 0000000..abebece --- /dev/null +++ b/layers-cifar/layer-params-18pct-noisylr.cfg @@ -0,0 +1,47 @@ +# 18% error on CIFAR-10 in 20 minutes - layer definition file + +# Reduce all learning rates by factor of 10 after 120 epochs. +# Then another factor of 10 after 10 more epochs. 
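The two factor-of-10 reductions described above are applied by hand: the epsW/epsB values below are edited and training is resumed, which matches the per-epoch notes ("set epsw to 0.001 from 0.01") in the layer-params files later in this patch. The schedW=linear[1,1] lines select the linear[ftgtFactor,fnoiseStdev] learning-rate schedule registered in lrs_parsers in layer.py; the schedule itself appears to be evaluated on the C++ side (include/lr.cuh in this patch) rather than in this file. A throwaway helper along the following lines could apply one manual reduction step; the script is only a sketch and is not part of this patch.

# rescale_eps.py -- hypothetical helper: multiply every epsW/epsB value in a
# layer-params .cfg file by a given factor (e.g. 0.1 for the steps above).
import sys

def rescale(path, factor):
    out = []
    for line in open(path):
        key, sep, val = line.partition('=')
        if sep and key.strip() in ('epsW', 'epsB'):
            # values may be comma-separated lists, one entry per input
            scaled = ','.join('%g' % (float(v) * factor) for v in val.split(','))
            line = key + '=' + scaled + '\n'
        out.append(line)
    open(path, 'w').write(''.join(out))

if __name__ == '__main__':
    # e.g.: python rescale_eps.py layer-params-18pct-noisylr.cfg 0.1
    rescale(sys.argv[1], float(sys.argv[2]))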
+ +[conv1] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 +schedW=linear[1,1] + +[conv2] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 +schedW=linear[1,1] + +[conv3] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 +schedW=linear[1,1] + +[fc10] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=1 +schedW=linear[1,1] + +[logprob] +coeff=1 + +[rnorm1] +scale=0.00005 +pow=.75 + +[rnorm2] +scale=0.00005 +pow=.75 diff --git a/layers-cifar/layer-params-conv-local-13pct-noisylr.cfg b/layers-cifar/layer-params-conv-local-13pct-noisylr.cfg new file mode 100644 index 0000000..7a4e562 --- /dev/null +++ b/layers-cifar/layer-params-conv-local-13pct-noisylr.cfg @@ -0,0 +1,45 @@ +# 13% error on CIFAR-10 - layer parameter file +# See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology + +[conv1] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.00 +schedW=linear[1,1] + +[conv2] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.00 +schedW=linear[1,1] + +[local3] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 +schedW=linear[1,1] + +[local4] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 +schedW=linear[1,1] + +[fc10] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.004 +schedW=linear[1,1] + +[logprob] +coeff=1 diff --git a/layers-cifar/layers-18pct.cfg b/layers-cifar/layers-18pct.cfg new file mode 100644 index 0000000..50d8869 --- /dev/null +++ b/layers-cifar/layers-18pct.cfg @@ -0,0 +1,106 @@ +# 18% error on CIFAR-10 in 20 minutes - layer definition file + +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1] +type=conv +inputs=data +channels=3 +filters=32 +padding=2 +stride=1 +filterSize=5 +initW=0.0001 +partialSum=4 +sharedBiases=1 +gpu=0 + +[pool1] +type=pool +pool=max +inputs=conv1 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=32 +neuron=relu + +[rnorm1] +type=rnorm +inputs=pool1 +channels=32 +size=3 + +[conv2] +type=conv +inputs=rnorm1 +filters=32 +padding=2 +stride=1 +filterSize=5 +channels=32 +neuron=relu +initW=0.01 +partialSum=4 +sharedBiases=1 + +[pool2] +type=pool +pool=avg +inputs=conv2 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=32 + +[rnorm2] +type=rnorm +inputs=pool2 +channels=32 +size=3 + +[conv3] +type=conv +inputs=rnorm2 +filters=64 +padding=2 +stride=1 +filterSize=5 +channels=32 +neuron=relu +initW=0.01 +partialSum=4 +sharedBiases=1 + +[pool3] +type=pool +pool=avg +inputs=conv3 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=64 + +[fc10] +type=fc +outputs=10 +inputs=pool3 +initW=0.01 + +[probs] +type=softmax +inputs=fc10 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers-cifar/layers-conv-local-13pct.cfg b/layers-cifar/layers-conv-local-13pct.cfg new file mode 100644 index 0000000..996aa0c --- /dev/null +++ b/layers-cifar/layers-conv-local-13pct.cfg @@ -0,0 +1,95 @@ +# 13% error on CIFAR-10 in 20 minutes - layer definition file +# See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology + +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1] +type=conv +inputs=data +channels=3 +filters=64 +padding=2 +stride=1 +filterSize=5 +neuron=relu +initW=0.0001 +partialSum=4 +sharedBiases=1 +gpu=0 + +[pool1] +type=pool +pool=max +inputs=conv1 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=64 + +[conv2] +type=conv +inputs=pool1 +filters=64 +padding=2 +stride=1 +filterSize=5 +channels=64 +neuron=relu +initW=0.01 +partialSum=8 +sharedBiases=1 + +[pool2] +type=pool +pool=max +inputs=conv2 +start=0 +sizeX=3 +stride=2 +outputsX=0 +channels=64 + 
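The [local3] and [local4] sections that follow use type=local, the locally-connected layer parsed by LocalUnsharedLayerParser above: every filter position gets its own weights, so each weight matrix has modules * filterPixels * filterChannels rows instead of filterPixels * filterChannels. A back-of-the-envelope check of the sizes, assuming the standard 32x32 CIFAR-10 input and using the modulesX/outputsX formulas from layer.py, looks like the sketch below (illustrative only).

# Size check for layers-conv-local-13pct.cfg, assuming 32x32 CIFAR-10 input.
from math import ceil

def conv_out(img, filter_size, pad, stride):
    # modulesX = 1 + ceil((2*padding + imgSize - filterSize) / stride)
    return 1 + int(ceil((2 * pad + img - filter_size) / float(stride)))

def pool_out(img, size_x, start, stride):
    # outputsX = ceil((imgSize - start - sizeX) / stride) + 1
    return int(ceil((img - start - size_x) / float(stride))) + 1

img = 32                              # CIFAR-10 images are 32x32
img = conv_out(img, 5, 2, 1)          # conv1  -> 32x32
img = pool_out(img, 3, 0, 2)          # pool1  -> 16x16
img = conv_out(img, 5, 2, 1)          # conv2  -> 16x16
img = pool_out(img, 3, 0, 2)          # pool2  -> 8x8
img = conv_out(img, 3, 1, 1)          # local3 -> 8x8, i.e. 64 filter positions
rows, cols = img * img * 3 * 3 * 64, 32   # local3 weights: 36864 x 32, unshared
img = conv_out(img, 3, 1, 1)          # local4 -> 8x8
print("local3 weights %dx%d, local4 output %d values" % (rows, cols, img * img * 32))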
+[local3] +type=local +inputs=pool2 +filters=32 +padding=1 +stride=1 +filterSize=3 +channels=64 +neuron=relu +initW=0.04 + +[local4] +type=local +inputs=local3 +filters=32 +padding=1 +stride=1 +filterSize=3 +channels=32 +neuron=relu +initW=0.04 + +[fc10] +type=fc +outputs=10 +inputs=local4 +initW=0.01 +neuron=ident + +[probs] +type=softmax +inputs=fc10 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layer-params-100.cfg b/layers/layer-params-100.cfg new file mode 100644 index 0000000..240e8f6 --- /dev/null +++ b/layers/layer-params-100.cfg @@ -0,0 +1,157 @@ +[conv1a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv1b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv2a] +epsW=0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 + +[conv2b] +epsW=0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 + +[conv3a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.001 +pow=0.25 + +[rnorm1b] +scale=0.001 +pow=0.25 + +[rnorm2a] +scale=0.001 +pow=0.25 + +[rnorm2b] +scale=0.001 +pow=0.25 + +# on guppy7 +# this is like #97 (on gpu) but with different rnorm coeffs +# /nobackup/kriz/tmp/ConvNet__2012-06-27_14.03.18 +# epoch 15: set epsw to 0.001 from 0.01 +# epoch 43: killed, seems slightly worse than using my old rnorm coeffs diff --git a/layers/layer-params-106.cfg b/layers/layer-params-106.cfg new file mode 100644 index 0000000..98daf67 --- /dev/null +++ b/layers/layer-params-106.cfg @@ -0,0 +1,184 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.002 
+momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #101 but with contrast normalization layers over rnorm2 +# on guppy7 +# logs/layers-106.log +# /nobackup/kriz/tmp/ConvNet__2012-07-07_21.11.34 +# epoch 22: set epsw to 0.001 from 0.01 +# epoch 31: killed, turns out weight contrast normalization is better + +# restart after fixing cnorm +# on guppy9 +# logs/layers-106a.log +# /nobackup/kriz/tmp/ConvNet__2012-07-17_19.06.09 +# epoch 21: set epsw to 0.001 from 0.01 + +# restart with proper learning rate +# logs/layers-106b.log +# /nobackup/kriz/tmp/ConvNet__2012-07-19_04.15.40 +# epoch 23: set epsw to 0.001 from 0.01 +# epoch 46: set epsw to 0.0001 from 0.001 +# epoch 61: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 72: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 79: set epsw to 0.00001 from 0.0001 +# epoch 93: killed +# [1.5942473039940013, 0.3705782743769917, 0.16672222296297284] diff --git a/layers/layer-params-107.cfg b/layers/layer-params-107.cfg new file mode 100644 index 0000000..c31eeb9 --- /dev/null +++ b/layers/layer-params-107.cfg @@ -0,0 +1,167 @@ +[conv1a] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +wcnorm=0.00 +wcNormMin=0.001 +wcNormMax=0.002 + +[conv1b] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +wcnorm=0.00 +wcNormMin=0.001 +wcNormMax=0.002 + +[conv2a] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv2b] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv3a] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 
+pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# this is like #101 but uses wcnorm in conv1/conv2. so it uses def file #101. +# it's also like #104, but #104 only does wcnorm on conv2 +# on guppy7 +# logs/layers-107.log +# /nobackup/kriz/tmp/ConvNet__2012-07-09_19.20.14 diff --git a/layers/layer-params-109.cfg b/layers/layer-params-109.cfg new file mode 100644 index 0000000..d7186f2 --- /dev/null +++ b/layers/layer-params-109.cfg @@ -0,0 +1,187 @@ +[conv1a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +wcnorm=0.00 + +[conv1b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +wcnorm=0.00 + +[conv2a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv2b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv3a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc1536a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1536b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1536ba] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1536bb] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1536ca] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1536cb] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs3a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[hs3b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# this is like #101 but uses wcnorm in conv2 and also has 3 fc layers. 
+# on guppy9 +# logs/layers-109.log +# /nobackup/kriz/tmp/ConvNet__2012-07-10_00.46.52 +# epoch 17: set epsw to 0.001 from 0.01 +# epoch 26: enabled dropout on hs3a,hs3b +# epoch 27: killed -- overfitting as feared diff --git a/layers/layer-params-110.cfg b/layers/layer-params-110.cfg new file mode 100644 index 0000000..510f98d --- /dev/null +++ b/layers/layer-params-110.cfg @@ -0,0 +1,146 @@ +[conv1a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +# this is like #101 but without rnorm. 
it's about time i found out how helpful it is to modern nets +# on guppy7 +# logs/layers-110.log +# /nobackup/kriz/tmp/ConvNet__2012-07-11_00.26.55 +# epoch 19: set epsw to 0.001 from 0.01 +# epoch 46: set epsw to 0.0001 from 0.001 +# epoch 67: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 66: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 75: killed, it looks to be about 1% worse than #101 diff --git a/layers/layer-params-111.cfg b/layers/layer-params-111.cfg new file mode 100644 index 0000000..0de2542 --- /dev/null +++ b/layers/layer-params-111.cfg @@ -0,0 +1,187 @@ +[conv1a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +wcnorm=0.00 + +[conv1b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +wcnorm=0.00 + +[conv2a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv2b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv3a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ca] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048cb] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs3a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[hs3b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# this is like #101 but uses wcnorm in conv2 and also has 3 fc layers. 
+# its also like #109 but uses wider fc layers with dropout in all cos 109 overfit +# on guppy9 +# logs/layers-111.log +# /nobackup/kriz/tmp/ConvNet__2012-07-12_23.59.48 +# epoch 19: set epsw to 0.001 from 0.01 +# epoch 42: this is quite a bit worse, and in an underfitting way, so i'm starting #104, which will be like this but the fc layers will be 3072 each instead of 2048 diff --git a/layers/layer-params-112.cfg b/layers/layer-params-112.cfg new file mode 100644 index 0000000..2df041f --- /dev/null +++ b/layers/layer-params-112.cfg @@ -0,0 +1,163 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# on guppy7 +# this is like #101 but with rnorm region of size 5 instead of 9 +# logs/layers-112.log +# epoch 22: set epsw to 0.001 from 0.01 +# epoch 46: set epsw to 0.0001 from 0.001 +# epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 71: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 79: set epsw to 0.00001 from 0.0001 +# epoch 90: killed +# [1.6064990917001289, 0.37237829837731168, 0.16815557540767209] diff --git a/layers/layer-params-113.cfg b/layers/layer-params-113.cfg new file mode 100644 index 0000000..303f67a --- /dev/null +++ b/layers/layer-params-113.cfg @@ -0,0 +1,154 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv3a] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[conv3b] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 
+wball=0 + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# this is like #101 but with conv3 taking both conv2 and conv1 as input, and conv2 just taking the low res img as input +# on guppy9 diff --git a/layers/layer-params-114.cfg b/layers/layer-params-114.cfg new file mode 100644 index 0000000..8168b3a --- /dev/null +++ b/layers/layer-params-114.cfg @@ -0,0 +1,187 @@ +[conv1a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +wcnorm=0.00 + +[conv1b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +wcnorm=0.00 + +[conv2a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv2b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv3a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc3072a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc3072b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc3072ba] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc3072bb] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc3072ca] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc3072cb] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs3a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[hs3b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# this is like #101 but uses wcnorm in conv2 and also has 3 fc layers, with width 6144. 
+# on guppy9 +# logs/layers-114.log +# 140523240 params (incl biases) +# /nobackup/kriz/tmp/ConvNet__2012-07-15_14.56.24 +# epoch 20: set epsw to 0.001 from 0.01 +# epoch 40: killed, doing worse than 115 which is the same but has only 2 fc layers diff --git a/layers/layer-params-115-jpeg.cfg b/layers/layer-params-115-jpeg.cfg new file mode 100644 index 0000000..546d629 --- /dev/null +++ b/layers/layer-params-115-jpeg.cfg @@ -0,0 +1,181 @@ +[conv1a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +wcnorm=0.00 + +[conv1b] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +wcnorm=0.00 + +[conv2a] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv2b] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv3a] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc3072a] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc3072b] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc3072ba] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc3072bb] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc3072ca] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc3072cb] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs1b] +enable=true + +[hs2a] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# this is like 115 (on gpu) but trained on ilya's new imgnet-2010 jpeg +# on guppy7 +# logs/layers-115-jpeg.log +# /nobackup/kriz/tmp/ConvNet__2012-07-18_20.56.13 +# epoch 22: set epsw to 0.001 from 0.01 +# epoch 48: set epsw to 0.0001 from 0.001 +# epoch 58: killed, since this was a duplicate (jpeg) of a suboptimal net anyway diff --git a/layers/layer-params-116.cfg b/layers/layer-params-116.cfg new file mode 100644 index 0000000..0706b32 --- /dev/null +++ b/layers/layer-params-116.cfg @@ -0,0 +1,303 @@ +[conv1a] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1c] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1d] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.01,0.01 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv2b] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv2c] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 
+wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv2d] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv3a] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3c] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3d] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4c] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4d] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5c] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5d] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc1024a] +epsW=0.01,0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024b] +epsW=0.01,0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024c] +epsW=0.01,0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024d] +epsW=0.01,0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2a] +epsW=0.01,0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2b] +epsW=0.01,0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2c] +epsW=0.01,0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2d] +epsW=0.01,0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1000] +epsW=0.01,0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[hs1c] +enable=true + +[hs2c] +enable=true + +[hs1d] +enable=true + +[hs2d] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm1c] +scale=0.0001 +pow=0.75 + +[rnorm1d] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[rnorm2c] +scale=0.0001 +pow=0.75 + +[rnorm2d] +scale=0.0001 +pow=0.75 + +# on guppy8 +# this is like 112, but has wcnorm in conv2, and also its on 4 gpus + diff --git a/layers/layer-params-117.cfg b/layers/layer-params-117.cfg new file mode 100644 index 0000000..f1e31e1 --- /dev/null +++ b/layers/layer-params-117.cfg @@ -0,0 +1,279 @@ +[conv1a] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1c] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1d] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.01,0.01 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 
+wcNormMax=0.002,0 + +[conv2b] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv2c] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv2d] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv3a] +epsW=0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 + +[conv3b] +epsW=0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 + +[conv3c] +epsW=0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 + +[conv4a] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4c] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4d] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5c] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5d] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc1408a] +epsW=0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 + +[fc1408b] +epsW=0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 + +[fc1408c] +epsW=0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 + +[fc1408-2a] +epsW=0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 + +[fc1408-2b] +epsW=0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 + +[fc1408-2c] +epsW=0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 + +[fc1000] +epsW=0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[hs1c] +enable=true + +[hs2c] +enable=true + +[hs1d] +enable=true + +[hs2d] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm1c] +scale=0.0001 +pow=0.75 + +[rnorm1d] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[rnorm2c] +scale=0.0001 +pow=0.75 + +[rnorm2d] +scale=0.0001 +pow=0.75 + +# on guppy8 +# this is like 112, but has wcnorm in conv2, and also its on 4 gpus + diff --git a/layers/layer-params-118.cfg b/layers/layer-params-118.cfg new file mode 100644 index 0000000..06d3a40 --- /dev/null +++ b/layers/layer-params-118.cfg @@ -0,0 +1,168 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 
+wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# on guppy7 +# this is like #112 but with wcnorm on conv2, and also trained on jpeg +# logs/layers-118.log +# /nobackup/kriz/tmp/ConvNet__2012-07-19_18.35.31 +# epoch 23: set epsw to 0.001 from 0.01 +# epoch 46: set epsw to 0.0001 from 0.001 +# epoch 65: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 75: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 84: set epsw to 0.00001 from 0.0001 +# epcoh 98: killed +# [1.640873252105713, 0.37831333333333333, 0.17355999999999999] diff --git a/layers/layer-params-120-2012-full.cfg b/layers/layer-params-120-2012-full.cfg new file mode 100644 index 0000000..460846f --- /dev/null +++ b/layers/layer-params-120-2012-full.cfg @@ -0,0 +1,174 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is 
like #120 (so uses def file #120) but trained on lsvrc-2012 (full) +# on gpu +# /storage/tmp/ConvNet__2012-07-26_04.06.44 +# logs/layers-120-2012-full.log +# epoch 23: set epsw to 0.001 from 0.01 +# epoch 38: moved to guppy9 +# /nobackup/kriz/tmp/ConvNet__2012-07-26_04.06.44 +# epoch 49: set epsw to 0.0001 from 0.001 +# epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 73: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 87: set epsw to 0.00001 from 0.0001 +# epoch 94: killed +# diff --git a/layers/layer-params-120-2012.cfg b/layers/layer-params-120-2012.cfg new file mode 100644 index 0000000..a4da5ca --- /dev/null +++ b/layers/layer-params-120-2012.cfg @@ -0,0 +1,173 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #120 (so uses def file #120) but trained on lsvrc-2012 (non-full) +# on guppy9 +# /nobackup/kriz/tmp/ConvNet__2012-07-24_23.16.15 +# epoch 22: set epsw to 0.001 from 0.01 +# epoch 49: set epsw to 0.0001 from 0.001 +# epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 73: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 81: set epsw to 0.00001 from 0.0001 +# epoch 95: killed +# validation multiview error: +# logprob: 1.765247, 0.410440, 0.187140 + diff --git a/layers/layer-params-120-4gpu-auto2.cfg b/layers/layer-params-120-4gpu-auto2.cfg new file mode 100644 index 0000000..925c52f --- /dev/null +++ b/layers/layer-params-120-4gpu-auto2.cfg @@ -0,0 +1,313 @@ +[conv1a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1c] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1d] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 
+wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2c] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2d] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3c] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3d] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4c] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4d] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5c] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5d] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc1024-1a] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1b] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1c] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1d] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + + +[fc1024-2a] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2b] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2c] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2d] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1000] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs1b] +enable=true + +[hs1c] +enable=true + +[hs1d] +enable=true + +[hs2a] +enable=true + +[hs2b] +enable=true + +[hs2c] +enable=true + +[hs2d] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm1c] +scale=0.0001 +pow=0.75 + +[rnorm1d] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[rnorm2c] +scale=0.0001 +pow=0.75 + +[rnorm2d] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[cnorm2c] +scale=0.001 +pow=0.75 + +[cnorm2d] +scale=0.001 +pow=0.75 + +# this is like #120 but on 4 gpus. 
trained on 2012 (non-full) +# on guppy +# logs/layers-120-4gpu.log +# /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 diff --git a/layers/layer-params-120-4gpu-auto3.cfg b/layers/layer-params-120-4gpu-auto3.cfg new file mode 100644 index 0000000..03635d1 --- /dev/null +++ b/layers/layer-params-120-4gpu-auto3.cfg @@ -0,0 +1,313 @@ +[conv1a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1c] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1d] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2c] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2d] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3c] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3d] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4c] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4d] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5c] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5d] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc1024-1a] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1b] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1c] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1d] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + + +[fc1024-2a] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2b] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2c] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2d] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1000] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs1b] +enable=true + +[hs1c] +enable=true + +[hs1d] +enable=true + +[hs2a] +enable=true + +[hs2b] +enable=true + +[hs2c] +enable=true + +[hs2d] +enable=true + +[rnorm1a] 
+scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm1c] +scale=0.0001 +pow=0.75 + +[rnorm1d] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[rnorm2c] +scale=0.0001 +pow=0.75 + +[rnorm2d] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[cnorm2c] +scale=0.001 +pow=0.75 + +[cnorm2d] +scale=0.001 +pow=0.75 + +# this is like #120 but on 4 gpus. trained on 2012 (non-full) +# on guppy +# logs/layers-120-4gpu.log +# /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 diff --git a/layers/layer-params-120-4gpu-auto4.cfg b/layers/layer-params-120-4gpu-auto4.cfg new file mode 100644 index 0000000..40d2e0e --- /dev/null +++ b/layers/layer-params-120-4gpu-auto4.cfg @@ -0,0 +1,313 @@ +[conv1a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1c] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1d] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2c] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2d] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3c] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3d] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4c] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4d] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5c] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5d] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc1024-1a] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1b] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1c] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1d] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + + +[fc1024-2a] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2b] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2c] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2d] 
+epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1000] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs1b] +enable=true + +[hs1c] +enable=true + +[hs1d] +enable=true + +[hs2a] +enable=true + +[hs2b] +enable=true + +[hs2c] +enable=true + +[hs2d] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm1c] +scale=0.0001 +pow=0.75 + +[rnorm1d] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[rnorm2c] +scale=0.0001 +pow=0.75 + +[rnorm2d] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[cnorm2c] +scale=0.001 +pow=0.75 + +[cnorm2d] +scale=0.001 +pow=0.75 + +# this is like #120 but on 4 gpus. trained on 2012 (non-full) +# on guppy +# logs/layers-120-4gpu.log +# /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 diff --git a/layers/layer-params-120-4gpu-auto5.cfg b/layers/layer-params-120-4gpu-auto5.cfg new file mode 100644 index 0000000..87a59dd --- /dev/null +++ b/layers/layer-params-120-4gpu-auto5.cfg @@ -0,0 +1,313 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1c] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1d] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2c] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2d] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3c] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3d] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4c] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4d] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5c] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5d] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc1024-1a] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1b] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1c] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1d] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 
+wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + + +[fc1024-2a] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2b] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2c] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2d] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1000] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs1b] +enable=true + +[hs1c] +enable=true + +[hs1d] +enable=true + +[hs2a] +enable=true + +[hs2b] +enable=true + +[hs2c] +enable=true + +[hs2d] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm1c] +scale=0.0001 +pow=0.75 + +[rnorm1d] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[rnorm2c] +scale=0.0001 +pow=0.75 + +[rnorm2d] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[cnorm2c] +scale=0.001 +pow=0.75 + +[cnorm2d] +scale=0.001 +pow=0.75 + +# this is like #120 but on 4 gpus. trained on 2012 (non-full) +# on guppy +# logs/layers-120-4gpu.log +# /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 diff --git a/layers/layer-params-120-4gpu-auto6.cfg b/layers/layer-params-120-4gpu-auto6.cfg new file mode 100644 index 0000000..280a228 --- /dev/null +++ b/layers/layer-params-120-4gpu-auto6.cfg @@ -0,0 +1,313 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1c] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1d] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2c] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2d] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3c] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3d] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4c] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4d] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5c] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5d] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc1024-1a] 
+epsW=0.00001,0.00001,0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1b] +epsW=0.00001,0.00001,0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1c] +epsW=0.00001,0.00001,0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1d] +epsW=0.00001,0.00001,0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + + +[fc1024-2a] +epsW=0.00001,0.00001,0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2b] +epsW=0.00001,0.00001,0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2c] +epsW=0.00001,0.00001,0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2d] +epsW=0.00001,0.00001,0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1000] +epsW=0.00001,0.00001,0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs1b] +enable=true + +[hs1c] +enable=true + +[hs1d] +enable=true + +[hs2a] +enable=true + +[hs2b] +enable=true + +[hs2c] +enable=true + +[hs2d] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm1c] +scale=0.0001 +pow=0.75 + +[rnorm1d] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[rnorm2c] +scale=0.0001 +pow=0.75 + +[rnorm2d] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[cnorm2c] +scale=0.001 +pow=0.75 + +[cnorm2d] +scale=0.001 +pow=0.75 + +# this is like #120 but on 4 gpus. 
trained on 2012 (non-full) +# on guppy +# logs/layers-120-4gpu.log +# /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 diff --git a/layers/layer-params-120-4gpu.cfg b/layers/layer-params-120-4gpu.cfg new file mode 100644 index 0000000..794fccb --- /dev/null +++ b/layers/layer-params-120-4gpu.cfg @@ -0,0 +1,314 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1c] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1d] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2c] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2d] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3c] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3d] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4c] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4d] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5c] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5d] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc1024-1a] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1b] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1c] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1d] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + + +[fc1024-2a] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2b] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2c] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2d] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1000] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs1b] +enable=true + +[hs1c] +enable=true + +[hs1d] +enable=true + +[hs2a] +enable=true + +[hs2b] +enable=true + +[hs2c] +enable=true + +[hs2d] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm1c] +scale=0.0001 +pow=0.75 + +[rnorm1d] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + 
+[rnorm2b] +scale=0.0001 +pow=0.75 + +[rnorm2c] +scale=0.0001 +pow=0.75 + +[rnorm2d] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[cnorm2c] +scale=0.001 +pow=0.75 + +[cnorm2d] +scale=0.001 +pow=0.75 + +# this is like #120 but on 4 gpus. trained on 2012 (non-full) +# on guppy +# logs/layers-120-4gpu.log +# /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 +# epoch 56: killed, this is overfitting. will try reducing the # of params. diff --git a/layers/layer-params-120.cfg b/layers/layer-params-120.cfg new file mode 100644 index 0000000..0256104 --- /dev/null +++ b/layers/layer-params-120.cfg @@ -0,0 +1,174 @@ +[conv1a] +epsW=0.0000 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.0 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.0 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #106 but with rnorm of size 5, also train on jpegs +# on gpu +# logs/layers-120.log +# /storage/tmp/ConvNet__2012-07-22_04.40.34 +# moving to guppy7 +# /nobackup/kriz/tmp/ConvNet__2012-07-22_04.40.34/ +# epoch 26: set epsw to 0.001 from 0.01 +# epoch 47: set epsw to 0.0001 from 0.001 +# epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 72: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 82: set epsw to 0.00001 from 0.0001 +# epoch 106: killed +# logprob: 1.634692, 0.378533, 0.172360 diff --git a/layers/layer-params-121.cfg b/layers/layer-params-121.cfg new file mode 100644 index 0000000..4c6e7b1 --- /dev/null +++ b/layers/layer-params-121.cfg @@ -0,0 +1,179 @@ +[conv1a] +epsW=0.0000 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.0 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.0 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 
+wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[cnorm1a] +scale=0.001 +pow=0.75 + +[cnorm1b] +scale=0.001 +pow=0.75 + +# this is like #120 but with cnorm over conv1 as well +# on guppy8 +# logs/layers-121.log +# /nobackup/kriz/tmp/ConvNet__2012-07-22_15.59.00 +# epoch 25: set epsw to 0.001 from 0.01 +# epoch 51: set epsw to 0.0001 from 0.001 +# epoch 63: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 76: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 90: set epsw to 0.00001 from 0.0001 +# worse than 120 diff --git a/layers/layer-params-126-2012-full.cfg b/layers/layer-params-126-2012-full.cfg new file mode 100644 index 0000000..02bf45b --- /dev/null +++ b/layers/layer-params-126-2012-full.cfg @@ -0,0 +1,165 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[cnorm2a] 
+scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #120-2012-full but also has horiz reflection for gpu2 +# on guppy8 +# logs/layers-126.log +# /nobackup/kriz/tmp/ConvNet__2012-07-31_22.55.59 +# killed after 19 epochs..seems no good, and also full sucks we now know diff --git a/layers/layer-params-127.cfg b/layers/layer-params-127.cfg new file mode 100644 index 0000000..856c5ee --- /dev/null +++ b/layers/layer-params-127.cfg @@ -0,0 +1,174 @@ +[conv1a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv3b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# trained on lsvrc-2012 (full), like #120 but +# this examines whether communication is necessary at conv3 +# .. meaning it has no communication at conv3 +# on gpu +# /storage/tmp/ConvNet__2012-08-01_02.35.01 +# logs/layers-127.log +# killed, since we know now that full sucks. + +# trained on lsvrc-2012 (non-full). 
like #120 but now also make conv3,conv4 wider to compensate for lost connections +# on guppy8 +# logs/layers-127a.log +# /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-02_00.18.36 +# epoch 21: set epsw to 0.001 from 0.01 +# epoch 36: killed, significantly worse than 120 diff --git a/layers/layer-params-128.cfg b/layers/layer-params-128.cfg new file mode 100644 index 0000000..68a770d --- /dev/null +++ b/layers/layer-params-128.cfg @@ -0,0 +1,167 @@ +[conv1a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv5a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #120 +# and has narrower columns which communicate more. 
i'm running this because #127 suggests +# that communication is good +# on guppy9 +# logs/layers-128.log +# epoch 25: set epsw to 0.001 from 0.01 +# on hold diff --git a/layers/layer-params-129.cfg b/layers/layer-params-129.cfg new file mode 100644 index 0000000..ddd79f5 --- /dev/null +++ b/layers/layer-params-129.cfg @@ -0,0 +1,316 @@ +[conv1a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1c] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1d] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2c] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2d] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3c] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3d] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4c] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4d] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5c] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5d] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc1024-1a] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1b] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1c] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1d] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + + +[fc1024-2a] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2b] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2c] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2d] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1000] +epsW=0.001,0.001,0.001,0.001 +epsB=0.002 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs1b] +enable=true + +[hs1c] +enable=true + +[hs1d] +enable=true + +[hs2a] +enable=true + +[hs2b] +enable=true + +[hs2c] +enable=true + +[hs2d] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 
+ +[rnorm1c] +scale=0.0001 +pow=0.75 + +[rnorm1d] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[rnorm2c] +scale=0.0001 +pow=0.75 + +[rnorm2d] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[cnorm2c] +scale=0.001 +pow=0.75 + +[cnorm2d] +scale=0.001 +pow=0.75 + +# 4 gpus, based on 120 +# on guppy7 +# logs/layers-129.log +# /nobackup/kriz/tmp/ConvNet__2012-08-06_22.23.16 +# epoch 22: set epsw to 0.001 from 0.01 +# uhh.. relu wiped this. nice. + diff --git a/layers/layer-params-130.cfg b/layers/layer-params-130.cfg new file mode 100644 index 0000000..6bcd287 --- /dev/null +++ b/layers/layer-params-130.cfg @@ -0,0 +1,320 @@ +[conv1a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1c] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1d] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2c] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2d] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3c] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3d] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4c] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4d] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5c] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5d] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc1024-1a] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1b] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1c] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-1d] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + + +[fc1024-2a] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2b] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2c] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[fc1024-2d] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 
+wball=0,0,0,0 + +[fc1000] +epsW=0.0001,0.0001,0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs1b] +enable=true + +[hs1c] +enable=true + +[hs1d] +enable=true + +[hs2a] +enable=true + +[hs2b] +enable=true + +[hs2c] +enable=true + +[hs2d] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm1c] +scale=0.0001 +pow=0.75 + +[rnorm1d] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[rnorm2c] +scale=0.0001 +pow=0.75 + +[rnorm2d] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[cnorm2c] +scale=0.001 +pow=0.75 + +[cnorm2d] +scale=0.001 +pow=0.75 + +# this is like #129, but with 2x as many filters in conv2 +# on guppy8 +# /nobackup/kriz/tmp/ConvNet__2012-08-07_13.31.34 +# logs/layers-130.log +# uhh.. relu wiped this. nice. +# on guppy9 +# logs/layers-130a.log +# /nobackup/kriz/tmp/ConvNet__2012-08-09_14.09.20 +# epoch 22: set epsw to 0.001 from 0.01 +# epoch 46: set epsw to 0.0001 from 0.001 +# epoch 62: killed. surprisingly, this is hardly (if at all) better than 2-gpu net diff --git a/layers/layer-params-131-2009.cfg b/layers/layer-params-131-2009.cfg new file mode 100644 index 0000000..de3b43b --- /dev/null +++ b/layers/layer-params-131-2009.cfg @@ -0,0 +1,172 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# on guppy9 +# logs/layers-131-2009.log +# /nobackup/kriz/tmp/ConvNet__2012-08-18_15.41.20 +# epoch 7: set epsw to 0.001 from 0.01 +# epoch 14: set epsw to 0.0001 from 0.001 +# epoch 20: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 24: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 31: set epsw to 0.00001 from 0.0001 +# epoch 36: killed +# logprob: 3.466260, 0.694209, 0.437308 +# a bit worse than previous 2009 thing! 
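The trailer comments in these files record how epsW (the per-layer learning rate) was stepped down during training, alongside momW (momentum), wc (L2 weight decay) and wball, with comma-separated values giving one setting per input of a multi-input layer. As a reading aid, here is a minimal Python sketch — not the repository's own option parser — of how one of these INI-style layer-params files can be loaded and how epsW/momW/wc are conventionally combined in a momentum plus weight-decay update. The file path, helper names, and the exact sign convention of the update are illustrative assumptions, not taken from this patch.

from configparser import ConfigParser

import numpy as np


def load_layer_params(path):
    """Parse a layer-params .cfg into {layer_name: {option: value(s)}}."""
    cp = ConfigParser()                     # note: ConfigParser lower-cases option names (epsW -> epsw)
    cp.read(path)
    params = {}
    for section in cp.sections():           # sections like [conv1a], [fc2048ba], [logprob]
        opts = {}
        for key, val in cp.items(section):
            try:
                # comma-separated lists give one value per input of the layer
                opts[key] = [float(v) for v in val.split(',')]
            except ValueError:
                opts[key] = val              # e.g. enable=true on the dropout (hs*) layers
        params[section] = opts
    return params


def sgd_step(w, grad, inc, eps, mom, wc):
    """One momentum + weight-decay update for a single weight matrix.

    Assumed convention (an illustration, not quoted from this patch):
        inc := mom * inc - wc * eps * w + eps * grad
        w   := w + inc
    """
    inc[:] = mom * inc - wc * eps * w + eps * grad
    w += inc


if __name__ == '__main__':
    # Illustrative usage against one of the files added by this patch.
    layer_params = load_layer_params('layers/layer-params-131.cfg')
    conv3a = layer_params['conv3a']
    print(conv3a['epsw'], conv3a['momw'], conv3a['wc'])   # one entry per input weight matrix

    w = np.zeros((3, 3))
    grad = np.ones((3, 3))
    inc = np.zeros((3, 3))
    sgd_step(w, grad, inc,
             eps=conv3a['epsw'][0], mom=conv3a['momw'][0], wc=conv3a['wc'][0])

The schedule notes such as "epoch 22: set epsw to 0.001 from 0.01" correspond to editing the epsW entries in these files between epochs; the parameters here only scale the update, they do not change the layer definitions, which live in the matching layers-*.cfg files.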
diff --git a/layers/layer-params-131.cfg b/layers/layer-params-131.cfg new file mode 100644 index 0000000..809dd4b --- /dev/null +++ b/layers/layer-params-131.cfg @@ -0,0 +1,175 @@ +[conv1a] +epsW=0.0000 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.0 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.0 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #120, but puts rnorm1 right over conv1 (trained on 2012-nonfull) +# on gpu +# /storage/tmp/ConvNet__2012-08-09_12.33.33 +# logs/layers-131.log +# moved to guppy7 +# /nobackup/kriz/tmp/ConvNet__2012-08-09_12.33.33/ +# epoch 22: set epsw to 0.001 from 0.01 +# epoch 46: set epsw to 0.0001 from 0.001 +# epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 75: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 81: set epsw to 0.00001 from 0.0001 +# epoch 100: killed +# validation multiview error: +# logprob: 1.755725, 0.409340, 0.185740 diff --git a/layers/layer-params-132.cfg b/layers/layer-params-132.cfg new file mode 100644 index 0000000..e919e31 --- /dev/null +++ b/layers/layer-params-132.cfg @@ -0,0 +1,179 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv3b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv5a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] 
+epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like 120 but with communication in conv4 instead of conv3 +# on gpu +# logs/layers-132.log +# /storage/tmp/ConvNet__2012-08-11_02.23.36 +# epoch 20: set epsw to 0.001 from 0.01 +# epoch 44: set epsw to 0.0001 from 0.001 +# moved to guppy9 +# @#$%&!, killed, i accidentally trained this on full + +# restart: +# /nobackup/kriz/tmp/ConvNet__2012-08-13_16.47.07 +# logs/layers-132a.log +# epoch 23: set epsw to 0.001 from 0.01 +# epoch 4x: set epsw to 0.0001 from 0.001 +# epoch 65: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 71: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 88: killed, worse than 131 + diff --git a/layers/layer-params-133.cfg b/layers/layer-params-133.cfg new file mode 100644 index 0000000..a4e20e7 --- /dev/null +++ b/layers/layer-params-133.cfg @@ -0,0 +1,167 @@ +[conv1a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv3b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv5a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is a hybrid of 131 and 132: so it's like 120, but has communication in conv4 instead of conv3, and it also puts rnorm1 directly over conv1 +# on guppy7 +# logs/layers-133.log +# 
/nobackup/kriz/tmp/ConvNet__2012-08-15_16.08.23 +# epoch 21: set epsw to 0.001 from 0.01 +# epoch 48: set epsw to 0.0001 from 0.001 +# epoch 50: killed, worse than 131 diff --git a/layers/layer-params-134.cfg b/layers/layer-params-134.cfg new file mode 100644 index 0000000..5f07d72 --- /dev/null +++ b/layers/layer-params-134.cfg @@ -0,0 +1,169 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=0.25 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=0.25 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=0.25 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=0.25 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #131, but with minDiv of 0.25 on rnorms +# on guppy9 +# /nobackup/kriz/tmp/ConvNet__2012-08-20_23.26.41 +# logs/layers-134.log +# epoch 13: on hold diff --git a/layers/layer-params-135-2009-2012.cfg b/layers/layer-params-135-2009-2012.cfg new file mode 100644 index 0000000..5833cb2 --- /dev/null +++ b/layers/layer-params-135-2009-2012.cfg @@ -0,0 +1,199 @@ +[conv1a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 
+wball=0,0 + +[fc2048bb] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this trains 135 on 2012, initialized from 2009 1-8800 +# on guppy9 +# init epsw 0.001 +# logs/layers-135-2012-pretrain-2009.log +# /nobackup/kriz/tmp/ConvNet__2012-09-09_15.20.47 +# epoch 22: set epsw to 0.0001 from 0.001 +# epoch 23: putting on hold to train softmax tree +# this is doing worse than 141-2009 anyway, which has an extra 6th conv layer (1.97 vs 2.00) + +# 135 notes: +# this is like #131, but with minDiv of 2 on rnorms +# on guppy8 +# /nobackup/kriz/tmp/ConvNet__2012-08-21_01.49.23 +# logs/layers-135.log +# epoch 20: set epsw to 0.001 from 0.01 +# epoch 47: set epsw to 0.0001 from 0.001 +# epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 75: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 81: set epsw to 0.00001 from 0.0001 +# epoch 96: killed +# validation multiview: +# logprob: 1.757653, 0.410700, 0.184160 + +# now let's train on 2009 1-8800 +# logs/layers-135-2009-bigtrain.log +# on guppy9 +# /nobackup/kriz/tmp/ConvNet__2012-08-26_22.39.45 +# epoch 4.7822: set epsw to 0.001 from 0.01 +# epoch 8.1299: set epsw to 0.0001 from 0.001 +# epoch 10.3697: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 11.4731: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 14.3906: set epsw to 0.00001 from 0.0001 +# epoch 17: killed diff --git a/layers/layer-params-135-2009.cfg b/layers/layer-params-135-2009.cfg new file mode 100644 index 0000000..a3786bf --- /dev/null +++ b/layers/layer-params-135-2009.cfg @@ -0,0 +1,189 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true 
+ +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #131, but with minDiv of 2 on rnorms +# on guppy8 +# /nobackup/kriz/tmp/ConvNet__2012-08-21_01.49.23 +# logs/layers-135.log +# epoch 20: set epsw to 0.001 from 0.01 +# epoch 47: set epsw to 0.0001 from 0.001 +# epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 75: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 81: set epsw to 0.00001 from 0.0001 +# epoch 96: killed +# validation multiview: +# logprob: 1.757653, 0.410700, 0.184160 + +# now let's train on 2009 1-8800 +# logs/layers-135-2009-bigtrain.log +# on guppy9 +# /nobackup/kriz/tmp/ConvNet__2012-08-26_22.39.45 +# epoch 4.7822: set epsw to 0.001 from 0.01 +# epoch 8.1299: set epsw to 0.0001 from 0.001 +# epoch 10.3697: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 11.4731: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 14.3906: set epsw to 0.00001 from 0.0001 +# epoch 17: killed diff --git a/layers/layer-params-135.cfg b/layers/layer-params-135.cfg new file mode 100644 index 0000000..6453b3a --- /dev/null +++ b/layers/layer-params-135.cfg @@ -0,0 +1,177 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #131, but with minDiv of 2 on rnorms +# on guppy8 +# /nobackup/kriz/tmp/ConvNet__2012-08-21_01.49.23 +# logs/layers-135.log +# epoch 20: set epsw to 0.001 from 0.01 +# epoch 47: set epsw to 0.0001 from 0.001 +# epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 
75: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 81: set epsw to 0.00001 from 0.0001 +# epoch 96: killed +# validation multiview: +# logprob: 1.757653, 0.410700, 0.184160 diff --git a/layers/layer-params-136.cfg b/layers/layer-params-136.cfg new file mode 100644 index 0000000..7c7d350 --- /dev/null +++ b/layers/layer-params-136.cfg @@ -0,0 +1,169 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #135 (so uses def file 135), but subtracts scalar mean +# on guppy7 +# logs/layers-136.log +# /nobackup/kriz/tmp/ConvNet__2012-08-23_04.38.51 +# epoch 15: eh, this is no better, and has no reason to be better. screw it. 
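The rnorm entries that pick up a minDiv setting from #134 onward (scale=0.0001, pow=0.75, minDiv=2) parameterize cross-map response normalization. A short numpy sketch of the formula they are assumed to correspond to follows; the channel window size itself lives in the companion layers-*.cfg definition files, so size=5 below is only a placeholder, and the actual CUDA kernels may fold the window size into scale differently.

import numpy as np

def cross_map_rnorm(acts, size=5, scale=1e-4, power=0.75, min_div=2.0):
    # acts: (channels, height, width); each unit is divided by a term built
    # from the squared activities of `size` neighbouring channels.
    C = acts.shape[0]
    sq = acts ** 2
    out = np.empty_like(acts)
    for c in range(C):
        lo, hi = max(0, c - size // 2), min(C, c + size // 2 + 1)
        denom = (min_div + scale * sq[lo:hi].sum(axis=0)) ** power
        out[c] = acts[c] / denom
    return out

# toy usage with the settings above
a = np.random.randn(96, 55, 55).astype(np.float32)
b = cross_map_rnorm(a, size=5, scale=0.0001, power=0.75, min_div=2.0)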
diff --git a/layers/layer-params-137-tree.cfg b/layers/layer-params-137-tree.cfg new file mode 100644 index 0000000..4ddf3c9 --- /dev/null +++ b/layers/layer-params-137-tree.cfg @@ -0,0 +1,196 @@ +[conv1a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like 137, but with treefc +# on guppy9 +# init epsw 0.01 -- this run does not sale epsw by node size +# /nobackup/kriz/tmp/ConvNet__2012-09-10_22.47.57 +# logs/layers-137-tree.log +# epoch 14: set epsw to 0.001 from 0.01 +# epoch 38: killed..its stuck at 2.17 nats.. should be nearer to 2.06. perhaps resume later + +# 137 notes: +# this is like #135, but changes the cnorm layers to rnorm +# on guppy8 +# logs/layers-137.log +# /nobackup/kriz/tmp/ConvNet__2012-08-25_05.39.04 +# epoch 26: set epsw to 0.001 from 0.01 +# epoch 50: set epsw to 0.0001 from 0.001 +# epoch 75: set epsw to 0 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 84: set epsw to 0.00001 from 0.0001 +# epoch 92: made backup to /nobackup/kriz/tmp/ConvNet__2012-08-25_05.39.04.bak +# set epsw to 0.0001 from 0.00001 (conv1/2 still 0) +# using BRIGHTNESS NOISE of 0.2 (in other words i zeroed out the other components of the color noise) +# epoch 101: set color (brightness) noise to 0 from 0.2 +# epoch 105: set epsw to 0.00001 from 0.0001 +# experiment a failure. 
going back to training /nobackup/kriz/tmp/ConvNet__2012-08-25_05.39.04.bak +# epoch 99: killed +# logprob: 1.751138, 0.407820, 0.183440 + +# batch size 128 x 8: +# /nobackup/kriz/tmp/ConvNet__2012-09-07_17.08.47 +# epoch 25: set epsw to 0.001 from 0.01 +# made backup to /nobackup/kriz/tmp/ConvNet__2012-09-07_17.08.47.bak +# epoch 34: killed, its not good diff --git a/layers/layer-params-137.cfg b/layers/layer-params-137.cfg new file mode 100644 index 0000000..5063f26 --- /dev/null +++ b/layers/layer-params-137.cfg @@ -0,0 +1,207 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #135, but changes the cnorm layers to rnorm + +# on lsvrc-2010: +# logs/layers-137-2010.log +# on guppy9 +# /nobackup/kriz/tmp/ConvNet__2012-09-13_02.47.12 +# epoch 25: set epsw to 0.001 from 0.01 +# epoch 49: set epsw to 0.0001 from 0.001 +# epoch 81: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 85: set epsw to 0 from 0.00001 on conv1,conv2 +# set epsw to 0.00001 from 0.0001 elsewhere +# epoch 103: killed +# validation: +# logprob: 1.727592, 0.394153, 0.182784 +# validation multiview: +# logprob: 1.632875, 0.377960, 0.171020 +# test multiview: +# logprob: 1.623185, 0.376167, 0.171247 + +# on lsvrc-2012: +# on guppy8 +# logs/layers-137.log +# /nobackup/kriz/tmp/ConvNet__2012-08-25_05.39.04 +# epoch 26: set epsw to 0.001 from 0.01 +# epoch 50: set epsw to 0.0001 from 0.001 +# epoch 75: set epsw to 0 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 84: set epsw to 0.00001 from 0.0001 +# epoch 92: made backup to /nobackup/kriz/tmp/ConvNet__2012-08-25_05.39.04.bak +# set epsw to 0.0001 from 0.00001 (conv1/2 still 0) +# using BRIGHTNESS NOISE of 0.2 (in other words i zeroed out the other components of the color noise) +# epoch 101: set color (brightness) noise to 0 from 
0.2 +# epoch 105: set epsw to 0.00001 from 0.0001 +# experiment a failure. going back to training /nobackup/kriz/tmp/ConvNet__2012-08-25_05.39.04.bak +# epoch 99: killed +# logprob: 1.751138, 0.407820, 0.183440 + +# batch size 128 x 8: +# /nobackup/kriz/tmp/ConvNet__2012-09-07_17.08.47 +# epoch 25: set epsw to 0.001 from 0.01 +# made backup to /nobackup/kriz/tmp/ConvNet__2012-09-07_17.08.47.bak +# epoch 34: killed, its not good diff --git a/layers/layer-params-139.cfg b/layers/layer-params-139.cfg new file mode 100644 index 0000000..ae275e2 --- /dev/null +++ b/layers/layer-params-139.cfg @@ -0,0 +1,172 @@ +[conv1a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv2b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 +wcNormMin=0.001,0 +wcNormMax=0.002,0 + +[conv3a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #137 (hence uses same file) but has wcnorm on conv2[0] +# epoch 19: set epsw to 0.001 from 0.01 +# epoch 49: set epsw to 0.0001 from 0.001 +# epoch 62: killed, about 0.01 nat worse than 137 (which is pretty significant at this stage) diff --git a/layers/layer-params-141-2009-half.cfg b/layers/layer-params-141-2009-half.cfg new file mode 100644 index 0000000..2f09392 --- /dev/null +++ b/layers/layer-params-141-2009-half.cfg @@ -0,0 +1,203 @@ +[conv1a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.0001 +epsB=0.002 
+momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.0001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv6a] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv6b] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048a] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.0001,0.0001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like 141, but trained on half of 2009 imgnet, to be comparable to google's results +# logs/layers-141-2009-half.log +# /nobackup/kriz/tmp/ConvNet__2012-09-09_00.26.31 +# on guppy9 +# epoch 6.2600: set epsw to 0.001 from 0.01 +# epoch 13.3361: set epsw to 0.0001 from 0.001 +# epoch 18.2396: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 21.1949: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 25.3718: set epsw to 0.00001 from 0.0001 +# epoch 28.3271: killed +# ok test erro rate is a bit worse than 131, restarting with epsw 0.001, color noise 0.1 +# epoch 44.183: set epsw to 0.0001 from 0.001 +# epoch 56: eek, it started getting worse on validation :/ + +# 141 notes: +# this is like #137 but with conv6, also communication in conv6 +# /nobackup/kriz/tmp/ConvNet__2012-09-03_16.27.48 +# logs/layers-141.log +# epoch 23: set epsw to 0.001 from 0.01 +# epoch 48: set epsw to 0.0001 from 0.001 +# epoch 60: this seems overfitty....killing +# but will use these weights to initialize a net on 2009... why the hell not? 
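Most of the run notes in these files are hand-scheduled learning-rate drops ("set epsw to 0.001 from 0.01", and so on). A hypothetical helper like the one below could apply such a drop to every epsW entry in a params file; it is only a sketch of the bookkeeping, assuming the files parse as standard INI. Note that configparser discards the trailing # notes when rewriting, so the output path is kept separate here.

import configparser

def drop_epsw(path_in, path_out, factor=0.1):
    cp = configparser.ConfigParser()
    cp.optionxform = str              # keep key case: epsW, momW, ...
    cp.read(path_in)
    for sec in cp.sections():
        if cp.has_option(sec, 'epsW'):
            vals = [float(v) * factor for v in cp.get(sec, 'epsW').split(',')]
            cp.set(sec, 'epsW', ','.join('%g' % v for v in vals))
    with open(path_out, 'w') as f:
        cp.write(f)

# the "set epsw to 0.001 from 0.01" step corresponds to factor=0.1
drop_epsw('layers/layer-params-141-2009-half.cfg',
          'layers/layer-params-141-2009-half.dropped.cfg', factor=0.1)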
diff --git a/layers/layer-params-141-2009.cfg b/layers/layer-params-141-2009.cfg new file mode 100644 index 0000000..d3e1f59 --- /dev/null +++ b/layers/layer-params-141-2009.cfg @@ -0,0 +1,231 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv6a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv6b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# training on lsvrc-2010 +# initialized from 141 trained on lsvrc-2012, then 2009 +# using def file layers-141-2009-2010.cfg +# /nobackup/kriz/tmp/ConvNet__2012-09-12_01.06.32 +# on guppy8 +# init epsw 0.001 +# logs/layers-141-2010-pretrain-2009-pretrain-2012.log +# epoch 14: set epsw to 0.0001 from 0.001 +# epoch 30: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 36: set epsw to 0 on conv1/2 +# epoch 47: set epsw to 0.00001 from 0.0001 +# epoch 54: killed +# logprob: 1.511725, 0.356707, 0.154893 + +# training on lsvrc-2012 +# initialized from 141 trained on lsvrc-2012, then 2009 +# using def file layers-141-2009-2012.cfg +# init epsw 0.001 +# logs/layers-141-2012-pretrain-2009-pretrain-2012.log +# /nobackup/kriz/tmp/ConvNet__2012-09-09_03.36.13 +# backup: /ais/gobi3/u/kriz/tmp/ConvNet__2012-09-09_03.36.13 +# also /ais/gobi3/u/kriz/net-backups/ +# on guppy8 +# epoch 13: set epsw to 0.0001 from 0.001 +# epoch 26: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 32: set epsw to 0 on conv1/2 +# epoch 43: set epsw to 0.00001 from 0.0001 +# epoch 54: killed +# python convnet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-09_03.36.13 --test-only=1 --test-one=0 --multiview-test=1 +# logprob: 1.671316, 0.395620, 0.172060 +#python convnet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-09_03.36.13 --test-only=1 
--test-one=0 --multiview-test=0 +# logprob: 1.779082, 0.415920, 0.186780 + +# 141-2009 notes, before going back to 2012: +# initialized from 141 trained on lsvrc-2012 +# init epsw 0.001 +# logs/layers-141-2009-pretrain-2012.log +# /nobackup/kriz/tmp/ConvNet__2012-09-07_05.22.51 +# epoch 4.1189: set epsw to 0.0001 from 0.001 +# epoch 5.1596: killed, not improving much. lets go back to training on lsvrc-2012 with these weights now. +# +# 141 notes: +# this is like #137 but with conv6, also communication in conv6 +# /nobackup/kriz/tmp/ConvNet__2012-09-03_16.27.48 +# logs/layers-141.log +# epoch 23: set epsw to 0.001 from 0.01 +# epoch 48: set epsw to 0.0001 from 0.001 +# epoch 60: this seems overfitty....killing +# but will use these weights to initialize a net on 2009... why the hell not? diff --git a/layers/layer-params-141.cfg b/layers/layer-params-141.cfg new file mode 100644 index 0000000..d128778 --- /dev/null +++ b/layers/layer-params-141.cfg @@ -0,0 +1,187 @@ +[conv1a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv6a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv6b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #137 but with conv6, also communication in conv6 +# /nobackup/kriz/tmp/ConvNet__2012-09-03_16.27.48 +# logs/layers-141.log +# epoch 23: set epsw to 0.001 from 0.01 +# epoch 48: set epsw to 0.0001 from 0.001 +# epoch 60: this seems overfitty....killing +# but will use these weights to initialize a net on 2009... why the hell not? 
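The --multiview-test=1 runs above report triples such as "logprob: 1.671316, 0.395620, 0.172060"; these are read here as mean negative log-probability (nats), top-1 error, and top-5 error, with predictions averaged over several crops/reflections per image. A small numpy sketch of that averaging and the top-k error computation, under that (assumed) interpretation:

import numpy as np

def multiview_topk_error(view_probs, labels, k=5):
    # view_probs: (views, examples, classes) softmax outputs; labels: (examples,)
    probs = view_probs.mean(axis=0)                     # average over the views
    logprob = -np.log(probs[np.arange(len(labels)), labels]).mean()
    top1 = np.mean(np.argmax(probs, axis=1) != labels)
    topk = np.argsort(-probs, axis=1)[:, :k]
    errk = 1.0 - np.mean([labels[i] in topk[i] for i in range(len(labels))])
    return logprob, top1, errk

# toy usage: 10 views (centre + 4 corners, plus horizontal flips), 1000 classes
views = np.random.dirichlet(np.ones(1000), size=(10, 8))
labels = np.random.randint(0, 1000, size=8)
print(multiview_topk_error(views, labels, k=5))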
diff --git a/layers/layer-params-145-2010.cfg b/layers/layer-params-145-2010.cfg new file mode 100644 index 0000000..3f6b07d --- /dev/null +++ b/layers/layer-params-145-2010.cfg @@ -0,0 +1,206 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #137 but without low-res stuff +# on lsvrc-2010: +# guppy9 +# logs/layers-145-2010.log +# /nobackup/kriz/tmp/ConvNet__2012-09-27_12.39.44 +# epoch 23: set epsw to 0.001 from 0.01 +# epoch 51: set epsw to 0.0001 from 0.001 +# epoch 68: set epsw to 0.00001 from 0.0001 on conv1 +# set color noise to 0 from 0.1 +# epoch 72: set epsw to 0 on conv1 +# epoch 78: set epsw to 0.00001 from 0.0001 +# epoch 93: killed +# test multliview: +# logprob: 1.614660, 0.374727, 0.169987 +# test center patch: +# logprob: 1.706031, 0.390247, 0.182953 (NOTE, NOT MULTIVIEW!!) 
+ +# on gpu (now guppy8) +# logs/layers-145.log +# /storage/tmp/ConvNet__2012-09-13_03.43.56 +# epoch 25: set epsw to 0.001 from 0.01 +# epoch 36: paused for localization experiments +# resuming on guppy9 +# logs/layers-145-cont.log +# /nobackup/kriz/tmp/ConvNet__2012-09-13_03.43.56 +# epoch 51: set epsw to 0.0001 from 0.001 +# epoch 58: paused for imgnet-20k experiments +# moved to guppy8 +# epoch 67: set epsw to 0.00001 from 0.0001 on conv1 +# set color noise to 0 from 0.1 +# epoch 72: set epsw to 0 on conv1 +# epoch 79: set epsw to 0.00001 from 0.0001 +# epoch 91: killed +# logprob: 1.741473, 0.406640, 0.182100 + +# on 2012-full: +# on guppy7 +# logs/layers-145-full.log +# /nobackup/kriz/tmp/ConvNet__2012-09-23_19.38.45 +# epoch 19: set epsw to 0.001 from 0.01 +# epoch 47: set epsw to 0.0001 from 0.001 +# epoch 61: moved to gpu diff --git a/layers/layer-params-145-half.cfg b/layers/layer-params-145-half.cfg new file mode 100644 index 0000000..53b23a2 --- /dev/null +++ b/layers/layer-params-145-half.cfg @@ -0,0 +1,100 @@ +[conv1a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv3a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + + +[fc4096a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc4096ba] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc1000] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +# this is #145 but only one column, although conv5 is as wide as 2 columns because otherwise this net would have about half as many parameters as the 2-column net, which wouldnt make for a fair comparison. 
+# on guppy9 +# trained on 2010 +# logs/layers-145-half.log +# /nobackup/kriz/tmp/ConvNet__2012-11-03_01.00.35 +# epoch 20: set epstw ot 0.001 from 0.01 +# epoch 48: set epstw ot 0.0001 from 0.001 +# epoch 66: set epsw to 0.00001 from 0.0001 on conv1, set color noise to 0 from 0.1 +# epoch 72: set epsw to 0 from 0.00001 on conv1 +# epoch 96: killed +# test multiview logprob: 1.702802, 0.391680, 0.182287 diff --git a/layers/layer-params-145.cfg b/layers/layer-params-145.cfg new file mode 100644 index 0000000..ff72160 --- /dev/null +++ b/layers/layer-params-145.cfg @@ -0,0 +1,204 @@ +[conv1a] +epsW=0.000 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.000 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #137 but without low-res stuff +# on gpu (now guppy9) +# logs/layers-145.log +# /storage/tmp/ConvNet__2012-09-13_03.43.56 +# epoch 25: set epsw to 0.001 from 0.01 +# epoch 36: paused for localization experiments +# resuming on guppy9 +# logs/layers-145-cont.log +# /nobackup/kriz/tmp/ConvNet__2012-09-13_03.43.56 +# epoch 51: set epsw to 0.0001 from 0.001 +# epoch 58: paused for imgnet-20k experiments +# moved to guppy8 +# epoch 67: set epsw to 0.00001 from 0.0001 on conv1 +# set color noise to 0 from 0.1 +# epoch 72: set epsw to 0 on conv1 +# epoch 79: set epsw to 0.00001 from 0.0001 +# epoch 91: killed +# logprob: 1.741473, 0.406640, 0.182100 + +# on 2012-full: +# on guppy7 +# logs/layers-145-full.log +# /nobackup/kriz/tmp/ConvNet__2012-09-23_19.38.45 +# epoch 19: set epsw to 0.001 from 0.01 +# epoch 47: set epsw to 0.0001 from 0.001 +# epoch 61: moved to gpu + +# pushing learning rate back up: +# on guppy9 +# /nobackup/kriz/tmp/ConvNet__2012-09-13_03.43.56.2 +# start epsw 0.001 (still 0 on conv1) +# color noise put back to 0.1 +# logs/layers-145.log +# epoch 107: set epsw to 0.0001 from 0.001 +# epoch 124: set epsw to 0.00001 from 0.0001 +# set color noise to 0 from 0.1 +# epoch 135: killed +# multiview test: +# logprob: 1.725738, 
0.402500, 0.179940 + diff --git a/layers/layer-params-146-2009-tree.cfg b/layers/layer-params-146-2009-tree.cfg new file mode 100644 index 0000000..181db0f --- /dev/null +++ b/layers/layer-params-146-2009-tree.cfg @@ -0,0 +1,182 @@ +[conv1a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv3a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv6a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv6b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is 146-2009 but with tree +# epoch 9: set epsw to 0.001 from 0.01 diff --git a/layers/layer-params-146-2009.cfg b/layers/layer-params-146-2009.cfg new file mode 100644 index 0000000..780c7ea --- /dev/null +++ b/layers/layer-params-146-2009.cfg @@ -0,0 +1,188 @@ +[conv1a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv3a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv6a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv6b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.001,0.001 +epsB=0.02 
+momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is 145, and initialized from 145 (up to conv5) but with also conv6 and trained on 2009-10k +# on guppy8 +# /nobackup/kriz/tmp/ConvNet__2012-09-14_22.48.00 +# initialized from /nobackup/kriz/tmp/ConvNet__2012-09-13_03.43.56.bak +# init epsw 0.001 on conv1-5, 0.01 on fc, conv6 +# logs/layers-146-2009.log +# epoch 2.1600: set epsw to 0.001 from 0.01 on fc, conv6 +# epoch 6.6491: killed diff --git a/layers/layer-params-146-2011.cfg b/layers/layer-params-146-2011.cfg new file mode 100644 index 0000000..7c6c769 --- /dev/null +++ b/layers/layer-params-146-2011.cfg @@ -0,0 +1,221 @@ +[conv1a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv3a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv6a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv6b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is 146, trained on 2011-20k, initialized from 2009-10k (conv layers only) +# init epw 0.001 on conv layers, 0.01 on fc +# logs/layers-146-2011.log +# on guppy9 +# 
/nobackup/kriz/tmp/ConvNet__2012-09-19_23.29.04 +# epoch 6: set epsw to 0.001 from 0.01 +# epoch 8.11295: killed + +# 146-2009 notes: +# this is 145, and initialized from 145 (up to conv5) but with also conv6 and trained on 2009-10k +# on guppy8 +# /nobackup/kriz/tmp/ConvNet__2012-09-14_22.48.00 +# initialized from /nobackup/kriz/tmp/ConvNet__2012-09-13_03.43.56.bak +# init epsw 0.001 on conv1-5, 0.01 on fc, conv6 +# logs/layers-146-2009.log +# epoch 2.1600: set epsw to 0.001 from 0.01 on fc, conv6 +# epoch 6.6491: killed diff --git a/layers/layer-params-146-2012-2009.cfg b/layers/layer-params-146-2012-2009.cfg new file mode 100644 index 0000000..b2f9742 --- /dev/null +++ b/layers/layer-params-146-2012-2009.cfg @@ -0,0 +1,203 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv6a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv6b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is 146 but pre-trained on 2009-10k (notes below), so initialized from below net +# init epsw 0.001, 0.0001 on conv1 +# on guppy8 +# logs/layers-146-2012-2009.log +# /nobackup/kriz/tmp/ConvNet__2012-09-17_17.01.42 +# epoch 3: set conv1 epsw to 0.00001 from 0.0001 +# epoch 4: set conv1 epsw to 0 from 0.00001 +# epoch 22: set epsw to 0.0001 from 0.001 +# epoch 38: set color noise to 0 from 0.1 +# epoch 42: set epsw to 0.00001 from 0.0001 +# epoch 52: killed +# multiview validation: +# logprob: 1.646452, 0.391000, 0.168760 + +# 146-2009 notes: +# this is 145, and initialized from 145 (up to conv5) but with also conv6 and trained on 2009-10k +# /nobackup/kriz/tmp/ConvNet__2012-09-14_22.48.00 +# on guppy8 +# initialized from /nobackup/kriz/tmp/ConvNet__2012-09-13_03.43.56.bak +# init epsw 0.001 on conv1-5, 0.01 on fc, conv6 +# logs/layers-146-2009.log +# epoch 2.1600: set epsw to 0.001 from 0.01 on fc, 
conv6 +# epoch 6.6491: killed diff --git a/layers/layer-params-146-2012-2011.cfg b/layers/layer-params-146-2012-2011.cfg new file mode 100644 index 0000000..72b21fa --- /dev/null +++ b/layers/layer-params-146-2012-2011.cfg @@ -0,0 +1,205 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv6a] +epsW=0.00001,0.00001 +epsB=0.022 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv6b] +epsW=0.00001,0.00001 +epsB=0.022 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is 146 but pretrained on 2011, now training on 2012 +# init epsw 0.0001 on conv1, 0.001 elsewhere +# guppy9 +# logs/layers-146-2012-2011.log +# /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 +# 300 batches: set epsw to 0.01 from 0.001 on fc1000 +# epoch 5: set conv1 epsw to 0.00001 from 0.0001 +# epoch 7: set conv1 epsw to 0 from 0.00001 +# epoch 9: set fc1000 epsw to 0.001 from 0.01 +# epoch 22: set epsw to 0.0001 from 0.001 +# epoch 38: set color noise to 0 from 0.1 +# epoch 42: set epsw to 0.00001 from 0.0001 +# epoch 54: killed +# multiview validation: +# logprob: 1.633191, 0.389900, 0.166220 + +# 146-2011 notes: +# this is 146, trained on 2011-20k, initialized from 2009-10k (conv layers only) +# init epw 0.001 on conv layers, 0.01 on fc +# logs/layers-146-2011.log +# on guppy9 +# /nobackup/kriz/tmp/ConvNet__2012-09-19_23.29.04 +# epoch 6: set epsw to 0.001 from 0.01 +# epoch 8.11295: killed + diff --git a/layers/layer-params-147.cfg b/layers/layer-params-147.cfg new file mode 100644 index 0000000..b753a34 --- /dev/null +++ b/layers/layer-params-147.cfg @@ -0,0 +1,174 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 
+wball=0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #145 but with non-overlapping pooling +# on guppy9 +# logs/layers-147.log +# /nobackup/kriz/tmp/ConvNet__2012-10-07_23.42.30 +# epoch 23: set epsw to 0.001 from 0.01 +# epoch 51: set epsw to 0.0001 from 0.001 +# epoch 79: set epsw to 0 on conv1, 0.00001 elsewhere +# epoch 90: killed +# validation multiview: +# logprob: 1.757644, 0.410580, 0.185100 diff --git a/layers/layer-params-147.cfg.save b/layers/layer-params-147.cfg.save new file mode 100644 index 0000000..efbda80 --- /dev/null +++ b/layers/layer-params-147.cfg.save @@ -0,0 +1,169 @@ +ano[conv1a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv3a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.001,0.001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 
+minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #145 but with non-overlapping pooling +# on guppy9 +# logs/layers-147.log +# /nobackup/kriz/tmp/ConvNet__2012-10-07_23.42.30 +# epoch 23: set epsw to 0.001 from 0.01 diff --git a/layers/layer-params-148.cfg b/layers/layer-params-148.cfg new file mode 100644 index 0000000..ff75786 --- /dev/null +++ b/layers/layer-params-148.cfg @@ -0,0 +1,148 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +# this is like #145 but without normalization layers +# on guppy9 +# logs/layers-148.log +# /nobackup/kriz/tmp/ConvNet__2012-11-02_23.33.30 +# epoch 22: set epsw to 0.001 from 0.01 +# epoch 44: set epsw to 0.0001 from 0.001 +# epoch 69: set epsw to 0.00001 from 0.0001 on conv1, set color noise to 0 from 0.1 +# epoch 73: set epsw to 0 from 0.00001 on conv1 +# epoch 86: set epsw to 0.00001 from 0.0001 +# epoch 97: killed +# validation multiview: +# logprob: 1.822358, 0.420340, 0.193620 (1.4% top-1 worse than 145) diff --git a/layers/layer-params-149.cfg b/layers/layer-params-149.cfg new file mode 100644 index 0000000..71c32f4 --- /dev/null +++ b/layers/layer-params-149.cfg @@ -0,0 +1,182 @@ +[conv1a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv3a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 
+momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[rnorm5a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm5b] +scale=0.0001 +pow=0.75 +minDiv=2 + +# this is like #145 but with rnorm over conv5 +# on guppy +# /nobackup_a/kriz/tmp/ConvNet__2012-11-13_23.21.47 +# logs/layers-149.log +# epoch 21: set epsw to 0.001 from 0.01 +# epoch 54: set epsw to 0.0001 from 0.001 +# epoch 66: set epsw to 0.00001 from 0.0001 on conv1 +# set color noise to 0 from 0.1 diff --git a/layers/layer-params-150.cfg b/layers/layer-params-150.cfg new file mode 100644 index 0000000..b7faa6e --- /dev/null +++ b/layers/layer-params-150.cfg @@ -0,0 +1,180 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv3a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv3b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv4a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc4096a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc4096b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048ba] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048bb] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #145 but with no column communication, trained on 2012 +# guppy9 +# /nobackup/kriz/tmp/ConvNet__2012-11-20_01.25.08 +# logs/layers-150.log +# moved to gpu +# epoch 25: set epsw to 0.001 from 0.01 +# epoch 49: set epsw to 0.0001 from 0.001 +# epoch 50: move back to guppy9 +# epoch 66: set epsw to 0.00001 from 0.0001 on conv1 +# set color noise to 0 from 0.1 +# epoch 72: set epsw to 0 on conv1 +# epoch 79: set epsw to 0.00001 from 0.0001 +# epoch 92: killed +# validation multiview: +# logprob: 1.811173, 0.418280, 0.193300 + diff --git a/layers/layer-params-153.cfg b/layers/layer-params-153.cfg new file mode 100644 index 0000000..1f9aec1 --- /dev/null +++ 
b/layers/layer-params-153.cfg @@ -0,0 +1,184 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=linear[1000] + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=linear[1000] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=linear[1000] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=linear[1000] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=linear[1000] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=linear[1000] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=linear[1000] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=linear[1000] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=linear[1000] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=linear[1000] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=linear[1000] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=linear[1000] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=linear[1000] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=linear[1000] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=linear[1000] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #145 but with linear learning rate schedule +# on guppy9 +# /nobackup/kriz/tmp/ConvNet__2012-12-18_22.39.10 +# logs/layers-153.log +# epoch 18: killed. i realized linear learning rate schedule is completely mental. 
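The notes above mark the switch away from the linear schedule: #153 used sched=linear[1000] and was abandoned, while #154 and the nets that follow use sched=exp[...] (and later dexp/jdexp variants). A minimal sketch of how the two annealing curves could differ, assuming linear[F] ramps the base rate down linearly to base/F over the run and exp[F] decays it geometrically to the same endpoint; the bracketed value is read here as a total decay factor, and the function names below are hypothetical, not the parser used by this codebase:

# Hypothetical sketch of the two annealing schedules contrasted in the notes above.
# Assumption: "linear[F]" and "exp[F]" both end at base/F after the full run;
# the real schedule parser in this repository may interpret the bracket differently.

def linear_sched(base, tgt_factor, progress):
    """Linearly interpolate from base down to base/tgt_factor; progress in [0, 1]."""
    final = base / tgt_factor
    return base + (final - base) * progress

def exp_sched(base, tgt_factor, progress):
    """Geometric decay from base down to base/tgt_factor; progress in [0, 1]."""
    return base * (1.0 / tgt_factor) ** progress

if __name__ == "__main__":
    # With epsW=0.01 and a factor of 1000 both curves end at 0.00001, but the
    # exponential one passes through the useful mid-range rates much earlier.
    for p in (0.0, 0.25, 0.5, 0.75, 1.0):
        print(p, linear_sched(0.01, 1000, p), exp_sched(0.01, 1000, p))

Under this reading, a linear ramp over a factor of 1000 keeps the rate near its initial value for most of training and only reaches small rates at the very end, which is one plausible explanation for the verdict on #153 above and for the exp-style schedules adopted from #154 onward.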
diff --git a/layers/layer-params-154.cfg b/layers/layer-params-154.cfg new file mode 100644 index 0000000..bc36108 --- /dev/null +++ b/layers/layer-params-154.cfg @@ -0,0 +1,372 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[1000] + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[1000] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[1000] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[1000] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[1000] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[1000] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[1000] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[1000] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[1000] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[1000] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[1000] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[1000] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[1000] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[1000] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[1000] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #153 (so uses same file) but with exp learning rate schedule +# on guppy9 +# /nobackup/kriz/tmp/ConvNet__2012-12-18_23.40.29 +# logs/layers-154.log +# moved to gpu +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[1000] + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[1000] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[1000] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[1000] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[1000] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[1000] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[1000] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[1000] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[1000] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[1000] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[1000] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[1000] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[1000] + +[fc2048bb] 
+epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[1000] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[1000] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #153 (so uses same file) but with exp learning rate schedule +# on guppy9 +# /nobackup/kriz/tmp/ConvNet__2012-12-18_23.40.29 +# logs/layers-154.log +# moved to gpu +# /storage/tmp/ConvNet__2012-12-18_23.40.29 +# i think something got corrupted +# resuming from epoch 10 on guppy7 +# /nobackup/kriz/tmp/ConvNet__2012-12-18_23.40.29 diff --git a/layers/layer-params-155.cfg b/layers/layer-params-155.cfg new file mode 100644 index 0000000..b6177d6 --- /dev/null +++ b/layers/layer-params-155.cfg @@ -0,0 +1,190 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[4000] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[4000] + +[conv2a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[4000] + +[conv2b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[4000] + +[conv3a] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[4000] + +[conv3b] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[4000] + +[conv4a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[4000] + +[conv4b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[4000] + +[conv5a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[4000] + +[conv5b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[4000] + +[fc2048a] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[4000] + +[fc2048b] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[4000] + +[fc2048ba] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[4000] + +[fc2048bb] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[4000] + +[fc1000] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[4000] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #153 (so uses same file) but with exp learning rate schedule +# its also like #154, but with learning rates in the range 0.02 to 0.000005 +# on guppy9 +# /nobackup/kriz/tmp/ConvNet__2012-12-19_22.13.12 +# logs/layers-155.log +# epoch 61: set color noise to 0 from 0.1 +# epoch 74: set epsw to 0 from 0.02 on conv1 +# validation: +# logprob: 1.861853, 0.426530, 0.199652 +# validation multiview: +# logprob: 1.750063, 0.407440, 0.185240 diff --git a/layers/layer-params-156.cfg b/layers/layer-params-156.cfg new file mode 100644 index 
0000000..b2971c7 --- /dev/null +++ b/layers/layer-params-156.cfg @@ -0,0 +1,190 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[2000] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[2000] + +[conv2a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[2000] + +[conv2b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[2000] + +[conv3a] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[2000] + +[conv3b] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[2000] + +[conv4a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[2000] + +[conv4b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[2000] + +[conv5a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[2000] + +[conv5b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[2000] + +[fc2048a] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[2000] + +[fc2048b] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[2000] + +[fc2048ba] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[2000] + +[fc2048bb] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[2000] + +[fc1000] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[2000] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #153 (so uses same file) but with exp learning rate schedule +# its also like #154 and #155, but with learning rates in the range 0.02 to 0.00001 +# on guppy9 +# /nobackup/kriz/tmp/ConvNet__2012-12-20_01.29.32 +# logs/layers-156.log +# epoch 61: set color noise to 0 from 0.1 +# epoch 72: set epsw to 0 from 0.02 on conv1 +# validation: +# logprob: 1.870253, 0.428933, 0.198336 +# validation multiview: +# logprob: 1.751178, 0.407640, 0.183500 diff --git a/layers/layer-params-157.cfg b/layers/layer-params-157.cfg new file mode 100644 index 0000000..d40a3b9 --- /dev/null +++ b/layers/layer-params-157.cfg @@ -0,0 +1,188 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[8000] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[8000] + +[conv2a] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[8000] + +[conv2b] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[8000] + +[conv3a] +epsW=0.04,0.04 +epsB=0.08 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[8000] + +[conv3b] +epsW=0.04,0.04 +epsB=0.08 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[8000] + +[conv4a] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[8000] + +[conv4b] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[8000] + +[conv5a] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[8000] + +[conv5b] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 
+sched=exp[8000] + +[fc2048a] +epsW=0.04,0.04 +epsB=0.08 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[8000] + +[fc2048b] +epsW=0.04,0.04 +epsB=0.08 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[8000] + +[fc2048ba] +epsW=0.04,0.04 +epsB=0.08 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[8000] + +[fc2048bb] +epsW=0.04,0.04 +epsB=0.08 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[8000] + +[fc1000] +epsW=0.04,0.04 +epsB=0.08 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[8000] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #153 (so uses same file) but with exp learning rate schedule +# its also like #155, but with learning rates in the range 0.04 to 0.000005 +# on guppy8 +# logs/layers-157.log +# /nobackup/kriz/tmp/ConvNet__2012-12-23_02.12.31 +# epoch 62: set color noise to 0 from 0.1 +# epoch 73: set conv1 epsw to 0 from 0.04 +# valid: logprob: 1.880485, 0.431177, 0.203271 +# multiview valid: logprob: 1.767696, 0.411140, 0.187040 diff --git a/layers/layer-params-158.cfg b/layers/layer-params-158.cfg new file mode 100644 index 0000000..51d092f --- /dev/null +++ b/layers/layer-params-158.cfg @@ -0,0 +1,189 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[20000] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[20000] + +[conv2a] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[20000] + +[conv2b] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=exp[20000] + +[conv3a] +epsW=0.04,0.04 +epsB=0.08 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[20000] + +[conv3b] +epsW=0.04,0.04 +epsB=0.08 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[20000] + +[conv4a] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[20000] + +[conv4b] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[20000] + +[conv5a] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[20000] + +[conv5b] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=exp[20000] + +[fc2048a] +epsW=0.04,0.04 +epsB=0.08 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[20000] + +[fc2048b] +epsW=0.04,0.04 +epsB=0.08 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[20000] + +[fc2048ba] +epsW=0.04,0.04 +epsB=0.08 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[20000] + +[fc2048bb] +epsW=0.04,0.04 +epsB=0.08 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[20000] + +[fc1000] +epsW=0.04,0.04 +epsB=0.08 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=exp[20000] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #153 (so uses same file) but with exp learning rate schedule +# its 
also like #155, but with learning rates in the range 0.04 to 0.000002 +# on guppy8 +# logs/layers-158.log +# /nobackup/kriz/tmp/ConvNet__2012-12-23_17.34.48 +# epoch 63: set color noise to 0 from 0.1 +# epoch 77: set epsw to 0 from 0.04 on conv1 +# validation: logprob: 1.862656, 0.428884, 0.199910 +# validation multiview: logprob: 1.757155, 0.410260, 0.185380 + diff --git a/layers/layer-params-160.cfg b/layers/layer-params-160.cfg new file mode 100644 index 0000000..c816838 --- /dev/null +++ b/layers/layer-params-160.cfg @@ -0,0 +1,187 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=dexp[2000,4] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=dexp[2000,4] + +[conv2a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=dexp[2000,4] + +[conv2b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=dexp[2000,4] + +[conv3a] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=dexp[2000,4] + +[conv3b] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=dexp[2000,4] + +[conv4a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=dexp[2000,4] + +[conv4b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=dexp[2000,4] + +[conv5a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=dexp[2000,4] + +[conv5b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=dexp[2000,4] + +[fc2048a] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=dexp[2000,4] + +[fc2048b] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=dexp[2000,4] + +[fc2048ba] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=dexp[2000,4] + +[fc2048bb] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=dexp[2000,4] + +[fc1000] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=dexp[2000,4] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #153 (so uses same file) but with dexp learning rate schedule +# its also like 159 but with rates in the range 0.02 to 0.00001 +# on guppy7 +# logs/layers-160.log +# /nobackup/kriz/tmp/ConvNet__2012-12-24_17.07.46 +# epoch 61: set color noise to 0 from 0.1 +# validation: 1.884187, 0.433855, 0.205452 +# validation multiview: : 1.789202, 0.413740, 0.190360 diff --git a/layers/layer-params-161.cfg b/layers/layer-params-161.cfg new file mode 100644 index 0000000..8661a8d --- /dev/null +++ b/layers/layer-params-161.cfg @@ -0,0 +1,183 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=jdexp[1000,4] + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=jdexp[1000,4] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=jdexp[1000,4] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=jdexp[1000,4] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=jdexp[1000,4] + +[conv3b] 
+epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=jdexp[1000,4] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=jdexp[1000,4] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=jdexp[1000,4] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=jdexp[1000,4] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=jdexp[1000,4] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=jdexp[1000,4] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=jdexp[1000,4] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=jdexp[1000,4] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=jdexp[1000,4] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=jdexp[1000,4] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #153 (so uses same file) but with jumpy dexp learning rate schedule +# on guppy7 +# logs/layers-161.log +# (guppy7 is dead for now so doing nothing) diff --git a/layers/layer-params-162.cfg b/layers/layer-params-162.cfg new file mode 100644 index 0000000..8e49811 --- /dev/null +++ b/layers/layer-params-162.cfg @@ -0,0 +1,187 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=dexp[1000,3] + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=dexp[1000,3] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=dexp[1000,3] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=dexp[1000,3] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=dexp[1000,3] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=dexp[1000,3] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=dexp[1000,3] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=dexp[1000,3] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=dexp[1000,3] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=dexp[1000,3] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=dexp[1000,3] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=dexp[1000,3] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=dexp[1000,3] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=dexp[1000,3] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=dexp[1000,3] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + 
+[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #153 (so uses same file) but with dexp learning rate schedule +# its also like 159 but with 3 levels of learning rates in the range 0.01 to 0.00001 +# on guppy9 +# logs/layers-162.log +# /nobackup/kriz/tmp/ConvNet__2012-12-25_22.41.00 +# epoch 61: set color noise to 0 from 0.1 +# validation: logprob: 1.894451, 0.438533, 0.207935 +# validation multiview: ah screw it, it'll suck diff --git a/layers/layer-params-163.cfg b/layers/layer-params-163.cfg new file mode 100644 index 0000000..a01b9b9 --- /dev/null +++ b/layers/layer-params-163.cfg @@ -0,0 +1,188 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=dexp[1000,4] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=dexp[1000,4] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=dexp[1000,4] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +sched=dexp[1000,4] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=dexp[1000,4] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +sched=dexp[1000,4] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=dexp[1000,4] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=dexp[1000,4] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=dexp[1000,4] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +sched=dexp[1000,4] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.001,0.001 +wball=0,0 +sched=dexp[1000,4] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.001,0.001 +wball=0,0 +sched=dexp[1000,4] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.001,0.001 +wball=0,0 +sched=dexp[1000,4] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.001,0.001 +wball=0,0 +sched=dexp[1000,4] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.001,0.001 +wball=0,0 +sched=dexp[1000,4] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #153 (so uses same file) but with dexp learning rate schedule +# its also like 159 but with 2x the wc on fc layers +# on guppy9 +# logs/layers-163.log +# /nobackup/kriz/tmp/ConvNet__2012-12-26_01.15.38 +# epoch 61: set color noise to 0 from 0.1 +# epoch 73: set conv1 epsw to 0 from 0.01 +# validation: logprob: 1.849131, 0.429085, 0.199072 +# validation multiview: ah screw it, it'll suck diff --git a/layers/layer-params-165.cfg b/layers/layer-params-165.cfg new file mode 100644 index 0000000..987362d --- /dev/null +++ b/layers/layer-params-165.cfg @@ -0,0 +1,204 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[500,4] +schedB=dexp[10,2] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[500,4] +schedB=dexp[10,2] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 
+schedW=dexp[500,4] +schedB=dexp[10,2] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[500,4] +schedB=dexp[10,2] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[500,4] +schedB=dexp[10,2] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[500,4] +schedB=dexp[10,2] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[500,4] +schedB=dexp[10,2] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[500,4] +schedB=dexp[10,2] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[500,4] +schedB=dexp[10,2] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[500,4] +schedB=dexp[10,2] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.001,0.001 +wball=0,0 +schedW=dexp[500,4] +schedB=dexp[10,2] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.001,0.001 +wball=0,0 +schedW=dexp[500,4] +schedB=dexp[10,2] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.001,0.001 +wball=0,0 +schedW=dexp[500,4] +schedB=dexp[10,2] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.001,0.001 +wball=0,0 +schedW=dexp[500,4] +schedB=dexp[10,2] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.001,0.001 +wball=0,0 +schedW=dexp[500,4] +schedB=dexp[10,2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #153 (so uses same file) but with dexp learning rate schedule +# its also like 163 but final learning rate is 0.00002 instead of 0.00001 +# on guppy7 +# logs/layers-165.log +# /nobackup/kriz/tmp/ConvNet__2012-12-30_18.42.56 +# NOTE: performance to be compared with 163 +# epoch 63: set color noise to 0 from 0.1 +# epoch 78: set conv1 epsw to 0 from 0.01 +# logprob: 1.847919, 0.427840, 0.198452 +# multiview logprob: 1.757196, 0.409820, 0.183920 diff --git a/layers/layer-params-166.cfg b/layers/layer-params-166.cfg new file mode 100644 index 0000000..adc8750 --- /dev/null +++ b/layers/layer-params-166.cfg @@ -0,0 +1,204 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 
+momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #159 but with separate schedb and also it puts the 2nd layer normalization *below* relu +# on guppy9 +# logs/layers-166.log +# /storage/tmp/ConvNet__2012-12-23_19.38.31 +# +# epoch 64: set color noise to 0 from 0.1 +# epoch 73: set epsw conv1 to 0 from 0.01 +# logprob: 1.863109, 0.428586, 0.201039 +# multiview logprob: 1.768124, 0.410960, 0.186740 + diff --git a/layers/layer-params-167.cfg b/layers/layer-params-167.cfg new file mode 100644 index 0000000..dff9c9c --- /dev/null +++ b/layers/layer-params-167.cfg @@ -0,0 +1,187 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 
+momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[1000,4] +schedB=dexp[10,2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +# this is like #153 but with dexp learning rate schedule +# also deletes rnorm over conv2 +# on guppy8 +# logs/layers-167.log +# /nobackup/kriz/tmp/ConvNet__2013-01-03_23.39.35 +# epoch 63: set color noise to 0 from 0.1 +# epoch 73: set conv1 epsw to 0 from 0.01 +# logprob: 1.851845, 0.426772, 0.197590 +# multiview logprob: 1.738715, 0.404880, 0.181180 + + diff --git a/layers/layer-params-169.cfg b/layers/layer-params-169.cfg new file mode 100644 index 0000000..cfa2b7d --- /dev/null +++ b/layers/layer-params-169.cfg @@ -0,0 +1,212 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[500,0,4] +schedB=dexp[10,0,2] + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[500,0,4] +schedB=dexp[10,0,2] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[500,0,4] +schedB=dexp[10,0,2] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[500,0,4] +schedB=dexp[10,0,2] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[500,0,4] +schedB=dexp[10,0,2] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[500,0,4] +schedB=dexp[10,0,2] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[500,0,4] +schedB=dexp[10,0,2] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[500,0,4] +schedB=dexp[10,0,2] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[500,0,4] +schedB=dexp[10,0,2] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[500,0,4] +schedB=dexp[10,0,2] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[500,0,4] +schedB=dexp[10,0,2] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[500,0,4] +schedB=dexp[10,0,2] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[500,0,4] +schedB=dexp[10,0,2] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[500,0,4] +schedB=dexp[10,0,2] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[500,0,4] +schedB=dexp[10,0,2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #159 (so uses same def file: 153) but with learning rate decaying to 0.00002 +# on guppy9 +# /nobackup/kriz/tmp/ConvNet__2013-01-07_04.03.15 +# logs/layers-169.log +# epoch 60: set color noise to 0 from 0.1 +# epoch 73: set conv1 epsw to 0 from 0.01 +# logprob: 1.841218, 0.425298, 0.195489 +# multiview logprob: 1.735123, 0.405960, 0.181120 + +# now lets run this for 80 epochs instead of 95: +# on guppy9 +# 
/nobackup/kriz/tmp/ConvNet__2013-01-13_15.31.51 +# logs/layers-169-80.log +# epoch 46: paused for experiments with dropout in conv layers +# epoch 53: set color noise to 0 from 0.1 +# epoch 74: set epsw to 0 from 0.01 on conv1 +# epoch 80: killed :( +# logprob: 1.866277, 0.429784, 0.200178 diff --git a/layers/layer-params-170-256-0.015.cfg b/layers/layer-params-170-256-0.015.cfg new file mode 100644 index 0000000..d7ea526 --- /dev/null +++ b/layers/layer-params-170-256-0.015.cfg @@ -0,0 +1,201 @@ +[conv1a] +epsW=0.015 +epsB=0.03 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv1b] +epsW=0.015 +epsB=0.03 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv2a] +epsW=0.015 +epsB=0.03 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv2b] +epsW=0.015 +epsB=0.03 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv3a] +epsW=0.015,0.015 +epsB=0.03 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv3b] +epsW=0.015,0.015 +epsB=0.03 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv4a] +epsW=0.015 +epsB=0.03 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv4b] +epsW=0.015 +epsB=0.03 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv5a] +epsW=0.015 +epsB=0.03 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv5b] +epsW=0.015 +epsB=0.03 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048a] +epsW=0.015,0.015 +epsB=0.03 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048b] +epsW=0.015,0.015 +epsB=0.03 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048ba] +epsW=0.015,0.015 +epsB=0.03 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048bb] +epsW=0.015,0.015 +epsB=0.03 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc1000] +epsW=0.015,0.015 +epsB=0.03 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is 170 but running with minibatch 256 on krunch, also setting eps to 0.015: +# use def file 153 +# not doing fading +# krunch +# /nobackup/kriz/tmp/ConvNet__2013-02-10_10.41.07 +# logs/layers-170-256-0.015.log +# epoch 25: killed to do 4gpu experiments diff --git a/layers/layer-params-170-256-double.cfg b/layers/layer-params-170-256-double.cfg new file mode 100644 index 0000000..8bc8778 --- /dev/null +++ b/layers/layer-params-170-256-double.cfg @@ -0,0 +1,203 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv2a] +epsW=0.02 +epsB=0.04 
+momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv2b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv3a] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv3b] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv4a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv4b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv5a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv5b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048a] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048b] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048ba] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048bb] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc1000] +epsW=0.02,0.02 +epsB=0.04 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is 170 but running with minibatch 256 on krunch, also doubling eps to compensate for big batch size: +# use def file 153 +# not doing fading +# /nobackup/kriz/tmp/ConvNet__2013-02-05_13.04.30 +# logs/layers-170-256-double-eps.log +# epoch 61: set color noise to 0 from 0.1 +# epoch 73: set conv1 epsw to 0 from 0.02 +# after 84 epochs: logprob: 1.840643, 0.422726, 0.196598 +# this matches #170 exactly, so im killing it to run 4gpu experiments diff --git a/layers/layer-params-170-256.cfg b/layers/layer-params-170-256.cfg new file mode 100644 index 0000000..ddebd98 --- /dev/null +++ b/layers/layer-params-170-256.cfg @@ -0,0 +1,199 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv4b] +epsW=0.01 +epsB=0.02 
+momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is 170 but running with minibatch 256 on krunch: +# use def file 153 +# not doing fading +# /nobackup/kriz/tmp/ConvNet__2013-02-05_12.50.10 +# logs/layers-170-256.log diff --git a/layers/layer-params-170-4gpu-exp.cfg b/layers/layer-params-170-4gpu-exp.cfg new file mode 100644 index 0000000..6f4cb78 --- /dev/null +++ b/layers/layer-params-170-4gpu-exp.cfg @@ -0,0 +1,412 @@ +[conv1a] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv1b] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv1c] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv1d] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv2a] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv2b] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv2c] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv2d] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv3a] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv3b] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv3c] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] 
+schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv3d] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv4a] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv4b] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv4c] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv4d] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv5a] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv5b] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv5c] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv5d] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1024a] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1024b] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1024c] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1024d] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1024ba] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1024bb] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1024bc] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1024bd] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1000a] +epsW=0.04,0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1000b] +epsW=0.04,0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1000c] +epsW=0.04,0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 
+schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1000d] +epsW=0.04,0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs1b] +enable=true + +[hs1c] +enable=true + +[hs1d] +enable=true + +[hs2a] +enable=true + +[hs2b] +enable=true + +[hs2c] +enable=true + +[hs2d] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1c] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1d] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2c] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2d] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[cnorm2c] +scale=0.001 +pow=0.75 + +[cnorm2d] +scale=0.001 +pow=0.75 + +# this is like 170-4gpu but with exp leraning rate +# uses config file #153-4gpu +# on krunch +# logs/layers-170-4gpu-exp.log +# /nobackup/kriz/tmp/ConvNet__2013-02-24_23.19.05 +# epoch 14: moved to guppy9 +# /nobackup/kriz/tmp/ConvNet__2013-02-24_23.19.05 +# killed, exp is bad diff --git a/layers/layer-params-170-4gpu.cfg b/layers/layer-params-170-4gpu.cfg new file mode 100644 index 0000000..3620190 --- /dev/null +++ b/layers/layer-params-170-4gpu.cfg @@ -0,0 +1,416 @@ +[conv1a] +epsW=0.0000 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=default +schedB=default + +[conv1b] +epsW=0.0000 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=default +schedB=default + +[conv1c] +epsW=0.0000 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=default +schedB=default + +[conv1d] +epsW=0.0000 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=default +schedB=default + +[conv2a] +epsW=0.00008 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=default +schedB=default + +[conv2b] +epsW=0.00008 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=default +schedB=default + +[conv2c] +epsW=0.00008 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=default +schedB=default + +[conv2d] +epsW=0.00008 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=default +schedB=default + +[conv3a] +epsW=0.00008,0.00008,0.00008 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=default +schedB=default + +[conv3b] +epsW=0.00008,0.00008,0.00008 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=default +schedB=default + +[conv3c] +epsW=0.00008,0.00008,0.00008 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=default +schedB=default + +[conv3d] +epsW=0.00008,0.00008,0.00008 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=default +schedB=default + +[conv4a] +epsW=0.00008 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=default +schedB=default + +[conv4b] +epsW=0.00008 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=default +schedB=default + +[conv4c] +epsW=0.00008 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=default +schedB=default + +[conv4d] +epsW=0.00008 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=default +schedB=default + +[conv5a] +epsW=0.00008 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=default +schedB=default + +[conv5b] 
+epsW=0.00008 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=default +schedB=default + +[conv5c] +epsW=0.00008 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=default +schedB=default + +[conv5d] +epsW=0.00008 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=default +schedB=default + +[fc1024a] +epsW=0.00008,0.00008,0.00008 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=default +schedB=default + +[fc1024b] +epsW=0.00008,0.00008,0.00008 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=default +schedB=default + +[fc1024c] +epsW=0.00008,0.00008,0.00008 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=default +schedB=default + +[fc1024d] +epsW=0.00008,0.00008,0.00008 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=default +schedB=default + +[fc1024ba] +epsW=0.00008,0.00008,0.00008 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=default +schedB=default + +[fc1024bb] +epsW=0.00008,0.00008,0.00008 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=default +schedB=default + +[fc1024bc] +epsW=0.00008,0.00008,0.00008 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=default +schedB=default + +[fc1024bd] +epsW=0.00008,0.00008,0.00008 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=default +schedB=default + +[fc1000a] +epsW=0.00008,0.00008,0.00008,0.00008 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=default +schedB=default + +[fc1000b] +epsW=0.00008,0.00008,0.00008,0.00008 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=default +schedB=default + +[fc1000c] +epsW=0.00008,0.00008,0.00008,0.00008 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=default +schedB=default + +[fc1000d] +epsW=0.00008,0.00008,0.00008,0.00008 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=default +schedB=default + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs1b] +enable=true + +[hs1c] +enable=true + +[hs1d] +enable=true + +[hs2a] +enable=true + +[hs2b] +enable=true + +[hs2c] +enable=true + +[hs2d] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1c] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1d] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2c] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2d] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[cnorm2c] +scale=0.001 +pow=0.75 + +[cnorm2d] +scale=0.001 +pow=0.75 + +# uses config file #153-4gpu +# on krunch +# /nobackup/kriz/tmp/ConvNet__2013-02-07_16.20.22 +# logs/layers-170-4gpu.log +# epoch 5: set mini to 256 from 128, epsw to 0.02 from 0.01 +# epoch 7: set epsw to 0.003 from 0.02 +# epoch 35: made backup to .bak +# set epsw to 0.0005 from 0.003 +# epoch 55: it seems strangely bad, restarting from 35 +# epoch 49: set color noise 0 from 0.1, set epsw to 0.00008 from 0.0005 on conv1 +# epoch 52: set epsw to 0.00008 from 0.0005 everywhere, set epsw to 0 from 0.00008 on conv1 +# epoch 63: killed.. 
its no better than 146 diff --git a/layers/layer-params-170-quant.cfg b/layers/layer-params-170-quant.cfg new file mode 100644 index 0000000..05f88e0 --- /dev/null +++ b/layers/layer-params-170-quant.cfg @@ -0,0 +1,234 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[pool2a] +quantF=half +quantB=half + +[pool2b] +quantF=half +quantB=half + +[pool3a] +quantF=half +quantB=half + +[pool3b] +quantF=half +quantB=half + 
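The quantF=half / quantB=half entries in this file (on the pool2*/pool3* layers above and on the hs1*/hs2* layers that follow) presumably mark the layers whose forward activations and backward gradients cross the GPU boundary; the file's trailing comment describes the setup as half quantization on all communicated layers. As a rough illustration only, with names of my own and not the codebase's quantizer, the round trip would look like the NumPy sketch below:

import numpy as np

def half_roundtrip(x):
    """Stand-in for quantF/quantB=half: store or send as IEEE half, widen back to float32."""
    return x.astype(np.float16).astype(np.float32)

if __name__ == "__main__":
    acts = np.random.randn(128, 2048).astype(np.float32)  # e.g. one minibatch of fc2048 activations
    err = np.abs(half_roundtrip(acts) - acts).max() / np.abs(acts).max()
    print("max abs error relative to largest activation: %.4f" % err)

The effect on accuracy can be judged from the logprob numbers this file's comment reports, compared against the unquantized #170 variants listed later in the patch.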
+[hs1a] +quantF=half +quantB=half + +[hs1b] +quantF=half +quantB=half + +[hs2a] +quantF=half +quantB=half + +[hs2b] +quantF=half +quantB=half + +# this is like #170 (def file 153) but uses half quantization on all communicated layers +# logs/layers-170-quant.log +# /nobackup/kriz/tmp/ConvNet__2013-02-21_22.33.24 +# guppy9 +# epoch 68: set color noise to 0 from 0.1 +# epoch 74: set epsw conv1 to 0 from 0.01 +# logprob: 1.861123, 0.425700, 0.199970 +# multiview logprob: 1.749848, 0.405240, 0.184160 diff --git a/layers/layer-params-170.cfg b/layers/layer-params-170.cfg new file mode 100644 index 0000000..c2bd390 --- /dev/null +++ b/layers/layer-params-170.cfg @@ -0,0 +1,242 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] 
+enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #159 (so uses same def file: 153) but with learning rate decaying to 0.00004 +# not starting this because its first ~24 epochs are gonna be completley identical to 169, so might as well wait for that one +# 169 backup at 23 epochs: /nobackup/kriz/tmp/ConvNet__2013-01-07_04.03.15.bak (guppy9) +# on guppy8 +# /nobackup/kriz/tmp/ConvNet__2013-01-07_04.03.15.layer-170 +# logs/layers-170.log +# epoch 68: set color noise to 0 from 0.1 +#NOTE: free gpus 2,3 after done +# epoch 74: set conv1 epsw to 0 from 0.01 +# logprob: 1.838933, 0.423407, 0.195827 +# multiview logprob: 1.730076, 0.405280, 0.180480 + +# resuming for 55 more epochs, this time adding in the fade-in of images with prob 5%: +# /nobackup/kriz/tmp/ConvNet__2013-01-07_04.03.15.layer-170.fade +# guppy8 +# logs/layers-170-contfade.log +# epoch 107: set epochs to 300 to increase learning rate, also turned on 0.1 +# epoch 127: set epsw to 0.0025148669 from 0.01, set schedw to default from dexp[250,0,4] +# epoch 127: set epsw to 0.00063245555 from 0.0025148669 +# epoch 127: set epsw to 0.0015874011 -- which is the correct one for thi slevel +# epoch 127: set epsw to 0.00025198421 +# epoch 137: set color noise to 0 from 0.1, also turned off fade-in + +# restart from scratch with fade: +# logs/layers-170-fade.log +# /nobackup/kriz/tmp/ConvNet__2013-01-31_18.52.37 +# guppy8 +# epoch 61: set color noise to 0 from 0.1 +# epoch 76: set conv1 epsw to 0 from 0.01 +# logprob: 1.831731, 0.422944, 0.195334 +# multiview: logprob: 1.726111, 0.402500, 0.181160 + +# now with fade probability 0.15 instead of 0.05: +# krunch +# logs/layers-170-fade-0.15.log +# /nobackup/kriz/tmp/ConvNet__2013-02-06_08.27.55 +# moving to guppy9 +# epoch 68: set color noise to 0 from 0.1 +# epoch 74: set conv1 epsw to 0 from 0.1, set fade prob to 0.05 from 0.15 +# logprob: 1.836181, 0.425008, 0.196351 + +# restart normal 170 but on 2 gpus which can't talk to each other -- just to make sure it'll work fine, and to measure the effect of quantization (#170-quant) +# guppy9 +# logs/layers-170-gpu-1-2.log +# /nobackup/kriz/tmp/ConvNet__2013-02-23_12.16.26 +# epoch 60: set color noise to 0 from 0.1 +# epoch 63: moved to guppy5 +# /nobackup_a/kriz/tmp/ConvNet__2013-02-23_12.16.26/ diff --git a/layers/layer-params-171.cfg b/layers/layer-params-171.cfg new file mode 100644 index 0000000..49d2457 --- /dev/null +++ b/layers/layer-params-171.cfg @@ -0,0 +1,202 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[500,5] +schedB=dexp[10,2] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[500,5] +schedB=dexp[10,2] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[500,5] +schedB=dexp[10,2] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[500,5] +schedB=dexp[10,2] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[500,5] +schedB=dexp[10,2] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[500,5] +schedB=dexp[10,2] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 
+schedW=dexp[500,5] +schedB=dexp[10,2] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[500,5] +schedB=dexp[10,2] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[500,5] +schedB=dexp[10,2] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[500,5] +schedB=dexp[10,2] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[500,5] +schedB=dexp[10,2] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[500,5] +schedB=dexp[10,2] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[500,5] +schedB=dexp[10,2] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[500,5] +schedB=dexp[10,2] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[500,5] +schedB=dexp[10,2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #169 (so uses same def file: 153) but with 5 epsw levels +# on guppy7 +# /nobackup/kriz/tmp/ConvNet__2013-01-07_04.07.16 +# logs/layers-171.log +# epoch 60: set color noise to 0 from 0.1 +# epoch 72: set conv1 epsw to 0 from 0.01 +# logprob: 1.843676, 0.423356, 0.197664 +# multiview logprob: 1.735358, 0.404200, 0.181400 diff --git a/layers/layer-params-172.cfg b/layers/layer-params-172.cfg new file mode 100644 index 0000000..4e1f505 --- /dev/null +++ b/layers/layer-params-172.cfg @@ -0,0 +1,202 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=jdexp[1000,4] +schedB=dexp[10,2] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=jdexp[1000,4] +schedB=dexp[10,2] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=jdexp[1000,4] +schedB=dexp[10,2] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=jdexp[1000,4] +schedB=dexp[10,2] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[1000,4] +schedB=dexp[10,2] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[1000,4] +schedB=dexp[10,2] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=jdexp[1000,4] +schedB=dexp[10,2] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=jdexp[1000,4] +schedB=dexp[10,2] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=jdexp[1000,4] +schedB=dexp[10,2] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=jdexp[1000,4] +schedB=dexp[10,2] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[1000,4] +schedB=dexp[10,2] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[1000,4] +schedB=dexp[10,2] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[1000,4] +schedB=dexp[10,2] + +[fc2048bb] 
+epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[1000,4] +schedB=dexp[10,2] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[1000,4] +schedB=dexp[10,2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #159 (so uses same def file: 153) but with jdexp +# on guppy9 +# /nobackup/kriz/tmp/ConvNet__2013-01-07_06.07.30 +# logs/layers-172.log +# epoch 60: set color noise to 0 from 0.1 +# epoch 64: made backup to /nobackup/kriz/tmp/ConvNet__2013-01-07_06.07.30.bak +# epoch 76: set conv1 epsw to 0 from 0.01 +# epoch 84: killed, its at 1.898 diff --git a/layers/layer-params-174.cfg b/layers/layer-params-174.cfg new file mode 100644 index 0000000..0442287 --- /dev/null +++ b/layers/layer-params-174.cfg @@ -0,0 +1,209 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=jdexp[500,0,4] +schedB=dexp[10,0,2] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=jdexp[500,0,4] +schedB=dexp[10,0,2] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=jdexp[500,0,4] +schedB=dexp[10,0,2] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=jdexp[500,0,4] +schedB=dexp[10,0,2] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[500,0,4] +schedB=dexp[10,0,2] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[500,0,4] +schedB=dexp[10,0,2] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=jdexp[500,0,4] +schedB=dexp[10,0,2] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=jdexp[500,0,4] +schedB=dexp[10,0,2] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=jdexp[500,0,4] +schedB=dexp[10,0,2] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=jdexp[500,0,4] +schedB=dexp[10,0,2] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[500,0,4] +schedB=dexp[10,0,2] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[500,0,4] +schedB=dexp[10,0,2] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[500,0,4] +schedB=dexp[10,0,2] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[500,0,4] +schedB=dexp[10,0,2] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[500,0,4] +schedB=dexp[10,0,2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #159 (so uses same def file: 153) but with jdexp +# this is also like 172, 
which uses jdexp, but here i'm usign the kind of jdexp that jumps to geometric mean instead of previous level (and also 172 decayed learning rate by factor of 1000 instead of 500) +# its also like 169, in that it decays learning rate to 0.00002, but this one's jumpy. so it should be compared to 169. +# on gpu +# initialized from 24 epochs of #169 +# logs/layers-174.log +# /storage/tmp/ConvNet__2013-01-07_04.03.15.layers-174 +# epoch 58: made backup to /storage/tmp/ConvNet__2013-01-07_04.03.15.layers-174.bak +# epoch 62: set color noise to 0 from 0.1 +# epoch 74: set conv1 epsw to 0 from 0.01 +# epoch 86: killed for noisy epsw experiments +# moved to guppy9 +# logprob: 1.825121, 0.425222, 0.193946 +# multiview logprob: 1.729982, 0.404380, 0.180420 + diff --git a/layers/layer-params-175.cfg b/layers/layer-params-175.cfg new file mode 100644 index 0000000..884b763 --- /dev/null +++ b/layers/layer-params-175.cfg @@ -0,0 +1,202 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #169 (so uses same def file: 153) but with learning rate decaying to 0.00008 +# on guppy8 +# logs/layers-175.log +# /nobackup/kriz/tmp/ConvNet__2013-01-07_04.03.15.layers-175 +# epoch 60: set color noise to 0 from 0.1 +# epoch 75: set conv1 epsw to 0 from 0.01 +# epoch 91: changed schedw factor to 250 from 125 +# 
logprob: 1.842557, 0.425160, 0.194863 diff --git a/layers/layer-params-177.cfg b/layers/layer-params-177.cfg new file mode 100644 index 0000000..b2dd38f --- /dev/null +++ b/layers/layer-params-177.cfg @@ -0,0 +1,210 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[hsconv4a] +enable=true + +[hsconv4b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #170 but with dropout over conv4 +# /nobackup/kriz/tmp/ConvNet__2013-01-16_11.40.03 +# on guppy9 +# logs/layers-177.log +# epoch 60: set color noise to 0 from 0.1 +# epoch 73: set epsw to 0 from 0.01 on conv1 +# epoch 73: made backup to /nobackup/kriz/tmp/ConvNet__2013-01-16_11.40.03.bak +# consider using this backup to see what happens if i turn off conv dropout +# logprob: 1.836304, 0.424376, 0.197056 +# multiview logprob: 1.744414, 0.406900, 0.183040 diff --git a/layers/layer-params-178.cfg b/layers/layer-params-178.cfg new file mode 100644 index 0000000..02d27f7 --- /dev/null +++ b/layers/layer-params-178.cfg @@ -0,0 +1,215 @@ +[conv1a] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv1b] +epsW=0.0 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] 
+schedB=dexp[10,0,2] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=dexp[250,0,4] +schedB=dexp[10,0,2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[pool2a] +doMax=true + +[pool2b] +doMax=true + +# this is like #170 but with pool2 now rand instead of max +# on guppy8 +# logs/layers-178.log +# /nobackup/kriz/tmp/ConvNet__2013-01-19_01.39.21 +# epoch 68: set color noise to 0 from 0.1 +# epoch 69: set pool2a,pool2b domax=true from false +# 1.85 :( + +# restart from 31, turning off rnadomness +# logs/layers-178-domax31.log +# /nobackup/kriz/tmp/ConvNet__2013-01-19_01.39.21.restart-31/ +# epoch 60: set color noise to 0 from 0.1 +# epoch 75: set epsw conv1 to 0 from 0.01 +# logprob: 1.845021, 0.424038, 0.197017 + diff --git a/layers/layer-params-180.cfg b/layers/layer-params-180.cfg new file mode 100644 index 0000000..e51027d --- /dev/null +++ b/layers/layer-params-180.cfg @@ -0,0 +1,202 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=jdexp[250,0,4] +schedB=dexp[10,0,2] + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=jdexp[250,0,4] +schedB=dexp[10,0,2] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=jdexp[250,0,4] +schedB=dexp[10,0,2] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=jdexp[250,0,4] +schedB=dexp[10,0,2] + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[250,0,4] +schedB=dexp[10,0,2] + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[250,0,4] +schedB=dexp[10,0,2] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=jdexp[250,0,4] 
+schedB=dexp[10,0,2] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=jdexp[250,0,4] +schedB=dexp[10,0,2] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=jdexp[250,0,4] +schedB=dexp[10,0,2] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=jdexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[250,0,4] +schedB=dexp[10,0,2] + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[250,0,4] +schedB=dexp[10,0,2] + +[fc1000] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 +schedW=jdexp[250,0,4] +schedB=dexp[10,0,2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +# this is like #159 (so uses same def file: 153) but with jdexp +# this is also like 174, which uses jdexp, but 172 decayed learning rate by factor of 500 instead of 250 +# so this one should be compared to 174 (which decays learning rate less) and 170 (which decays learning rate the same, but is not jumpy) +# on gpu +# initialized from 24 epochs of #169 +# logs/layers-180.log +# /storage/tmp/ConvNet__2013-01-07_04.03.15.layers-180 + diff --git a/layers/layer-params-183-4gpu-26epc.cfg b/layers/layer-params-183-4gpu-26epc.cfg new file mode 100644 index 0000000..eee7423 --- /dev/null +++ b/layers/layer-params-183-4gpu-26epc.cfg @@ -0,0 +1,451 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv1c] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv1d] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2c] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2d] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3a] 
+epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3b] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3c] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3d] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4c] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4d] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5c] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5d] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv6a] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv6b] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv6c] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv6d] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024a] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024b] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024c] 
+epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024d] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024ba] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024bb] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024bc] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024bd] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000a] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000b] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000c] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000d] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs1b] +enable=true + +[hs1c] +enable=true + +[hs1d] +enable=true + +[hs2a] +enable=true + +[hs2b] +enable=true + +[hs2c] +enable=true + +[hs2d] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1c] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1d] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2c] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2d] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[cnorm2c] +scale=0.001 +pow=0.75 + +[cnorm2d] +scale=0.001 +pow=0.75 + +# this is like 170-4gpu but with 6th conv layer and also 48 low-level filters +# this one is also meant to be trained for 26 epochs and with mini 128 -- lets see what happens +# on guppy9 +# logs/layers-183-dexp-26epc.log +# /nobackup/kriz/tmp/ConvNet__2013-03-03_23.17.25 +# # killed in favor of 184 +# this is obvioulsy the wrong net to run -- it has way too many params. 
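The schedW=dexp[tgtFactor=...,noiseStdev=...,numSteps=...] entries used throughout these files control how each layer's epsW decays over training. The sketch below (Python, not part of the patch) gives one plausible reading of the parameters, inferred from the configs and comment blocks rather than from the trainer code: the rate takes numSteps discrete levels, starts at the configured epsW, ends at epsW/tgtFactor, and consecutive levels differ by tgtFactor**(1/(numSteps-1)); exp[...] is the smooth variant, and noiseStdev is assumed to add multiplicative noise. Under this reading, the hand-set values in the layer-params-170 comment block (0.0015874011 and 0.00025198421, starting from epsW=0.01 with dexp[250,0,4]) come out as 0.01*250**(-1/3) and 0.01*250**(-2/3), and dexp[500,5] in layer-params-171 gives the "5 epsw levels" its comment mentions.

import random

def dexp_eps(base_eps, progress, tgt_factor, num_steps, noise_stdev=0.0):
    """One assumed reading of schedW=dexp[tgtFactor,noiseStdev,numSteps].

    The learning rate takes num_steps discrete levels: it starts at base_eps,
    ends at base_eps / tgt_factor, and consecutive levels differ by a constant
    factor tgt_factor ** (1 / (num_steps - 1)).  progress is the fraction of
    training completed, in [0, 1); how progress maps to a level is a guess.
    """
    level = min(int(progress * num_steps), num_steps - 1)
    eps = base_eps * tgt_factor ** (-level / (num_steps - 1.0))
    if noise_stdev > 0:
        eps *= max(0.0, random.gauss(1.0, noise_stdev))  # noiseStdev read as multiplicative jitter
    return eps

def exp_eps(base_eps, progress, tgt_factor, noise_stdev=0.0):
    """Smooth counterpart (schedW=exp[tgtFactor,noiseStdev]): same endpoints, no discrete levels."""
    eps = base_eps * tgt_factor ** (-progress)
    if noise_stdev > 0:
        eps *= max(0.0, random.gauss(1.0, noise_stdev))
    return eps

if __name__ == "__main__":
    # epsW=0.01 with dexp[tgtFactor=250,noiseStdev=0,numSteps=4], as in layer-params-170.cfg:
    # expect the four levels 0.01, 0.0015874, 0.00025198, 0.00004.
    for progress in (0.0, 0.3, 0.6, 0.9):
        print(round(dexp_eps(0.01, progress, tgt_factor=250, num_steps=4), 8))

A discrete schedule of this shape mirrors the hand-tuned "drop the rate by a few times at fixed points" practice visible in the older comment blocks, which may be why most of these configs use dexp rather than exp (both exp runs above, 170-4gpu-exp and 183-4gpu-exp, were killed with the note "exp is bad").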
diff --git a/layers/layer-params-183-4gpu-exp.cfg b/layers/layer-params-183-4gpu-exp.cfg new file mode 100644 index 0000000..0c7509c --- /dev/null +++ b/layers/layer-params-183-4gpu-exp.cfg @@ -0,0 +1,450 @@ +[conv1a] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv1b] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv1c] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv1d] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv2a] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv2b] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv2c] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv2d] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv3a] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv3b] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv3c] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv3d] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv4a] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv4b] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv4c] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv4d] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv5a] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv5b] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv5c] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv5d] +epsW=0.04 +epsB=0.08 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv6a] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 
+wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv6b] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv6c] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[conv6d] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1024a] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1024b] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1024c] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1024d] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1024ba] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1024bb] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1024bc] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1024bd] +epsW=0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1000a] +epsW=0.04,0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1000b] +epsW=0.04,0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1000c] +epsW=0.04,0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[fc1000d] +epsW=0.04,0.04,0.04,0.04 +epsB=0.08 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=exp[tgtFactor=1000,noiseStdev=0] +schedB=exp[tgtFactor=10,noiseStdev=0] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs1b] +enable=true + +[hs1c] +enable=true + +[hs1d] +enable=true + +[hs2a] +enable=true + +[hs2b] +enable=true + +[hs2c] +enable=true + +[hs2d] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1c] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1d] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2c] +scale=0.0001 
+pow=0.75 +minDiv=2 + +[rnorm2d] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[cnorm2c] +scale=0.001 +pow=0.75 + +[cnorm2d] +scale=0.001 +pow=0.75 + +# this is like 170-4gpu-exp but with 6th conv layer and also 48 low-level filters +# on krunch +# /nobackup/kriz/tmp/ConvNet__2013-02-26_20.53.34 +# logs/layers-183.log +# killed, exp is bad + diff --git a/layers/layer-params-183-4gpu.cfg b/layers/layer-params-183-4gpu.cfg new file mode 100644 index 0000000..104b8be --- /dev/null +++ b/layers/layer-params-183-4gpu.cfg @@ -0,0 +1,450 @@ +[conv1a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv1b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv1c] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv1d] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2c] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2d] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3a] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3b] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3c] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3d] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4c] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4d] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] 
+schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5c] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5d] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv6a] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv6b] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv6c] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv6d] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024a] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024b] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024c] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024d] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024ba] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024bb] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024bc] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024bd] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000a] +epsW=0.02,0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] 
+schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000b] +epsW=0.02,0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000c] +epsW=0.02,0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000d] +epsW=0.02,0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs1b] +enable=true + +[hs1c] +enable=true + +[hs1d] +enable=true + +[hs2a] +enable=true + +[hs2b] +enable=true + +[hs2c] +enable=true + +[hs2d] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1c] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1d] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2c] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2d] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[cnorm2c] +scale=0.001 +pow=0.75 + +[cnorm2d] +scale=0.001 +pow=0.75 + +# this is like 170-4gpu but with 6th conv layer and also 48 low-level filters +# on krunch +# logs/layers-183-dexp.log +# /nobackup/kriz/tmp/ConvNet__2013-03-03_23.14.01 +# killed in favor of 184 +# this is obvioulsy the wrong net to run -- it has way too many params. diff --git a/layers/layer-params-184-4gpu-26epc.cfg b/layers/layer-params-184-4gpu-26epc.cfg new file mode 100644 index 0000000..40709c5 --- /dev/null +++ b/layers/layer-params-184-4gpu-26epc.cfg @@ -0,0 +1,448 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv1c] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv1d] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2c] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2d] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3a] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 
+schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3b] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3c] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3d] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4c] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4d] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5c] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5d] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv6a] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv6b] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv6c] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv6d] +epsW=0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024a] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024b] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024c] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 
+momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024d] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024ba] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024bb] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024bc] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024bd] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000a] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000b] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000c] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000d] +epsW=0.01,0.01,0.01,0.01 +epsB=0.02 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs1b] +enable=true + +[hs1c] +enable=true + +[hs1d] +enable=true + +[hs2a] +enable=true + +[hs2b] +enable=true + +[hs2c] +enable=true + +[hs2d] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1c] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1d] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2c] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2d] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[cnorm2c] +scale=0.001 +pow=0.75 + +[cnorm2d] +scale=0.001 +pow=0.75 + +# this is like 184 but for 26 epochs +# guppy9 +# logs/layers-184-26epc.log +# /nobackup/kriz/tmp/ConvNet__2013-03-04_04.08.39 diff --git a/layers/layer-params-184-4gpu.cfg b/layers/layer-params-184-4gpu.cfg new file mode 100644 index 0000000..9f42f64 --- /dev/null +++ b/layers/layer-params-184-4gpu.cfg @@ -0,0 +1,450 @@ +[conv1a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv1b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 
+wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv1c] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv1d] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2c] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv2d] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3a] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3b] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3c] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv3d] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4c] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv4d] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5a] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5b] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5c] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv5d] +epsW=0.02 +epsB=0.04 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv6a] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 
+wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv6b] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv6c] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[conv6d] +epsW=0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024a] +epsW=0.02,0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024b] +epsW=0.02,0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024c] +epsW=0.02,0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024d] +epsW=0.02,0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024ba] +epsW=0.02,0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024bb] +epsW=0.02,0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024bc] +epsW=0.02,0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1024bd] +epsW=0.02,0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000a] +epsW=0.02,0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000b] +epsW=0.02,0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000c] +epsW=0.02,0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] +schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[fc1000d] +epsW=0.02,0.02,0.02,0.02 +epsB=0.04 +momW=0.9,0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005,0.0005 +wball=0,0,0,0 +schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4] 
+schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2] + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs1b] +enable=true + +[hs1c] +enable=true + +[hs1d] +enable=true + +[hs2a] +enable=true + +[hs2b] +enable=true + +[hs2c] +enable=true + +[hs2d] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1c] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm1d] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2a] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2b] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2c] +scale=0.0001 +pow=0.75 +minDiv=2 + +[rnorm2d] +scale=0.0001 +pow=0.75 +minDiv=2 + +[cnorm2a] +scale=0.001 +pow=0.75 + +[cnorm2b] +scale=0.001 +pow=0.75 + +[cnorm2c] +scale=0.001 +pow=0.75 + +[cnorm2d] +scale=0.001 +pow=0.75 + +# this is like 183 but with half as many top-level conv filters so the # of params is not @#$%&! +# also the fc layers are not connected .. fully (not to 3) +# on krunch +# 50 epochs +# logs/layers-184.log +# /nobackup/kriz/tmp/ConvNet__2013-03-04_04.05.18 diff --git a/layers/layer-params-2009-101.cfg b/layers/layer-params-2009-101.cfg new file mode 100644 index 0000000..8e7c2a1 --- /dev/null +++ b/layers/layer-params-2009-101.cfg @@ -0,0 +1,162 @@ +[conv1a] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc10184] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# on guppy9 +# this is like #101 (on gpu) but this is trained on imgnet-2009 +# epoch 6: set epsw to 0.001 from 0.01 +# epoch 14: set epsw to 0.0001 from 0.001 +# epoch 19: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 27: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 30: set epsw to 0.00001 from 0.0001 +# epoch 33: killed +# [3.4620055494832287, 0.69382157140195966, 0.43646610858041701] diff --git a/layers/layer-params-96-16k.cfg b/layers/layer-params-96-16k.cfg new file mode 100644 index 0000000..0fa1f7d --- /dev/null +++ b/layers/layer-params-96-16k.cfg @@ -0,0 +1,156 @@ +[conv1a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.001 
+epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.001,0.001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# on guppy7 +# logs/layers-96-16k.log +# /ais/gobi3/u/kriz/tmp/ConvNet__2012-06-24_02.01.57 +# epoch 5: set epsw to 0.001 from 0.01 +# epoch 6: enabled dropout diff --git a/layers/layer-params-98-16kinit.cfg b/layers/layer-params-98-16kinit.cfg new file mode 100644 index 0000000..403ddf6 --- /dev/null +++ b/layers/layer-params-98-16kinit.cfg @@ -0,0 +1,164 @@ +[conv1a] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.00 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.00 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# on guppy9 +# logs/layers-98-16kinit.log +# 
weights initialized from net trained on 16k imgnet for a few epochs: /ais/gobi3/u/kriz/tmp/ConvNet__2012-06-24_02.01.57 +# /nobackup/kriz/tmp/ConvNet__2012-06-25_17.55.06 +# logs/layers-98-16kinit.log +# epoch 30: set epsw to 0.0001 from 0.001 +# epoch 44: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 51: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 64: set epsw to 0.00001 from 0.0001 +# epoch 71: killed +# (294, 0.37132068707483007, 0.1679778095238095) diff --git a/layers/layer-params-99.cfg b/layers/layer-params-99.cfg new file mode 100644 index 0000000..7c7fe02 --- /dev/null +++ b/layers/layer-params-99.cfg @@ -0,0 +1,162 @@ +[conv1a] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.005 + +[conv1b] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.005 + +[conv2a] +epsW=0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0.003,0.003,0.003 + +[conv2b] +epsW=0.01,0.01,0.01 +epsB=0.002 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0.003,0.003,0.003 + +[conv3a] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.01 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.01,0.01 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# on guppy7 +# this is like #97 (on gpu) but with different rnorm2 +# logs/layers-99.log +# /nobackup/kriz/tmp/ConvNet__2012-06-26_20.35.00 +# diff rnorm2 doesnt seem to stop conv2 filters from dying +# now trying wball on conv1, conv2 +# logs/layers-99a.log +# /nobackup/kriz/tmp/ConvNet__2012-06-26_23.41.56 +# /nobackup/kriz/tmp/ConvNet__2012-06-27_03.57.56 +# lot of filters seem to remain random on conv2 diff --git a/layers/layer-params-flickr-102-inet-init.cfg b/layers/layer-params-flickr-102-inet-init.cfg new file mode 100644 index 0000000..03c7d78 --- /dev/null +++ b/layers/layer-params-flickr-102-inet-init.cfg @@ -0,0 +1,158 @@ +[conv1a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0001,0.0001,0.001 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0.00,0.00,0.00 + +[conv2b] +epsW=0.0001,0.0001,0.001 +epsB=0.02 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0.00,0.00,0.00 + +[conv3a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 
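+
+# Note on the comma-separated values in this file: layers such as [conv2a] take several
+# inputs (see the matching layers-*.cfg definitions elsewhere in this patch), and
+# epsW/momW/wc/wball appear to carry one value per input connection, in the same order as
+# that layer's inputs= list -- e.g. epsW=0.0001,0.0001,0.001 would mean a separate learning
+# rate for each of the three incoming weight matrices, while the scalar epsB applies to the
+# single shared bias vector. This reading is inferred from the configs themselves rather
+# than from the training code, so treat it as a hedged interpretation, not a specification.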
+ +[conv4a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc10003] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[crossent] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# on guppy7 +# this is like #97, but on flickr +# also initialized from #97 on imgnet +# /ais/gobi3/u/kriz/tmp/ConvNet__2012-06-30_23.41.20 +# epoch 59: set epsw to 0.0001 from 0.001 +# epoch 78: killed because i realized its not really fair diff --git a/layers/layer-params-flickr-102.cfg b/layers/layer-params-flickr-102.cfg new file mode 100644 index 0000000..9fb8611 --- /dev/null +++ b/layers/layer-params-flickr-102.cfg @@ -0,0 +1,161 @@ +[conv1a] +epsW=0.0000 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.0 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000,0.0000 +epsB=0.0 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0.00,0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000,0.0000 +epsB=0.0 +momW=0.9,0.9,0.9 +momB=0.9 +wc=0.0005,0.0005,0.0005 +wball=0.00,0.00,0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc10003] +epsW=0.00001,0.00001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[crossent] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# on guppy7 +# this is like #97, but on flickr +# /ais/gobi3/u/kriz/tmp/ConvNet__2012-06-30_17.00.32 +# epoch 85: set epsw to 0.0001 from 0.001 +# epoch 108: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 120: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 136: set epsw to 0.00001 from 0.0001 +# epoch 162: killed diff 
--git a/layers/layer-params-flickr-103.cfg b/layers/layer-params-flickr-103.cfg new file mode 100644 index 0000000..d861e89 --- /dev/null +++ b/layers/layer-params-flickr-103.cfg @@ -0,0 +1,155 @@ +[conv1a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.00005 +wball=0.00 + +[conv1b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.00005 +wball=0.00 + +[conv2a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.00005,0.00005 +wball=0.00,0.00 + +[conv2b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.00005,0.00005 +wball=0.00,0.00 + +[conv3a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.00005,0.00005 +wball=0,0 + +[conv3b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.00005,0.00005 +wball=0,0 + +[conv4a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.00005 +wball=0 + +[conv4b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.00005 +wball=0 + +[conv5a] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.00005 +wball=0 + +[conv5b] +epsW=0.01 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.00005 +wball=0 + +[fc2048a] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.00005,0.00005 +wball=0,0 + +[fc2048b] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.00005,0.00005 +wball=0,0 + +[fc2048ba] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.00005,0.00005 +wball=0,0 + +[fc2048bb] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.00005,0.00005 +wball=0,0 + +[fc10003] +epsW=0.01,0.01 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.00005,0.00005 +wball=0,0 + +[rcost] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# on guppy9 +# this is like #101, but on flickr, and with robust flickr cost +# diff --git a/layers/layer-params-flickr-105.cfg b/layers/layer-params-flickr-105.cfg new file mode 100644 index 0000000..056a9e9 --- /dev/null +++ b/layers/layer-params-flickr-105.cfg @@ -0,0 +1,156 @@ +[conv1a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.0001 +epsB=0.02 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc10003] +epsW=0.0001,0.0001 +epsB=0.02 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[sqdiff] +coeff=1 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 
+pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# on guppy9 +# this is like #101, but on flickr, and with sqdiff objective +# /ais/gobi3/u/kriz/tmp/ConvNet__2012-07-04_23.30.19 +# epoch 15: set wc to 0.0005 from 0.00005, set epsw to 0.001 from 0.01 +# epoch 93: set epsw to 0.0001 from 0.001 diff --git a/layers/layer-params-inet-5layer-conv94-2gpu.cfg b/layers/layer-params-inet-5layer-conv94-2gpu.cfg new file mode 100644 index 0000000..72b0764 --- /dev/null +++ b/layers/layer-params-inet-5layer-conv94-2gpu.cfg @@ -0,0 +1,164 @@ +[conv1a] +epsW=0.0000 +epsB=0.000 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv1b] +epsW=0.0000 +epsB=0.000 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0.00 + +[conv2a] +epsW=0.0000,0.0000 +epsB=0.000 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv2b] +epsW=0.0000,0.0000 +epsB=0.000 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0.00,0.00 + +[conv3a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv3b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[conv4a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv4b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5a] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[conv5b] +epsW=0.00001 +epsB=0.002 +momW=0.9 +momB=0.9 +wc=0.0005 +wball=0 + +[fc2048a] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048b] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048ba] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc2048bb] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[fc1000] +epsW=0.00001,0.00001 +epsB=0.002 +momW=0.9,0.9 +momB=0.9 +wc=0.0005,0.0005 +wball=0,0 + +[logprob] +coeff=1 +topk=5 + +[hs1a] +enable=true + +[hs2a] +enable=true + +[hs1b] +enable=true + +[hs2b] +enable=true + +[rnorm1a] +scale=0.0001 +pow=0.75 + +[rnorm1b] +scale=0.0001 +pow=0.75 + +[rnorm2a] +scale=0.0001 +pow=0.75 + +[rnorm2b] +scale=0.0001 +pow=0.75 + +# on guppy9 +# logs/layers-inet-5layer-conv94-2gpu.log +# /nobackup/kriz/tmp/ConvNet__2012-06-18_18.34.17 +# logs/layers-inet-5layer-conv94-2gpu.log +# epoch 13: set epsw to 0.001 from 0.01 +# epoch 46: set epsw to 0.0001 from 0.001 +# epoch 55: set epsw to 0.00001 from 0.0001 on conv1,conv2 +# set color noise to 0 from 0.1 +# epoch 62: set epsw to 0 from 0.00001 on conv1,conv2 +# epoch 84: set epsw to 0.00001 from 0.0001 +# epoch 90: killed +# 0.38107167346938753, 0.17608947619047613 diff --git a/layers/layers-100.cfg b/layers/layers-100.cfg new file mode 100644 index 0000000..f8e56ef --- /dev/null +++ b/layers/layers-100.cfg @@ -0,0 +1,314 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 
+stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=9 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=9 + +[conv2a] +type=conv +inputs=nails0,rnorm1a,rnorm1b +filters=128,64,64 +padding=0,2,2 +stride=2,1,1 +filterSize=5,5,5 +channels=3,32,32 +initW=0.01,0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +randSparse=0,1,1 +groups=1,2,2 +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1a,rnorm1b +filters=128,64,64 +padding=0,2,2 +stride=2,1,1 +filterSize=5,5,5 +channels=3,32,32 +initW=0.01,0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +randSparse=0,1,1 +groups=1,2,2 +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=9 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=9 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-106.cfg b/layers/layers-106.cfg new file mode 100644 index 0000000..f6e25a2 --- /dev/null +++ b/layers/layers-106.cfg @@ -0,0 +1,322 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 
+initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=9 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=9 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=9 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=9 + +[cnorm2a] +type=cnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=cnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-109.cfg b/layers/layers-109.cfg new file mode 100644 index 0000000..25bc8b6 --- /dev/null +++ b/layers/layers-109.cfg @@ -0,0 +1,340 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv 
+inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=9 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=9 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=9 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=9 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc1536a] +type=fc +inputs=pool3a,pool3b +outputs=1536 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc1536b] +type=fc +inputs=pool3a,pool3b +outputs=1536 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc1536a + +[hs1b] +type=hs +keep=0.5 +inputs=fc1536b + +[fc1536ba] +type=fc +inputs=hs1a,hs1b +outputs=1536 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc1536bb] +type=fc +inputs=hs1b,hs1a +outputs=1536 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc1536ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc1536bb + +# NEW LAYERS FOR THIS EXPERIMENT + +[fc1536ca] +type=fc +inputs=hs2a,hs2b +outputs=1536 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc1536cb] +type=fc +inputs=hs2b,hs2a +outputs=1536 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs3a] +type=hs +keep=0.5 +inputs=fc1536ca + +[hs3b] +type=hs +keep=0.5 +inputs=fc1536cb + +[fc1000] +type=fc +outputs=1000 +inputs=hs3a,hs3b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg 
+inputs=labels,probs +gpu=1 diff --git a/layers/layers-110.cfg b/layers/layers-110.cfg new file mode 100644 index 0000000..7e376ff --- /dev/null +++ b/layers/layers-110.cfg @@ -0,0 +1,286 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[conv2a] +type=conv +inputs=nails0,pool1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,pool1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[pool2a] +type=pool +pool=max +inputs=conv2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=conv2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-111.cfg b/layers/layers-111.cfg new file mode 100644 index 0000000..9570a0e --- /dev/null +++ b/layers/layers-111.cfg @@ 
-0,0 +1,340 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=9 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=9 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=9 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=9 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +# NEW LAYERS FOR THIS EXPERIMENT + +[fc2048ca] +type=fc +inputs=hs2a,hs2b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048cb] +type=fc +inputs=hs2b,hs2a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + 
+[hs3a] +type=hs +keep=0.5 +inputs=fc2048ca + +[hs3b] +type=hs +keep=0.5 +inputs=fc2048cb + +[fc1000] +type=fc +outputs=1000 +inputs=hs3a,hs3b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-112.cfg b/layers/layers-112.cfg new file mode 100644 index 0000000..8b56dc0 --- /dev/null +++ b/layers/layers-112.cfg @@ -0,0 +1,310 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=5 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] 
+type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-113.cfg b/layers/layers-113.cfg new file mode 100644 index 0000000..f9cf6b6 --- /dev/null +++ b/layers/layers-113.cfg @@ -0,0 +1,310 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=9 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=9 + +[conv2a] +type=conv +inputs=nails0 +filters=128 +padding=0 +stride=2 +filterSize=5 +channels=3 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0 +filters=128 +padding=0 +stride=2 +filterSize=5 +channels=3 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=9 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=9 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=rnorm1a,rnorm1b,pool2a,pool2b +filters=192,192,192,192 +padding=0,0,1,1 +stride=2,2,1,1 +filterSize=3,3,3,3 +channels=32,32,128,128 +initW=0.03,0.03,0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=rnorm1a,rnorm1b,pool2a,pool2b +filters=192,192,192,192 +padding=0,0,1,1 +stride=2,2,1,1 +filterSize=3,3,3,3 +channels=32,32,128,128 +initW=0.03,0.03,0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs 
+keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-114.cfg b/layers/layers-114.cfg new file mode 100644 index 0000000..32a0d73 --- /dev/null +++ b/layers/layers-114.cfg @@ -0,0 +1,340 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=9 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=9 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=9 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=9 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc3072a] +type=fc +inputs=pool3a,pool3b +outputs=3072 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc3072b] +type=fc +inputs=pool3a,pool3b +outputs=3072 +initW=0.01,0.01 +initB=1 
+neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc3072a + +[hs1b] +type=hs +keep=0.5 +inputs=fc3072b + +[fc3072ba] +type=fc +inputs=hs1a,hs1b +outputs=3072 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc3072bb] +type=fc +inputs=hs1b,hs1a +outputs=3072 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc3072ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc3072bb + +# NEW LAYERS FOR THIS EXPERIMENT + +[fc3072ca] +type=fc +inputs=hs2a,hs2b +outputs=3072 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc3072cb] +type=fc +inputs=hs2b,hs2a +outputs=3072 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs3a] +type=hs +keep=0.5 +inputs=fc3072ca + +[hs3b] +type=hs +keep=0.5 +inputs=fc3072cb + +[fc1000] +type=fc +outputs=1000 +inputs=hs3a,hs3b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-115-jpeg.cfg b/layers/layers-115-jpeg.cfg new file mode 100644 index 0000000..10a9fc5 --- /dev/null +++ b/layers/layers-115-jpeg.cfg @@ -0,0 +1,310 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=9 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=9 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=9 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=9 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 
+initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc3072a] +type=fc +inputs=pool3a,pool3b +outputs=3072 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc3072b] +type=fc +inputs=pool3a,pool3b +outputs=3072 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc3072a + +[hs1b] +type=hs +keep=0.5 +inputs=fc3072b + +[fc3072ba] +type=fc +inputs=hs1a,hs1b +outputs=3072 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc3072bb] +type=fc +inputs=hs1b,hs1a +outputs=3072 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc3072ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc3072bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-116.cfg b/layers/layers-116.cfg new file mode 100644 index 0000000..f5158f3 --- /dev/null +++ b/layers/layers-116.cfg @@ -0,0 +1,616 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +# GPU 0 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=5 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +# GPU 1 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=5 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +# GPU 2 + +[conv1c] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=2 + +[pool1c] +type=pool +pool=max +inputs=conv1c +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1c] +type=cmrnorm +inputs=pool1c +channels=32 +size=5 + +[conv2c] +type=conv +inputs=nails0,rnorm1c +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=2 + +[rnorm2c] +type=cmrnorm +inputs=conv2c +channels=128 +size=5 + +[pool2c] +type=pool +pool=max +inputs=rnorm2c +sizeX=3 +stride=2 +channels=128 + + +# GPU 3 + +[conv1d] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=3 + +[pool1d] +type=pool +pool=max +inputs=conv1d 
+sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1d] +type=cmrnorm +inputs=pool1d +channels=32 +size=5 + +[conv2d] +type=conv +inputs=nails0,rnorm1d +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=3 + +[rnorm2d] +type=cmrnorm +inputs=conv2d +channels=128 +size=5 + +[pool2d] +type=pool +pool=max +inputs=rnorm2d +sizeX=3 +stride=2 +channels=128 + +# GPU 0 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +# GPU 1 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +# GPU 2 + +[conv3c] +type=conv +inputs=pool2c,pool2d +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=2 + +[conv4c] +type=conv +inputs=conv3c +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5c] +type=conv +inputs=conv4c +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3c] +type=pool +pool=max +inputs=conv5c +sizeX=3 +stride=2 +channels=128 +neuron=relu + +# GPU 3 + +[conv3d] +type=conv +inputs=pool2c,pool2d +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=3 + +[conv4d] +type=conv +inputs=conv3d +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5d] +type=conv +inputs=conv4d +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3d] +type=pool +pool=max +inputs=conv5d +sizeX=3 +stride=2 +channels=128 +neuron=relu + +# GPU 0 + +[fc1024a] +type=fc +inputs=pool3a,pool3b,pool3c,pool3d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[hs1a] +type=hs +keep=0.5 +inputs=fc1024a + +# GPU 1 + +[fc1024b] +type=fc +inputs=pool3a,pool3b,pool3c,pool3d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1b] +type=hs +keep=0.5 +inputs=fc1024b + +# GPU 2 + +[fc1024c] +type=fc +inputs=pool3a,pool3b,pool3c,pool3d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=2 + +[hs1c] +type=hs +keep=0.5 +inputs=fc1024c + +# GPU 3 + +[fc1024d] +type=fc +inputs=pool3a,pool3b,pool3c,pool3d 
+outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=3 + +[hs1d] +type=hs +keep=0.5 +inputs=fc1024d + +# GPU 0 + +[fc1024-2a] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[hs2a] +type=hs +keep=0.5 +inputs=fc1024-2a + +# GPU 1 + +[fc1024-2b] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2b] +type=hs +keep=0.5 +inputs=fc1024-2b + +# GPU 2 + +[fc1024-2c] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=2 + +[hs2c] +type=hs +keep=0.5 +inputs=fc1024-2c + +# GPU 3 + +[fc1024-2d] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=3 + +[hs2d] +type=hs +keep=0.5 +inputs=fc1024-2d + +# GPU 0 + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b,hs2c,hs2d +initW=0.01,0.01,0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-117.cfg b/layers/layers-117.cfg new file mode 100644 index 0000000..756641f --- /dev/null +++ b/layers/layers-117.cfg @@ -0,0 +1,471 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +# GPU 0 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=5 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +# GPU 1 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=5 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +# GPU 2 + +[conv1c] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=2 + +[pool1c] +type=pool +pool=max +inputs=conv1c +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1c] +type=cmrnorm +inputs=pool1c +channels=32 +size=5 + +[conv2c] +type=conv +inputs=nails0,rnorm1c +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=2 + +[rnorm2c] +type=cmrnorm +inputs=conv2c +channels=128 +size=5 + +[pool2c] +type=pool +pool=max +inputs=rnorm2c +sizeX=3 +stride=2 +channels=128 + +# GPU 0 + +[conv3a] +type=conv +inputs=pool2a,pool2b,pool2c +filters=192,192,192 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 
+partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +# GPU 1 + +[conv3b] +type=conv +inputs=pool2a,pool2b,pool2c +filters=192,192,192 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +# GPU 2 + +[conv3c] +type=conv +inputs=pool2a,pool2b,pool2c +filters=192,192,192 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=2 + +[conv4c] +type=conv +inputs=conv3c +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5c] +type=conv +inputs=conv4c +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3c] +type=pool +pool=max +inputs=conv5c +sizeX=3 +stride=2 +channels=128 +neuron=relu + +# GPU 0 + +[fc1408a] +type=fc +inputs=pool3a,pool3b,pool3c +outputs=1408 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[hs1a] +type=hs +keep=0.5 +inputs=fc1408a + +# GPU 1 + +[fc1408b] +type=fc +inputs=pool3a,pool3b,pool3c +outputs=1408 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1b] +type=hs +keep=0.5 +inputs=fc1408b + +# GPU 2 + +[fc1408c] +type=fc +inputs=pool3a,pool3b,pool3c +outputs=1408 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=2 + +[hs1c] +type=hs +keep=0.5 +inputs=fc1408c + +# GPU 0 + +[fc1408-2a] +type=fc +inputs=hs1a,hs1b,hs1c +outputs=1408 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[hs2a] +type=hs +keep=0.5 +inputs=fc1408-2a + +# GPU 1 + +[fc1408-2b] +type=fc +inputs=hs1a,hs1b,hs1c +outputs=1408 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2b] +type=hs +keep=0.5 +inputs=fc1408-2b + +# GPU 2 + +[fc1408-2c] +type=fc +inputs=hs1a,hs1b,hs1c +outputs=1408 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=2 + +[hs2c] +type=hs +keep=0.5 +inputs=fc1408-2c + +# GPU 0 + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b,hs2c +initW=0.01,0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-118.cfg b/layers/layers-118.cfg new file mode 100644 index 0000000..8b56dc0 --- /dev/null +++ b/layers/layers-118.cfg @@ -0,0 +1,310 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 
+padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=5 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-120-4gpu.cfg b/layers/layers-120-4gpu.cfg new file mode 100644 index 0000000..3eaf2e5 --- /dev/null +++ b/layers/layers-120-4gpu.cfg @@ -0,0 +1,605 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 
+partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[conv1c] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=2 + +[conv1d] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=3 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1c] +type=pool +pool=max +inputs=conv1c +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1d] +type=pool +pool=max +inputs=conv1d +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=5 + +[rnorm1c] +type=cmrnorm +inputs=pool1c +channels=32 +size=5 + +[rnorm1d] +type=cmrnorm +inputs=pool1d +channels=32 +size=5 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=64,64 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=64,64 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv2c] +type=conv +inputs=nails0,rnorm1c +filters=64,64 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=2 + +[conv2d] +type=conv +inputs=nails0,rnorm1d +filters=64,64 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=3 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=64 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=64 +size=5 + +[rnorm2c] +type=cmrnorm +inputs=conv2c +channels=64 +size=5 + +[rnorm2d] +type=cmrnorm +inputs=conv2d +channels=64 +size=5 + +[cnorm2a] +type=cnorm +inputs=rnorm2a +channels=64 +size=5 + +[cnorm2b] +type=cnorm +inputs=rnorm2b +channels=64 +size=5 + +[cnorm2c] +type=cnorm +inputs=rnorm2c +channels=64 +size=5 + +[cnorm2d] +type=cnorm +inputs=rnorm2d +channels=64 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=64 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=64 + +[pool2c] +type=pool +pool=max +inputs=cnorm2c +sizeX=3 +stride=2 +channels=64 + +[pool2d] +type=pool +pool=max +inputs=cnorm2d +sizeX=3 +stride=2 +channels=64 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=64,64 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=64,64 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv3c] +type=conv +inputs=pool2c,pool2d +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=64,64 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=2 + +[conv3d] +type=conv +inputs=pool2c,pool2d +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=64,64 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=3 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu 
+initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4c] +type=conv +inputs=conv3c +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4d] +type=conv +inputs=conv3d +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5c] +type=conv +inputs=conv4c +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5d] +type=conv +inputs=conv4d +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3c] +type=pool +pool=max +inputs=conv5c +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3d] +type=pool +pool=max +inputs=conv5d +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc1024-1a] +type=fc +inputs=pool3a,pool3b,pool3c,pool3d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc1024-1b] +type=fc +inputs=pool3a,pool3b,pool3c,pool3d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[fc1024-1c] +type=fc +inputs=pool3a,pool3b,pool3c,pool3d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=2 + +[fc1024-1d] +type=fc +inputs=pool3a,pool3b,pool3c,pool3d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=3 + +[hs1a] +type=hs +keep=0.5 +inputs=fc1024-1a + +[hs1b] +type=hs +keep=0.5 +inputs=fc1024-1b + +[hs1c] +type=hs +keep=0.5 +inputs=fc1024-1c + +[hs1d] +type=hs +keep=0.5 +inputs=fc1024-1d + +[fc1024-2a] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc1024-2b] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[fc1024-2c] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=2 + +[fc1024-2d] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=3 + +[hs2a] +type=hs +keep=0.5 +inputs=fc1024-2a + +[hs2b] +type=hs +keep=0.5 +inputs=fc1024-2b + +[hs2c] +type=hs +keep=0.5 +inputs=fc1024-2c + +[hs2d] +type=hs +keep=0.5 +inputs=fc1024-2d + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b,hs2c,hs2d +initW=0.01,0.01,0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-120.cfg b/layers/layers-120.cfg new file mode 100644 index 0000000..b075392 --- /dev/null +++ b/layers/layers-120.cfg @@ -0,0 +1,322 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data 
+channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=5 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=cnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=cnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-121.cfg b/layers/layers-121.cfg new file mode 100644 index 0000000..b3c3bef --- /dev/null +++ b/layers/layers-121.cfg @@ -0,0 +1,334 @@ +[data] +type=data +dataIdx=0 + 
+[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=5 + +[cnorm1a] +type=cnorm +inputs=rnorm1a +channels=32 +size=5 + +[cnorm1b] +type=cnorm +inputs=rnorm1b +channels=32 +size=5 + +[conv2a] +type=conv +inputs=nails0,cnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,cnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=cnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=cnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 
+inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-126.cfg b/layers/layers-126.cfg new file mode 100644 index 0000000..cf59f25 --- /dev/null +++ b/layers/layers-126.cfg @@ -0,0 +1,334 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[href-data] +type=href +inputs=data +channels=3 +gpu=1 + +[href-nails] +type=href +inputs=nails0 +channels=3 +gpu=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=href-data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=5 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=href-nails,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=cnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=cnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 
+inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-127.cfg b/layers/layers-127.cfg new file mode 100644 index 0000000..e9aab0b --- /dev/null +++ b/layers/layers-127.cfg @@ -0,0 +1,322 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=5 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=cnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=cnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a +filters=256 +padding=1 +stride=1 +filterSize=3 +channels=128 +initW=0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2b +filters=256 +padding=1 +stride=1 +filterSize=3 +channels=128 +initW=0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=256 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=256 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc 
+inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-128.cfg b/layers/layers-128.cfg new file mode 100644 index 0000000..e60fb6b --- /dev/null +++ b/layers/layers-128.cfg @@ -0,0 +1,324 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=5 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=cnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=cnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a,conv3b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=192,192 +neuron=relu +initW=0.03,0.03 +initB=1 +partialSum=13 +sharedBiases=1 +gpu=0 + +[conv4b] +type=conv +inputs=conv3a,conv3b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=192,192 +neuron=relu +initW=0.03,0.03 +initB=1 +partialSum=13 +sharedBiases=1 +gpu=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=128 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=128 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max 
+inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-129.cfg b/layers/layers-129.cfg new file mode 100644 index 0000000..ef7a261 --- /dev/null +++ b/layers/layers-129.cfg @@ -0,0 +1,605 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[conv1c] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=2 + +[conv1d] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=3 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1c] +type=pool +pool=max +inputs=conv1c +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1d] +type=pool +pool=max +inputs=conv1d +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=5 + +[rnorm1c] +type=cmrnorm +inputs=pool1c +channels=32 +size=5 + +[rnorm1d] +type=cmrnorm +inputs=pool1d +channels=32 +size=5 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=64,64 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=64,64 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv2c] +type=conv +inputs=nails0,rnorm1c +filters=64,64 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=2 + +[conv2d] +type=conv +inputs=nails0,rnorm1d +filters=64,64 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=3 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=64 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=64 +size=5 + +[rnorm2c] +type=cmrnorm +inputs=conv2c +channels=64 +size=5 + +[rnorm2d] +type=cmrnorm +inputs=conv2d +channels=64 +size=5 + +[cnorm2a] +type=cnorm 
+inputs=rnorm2a +channels=64 +size=5 + +[cnorm2b] +type=cnorm +inputs=rnorm2b +channels=64 +size=5 + +[cnorm2c] +type=cnorm +inputs=rnorm2c +channels=64 +size=5 + +[cnorm2d] +type=cnorm +inputs=rnorm2d +channels=64 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=64 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=64 + +[pool2c] +type=pool +pool=max +inputs=cnorm2c +sizeX=3 +stride=2 +channels=64 + +[pool2d] +type=pool +pool=max +inputs=cnorm2d +sizeX=3 +stride=2 +channels=64 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=64,64 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=64,64 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv3c] +type=conv +inputs=pool2c,pool2d +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=64,64 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=2 + +[conv3d] +type=conv +inputs=pool2c,pool2d +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=64,64 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=3 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4c] +type=conv +inputs=conv3c +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4d] +type=conv +inputs=conv3d +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5a] +type=conv +inputs=conv4a +filters=64 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=64 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5c] +type=conv +inputs=conv4c +filters=64 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5d] +type=conv +inputs=conv4d +filters=64 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=64 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=64 +neuron=relu + +[pool3c] +type=pool +pool=max +inputs=conv5c +sizeX=3 +stride=2 +channels=64 +neuron=relu + +[pool3d] +type=pool +pool=max +inputs=conv5d +sizeX=3 +stride=2 +channels=64 +neuron=relu + +[fc1024-1a] +type=fc +inputs=pool3a,pool3b,pool3c,pool3d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc1024-1b] +type=fc +inputs=pool3a,pool3b,pool3c,pool3d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[fc1024-1c] +type=fc +inputs=pool3a,pool3b,pool3c,pool3d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=2 + +[fc1024-1d] +type=fc +inputs=pool3a,pool3b,pool3c,pool3d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=3 + +[hs1a] +type=hs +keep=0.5 +inputs=fc1024-1a + +[hs1b] +type=hs 
+keep=0.5 +inputs=fc1024-1b + +[hs1c] +type=hs +keep=0.5 +inputs=fc1024-1c + +[hs1d] +type=hs +keep=0.5 +inputs=fc1024-1d + +[fc1024-2a] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc1024-2b] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[fc1024-2c] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=2 + +[fc1024-2d] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=3 + +[hs2a] +type=hs +keep=0.5 +inputs=fc1024-2a + +[hs2b] +type=hs +keep=0.5 +inputs=fc1024-2b + +[hs2c] +type=hs +keep=0.5 +inputs=fc1024-2c + +[hs2d] +type=hs +keep=0.5 +inputs=fc1024-2d + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b,hs2c,hs2d +initW=0.01,0.01,0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-130.cfg b/layers/layers-130.cfg new file mode 100644 index 0000000..e7ae336 --- /dev/null +++ b/layers/layers-130.cfg @@ -0,0 +1,605 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[conv1c] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=2 + +[conv1d] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=3 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1c] +type=pool +pool=max +inputs=conv1c +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1d] +type=pool +pool=max +inputs=conv1d +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=5 + +[rnorm1c] +type=cmrnorm +inputs=pool1c +channels=32 +size=5 + +[rnorm1d] +type=cmrnorm +inputs=pool1d +channels=32 +size=5 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv2c] +type=conv +inputs=nails0,rnorm1c +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=2 + +[conv2d] +type=conv +inputs=nails0,rnorm1d +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=3 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[rnorm2c] +type=cmrnorm +inputs=conv2c +channels=128 +size=5 + +[rnorm2d] 
+type=cmrnorm +inputs=conv2d +channels=128 +size=5 + +[cnorm2a] +type=cnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=cnorm +inputs=rnorm2b +channels=128 +size=5 + +[cnorm2c] +type=cnorm +inputs=rnorm2c +channels=128 +size=5 + +[cnorm2d] +type=cnorm +inputs=rnorm2d +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[pool2c] +type=pool +pool=max +inputs=cnorm2c +sizeX=3 +stride=2 +channels=128 + +[pool2d] +type=pool +pool=max +inputs=cnorm2d +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv3c] +type=conv +inputs=pool2c,pool2d +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=2 + +[conv3d] +type=conv +inputs=pool2c,pool2d +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=3 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4c] +type=conv +inputs=conv3c +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4d] +type=conv +inputs=conv3d +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5a] +type=conv +inputs=conv4a +filters=64 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=64 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5c] +type=conv +inputs=conv4c +filters=64 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5d] +type=conv +inputs=conv4d +filters=64 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=64 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=64 +neuron=relu + +[pool3c] +type=pool +pool=max +inputs=conv5c +sizeX=3 +stride=2 +channels=64 +neuron=relu + +[pool3d] +type=pool +pool=max +inputs=conv5d +sizeX=3 +stride=2 +channels=64 +neuron=relu + +[fc1024-1a] +type=fc +inputs=pool3a,pool3b,pool3c,pool3d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc1024-1b] +type=fc +inputs=pool3a,pool3b,pool3c,pool3d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[fc1024-1c] +type=fc +inputs=pool3a,pool3b,pool3c,pool3d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=2 + +[fc1024-1d] +type=fc +inputs=pool3a,pool3b,pool3c,pool3d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 
+neuron=relu +gpu=3 + +[hs1a] +type=hs +keep=0.5 +inputs=fc1024-1a + +[hs1b] +type=hs +keep=0.5 +inputs=fc1024-1b + +[hs1c] +type=hs +keep=0.5 +inputs=fc1024-1c + +[hs1d] +type=hs +keep=0.5 +inputs=fc1024-1d + +[fc1024-2a] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc1024-2b] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[fc1024-2c] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=2 + +[fc1024-2d] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=3 + +[hs2a] +type=hs +keep=0.5 +inputs=fc1024-2a + +[hs2b] +type=hs +keep=0.5 +inputs=fc1024-2b + +[hs2c] +type=hs +keep=0.5 +inputs=fc1024-2c + +[hs2d] +type=hs +keep=0.5 +inputs=fc1024-2d + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b,hs2c,hs2d +initW=0.01,0.01,0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-131-2009.cfg b/layers/layers-131-2009.cfg new file mode 100644 index 0000000..5c9e1b8 --- /dev/null +++ b/layers/layers-131-2009.cfg @@ -0,0 +1,322 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=32 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[conv2a] +type=conv +inputs=nails0,pool1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,pool1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=cnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=cnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 
+partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=10184 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-131.cfg b/layers/layers-131.cfg new file mode 100644 index 0000000..bc802e7 --- /dev/null +++ b/layers/layers-131.cfg @@ -0,0 +1,322 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=32 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[conv2a] +type=conv +inputs=nails0,pool1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,pool1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=cnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=cnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 
+channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-132.cfg b/layers/layers-132.cfg new file mode 100644 index 0000000..cd20c36 --- /dev/null +++ b/layers/layers-132.cfg @@ -0,0 +1,323 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=5 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=cnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=cnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=128 +initW=0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2b +filters=192 +padding=1 +stride=1 +filterSize=3 
+channels=128 +initW=0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a,conv3b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=192,192 +neuron=relu +initW=0.03,0.03 +initB=1 +partialSum=13 +sharedBiases=1 +gpu=0 + +[conv4b] +type=conv +inputs=conv3b,conv3a +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=192,192 +neuron=relu +initW=0.03,0.03 +initB=1 +partialSum=13 +sharedBiases=1 +gpu=1 + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-133.cfg b/layers/layers-133.cfg new file mode 100644 index 0000000..7cd7b3c --- /dev/null +++ b/layers/layers-133.cfg @@ -0,0 +1,323 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=32 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[conv2a] +type=conv +inputs=nails0,pool1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,pool1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=cnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=cnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv 
+inputs=pool2a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=128 +initW=0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=128 +initW=0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a,conv3b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=192,192 +neuron=relu +initW=0.03,0.03 +initB=1 +partialSum=13 +sharedBiases=1 +gpu=0 + +[conv4b] +type=conv +inputs=conv3b,conv3a +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=192,192 +neuron=relu +initW=0.03,0.03 +initB=1 +partialSum=13 +sharedBiases=1 +gpu=1 + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-134.cfg b/layers/layers-134.cfg new file mode 100644 index 0000000..bc802e7 --- /dev/null +++ b/layers/layers-134.cfg @@ -0,0 +1,322 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=32 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[conv2a] +type=conv +inputs=nails0,pool1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,pool1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=cnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=cnorm 
+inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-135-2009-2012.cfg b/layers/layers-135-2009-2012.cfg new file mode 100644 index 0000000..8b2a00b --- /dev/null +++ b/layers/layers-135-2009-2012.cfg @@ -0,0 +1,352 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 +initWFunc=initw.makew(conv1a) +initBFunc=initw.makeb(conv1a) + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 +initWFunc=initw.makew(conv1b) +initBFunc=initw.makeb(conv1b) + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=32 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[conv2a] +type=conv +inputs=nails0,pool1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv2a) 
+initBFunc=initw.makeb(conv2a) + +[conv2b] +type=conv +inputs=nails0,pool1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv2b) +initBFunc=initw.makeb(conv2b) + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=cnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=cnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv3a) +initBFunc=initw.makeb(conv3a) + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv3b) +initBFunc=initw.makeb(conv3b) + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4a) +initBFunc=initw.makeb(conv4a) + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4b) +initBFunc=initw.makeb(conv4b) + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5a) +initBFunc=initw.makeb(conv5a) + + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5b) +initBFunc=initw.makeb(conv5b) + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(fc2048a) +initBFunc=initw.makeb(fc2048a) + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(fc2048b) +initBFunc=initw.makeb(fc2048b) + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(fc2048ba) +initBFunc=initw.makeb(fc2048ba) + + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(fc2048bb) +initBFunc=initw.makeb(fc2048bb) + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-135-2009.cfg b/layers/layers-135-2009.cfg new file mode 100644 index 0000000..5c9e1b8 --- /dev/null +++ b/layers/layers-135-2009.cfg @@ -0,0 +1,322 @@ +[data] +type=data +dataIdx=0 + +[labels] 
+type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=32 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[conv2a] +type=conv +inputs=nails0,pool1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,pool1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=cnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=cnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=10184 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs 
+gpu=0 diff --git a/layers/layers-135.cfg b/layers/layers-135.cfg new file mode 100644 index 0000000..bc802e7 --- /dev/null +++ b/layers/layers-135.cfg @@ -0,0 +1,322 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=32 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[conv2a] +type=conv +inputs=nails0,pool1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,pool1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=cnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=cnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + 
+[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-137-tree.cfg b/layers/layers-137-tree.cfg new file mode 100644 index 0000000..126ea26 --- /dev/null +++ b/layers/layers-137-tree.cfg @@ -0,0 +1,326 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=32 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[conv2a] +type=conv +inputs=nails0,pool1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,pool1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc 
+inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[concat] +type=concat +inputs=hs2a,hs2b +gpu=0 + +[fc1000] +type=treefc +inputs=concat +initW=0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-137.cfg b/layers/layers-137.cfg new file mode 100644 index 0000000..b291b0c --- /dev/null +++ b/layers/layers-137.cfg @@ -0,0 +1,322 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=32 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[conv2a] +type=conv +inputs=nails0,pool1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,pool1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 
+initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-141-2009-2010.cfg b/layers/layers-141-2009-2010.cfg new file mode 100644 index 0000000..a12b0ca --- /dev/null +++ b/layers/layers-141-2009-2010.cfg @@ -0,0 +1,381 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 +initWFunc=initw.makew(conv1a) +initBFunc=initw.makeb(conv1a) + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 +initWFunc=initw.makew(conv1b) +initBFunc=initw.makeb(conv1b) + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=32 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[conv2a] +type=conv +inputs=nails0,pool1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv2a) +initBFunc=initw.makeb(conv2a) + +[conv2b] +type=conv +inputs=nails0,pool1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv2b) +initBFunc=initw.makeb(conv2b) + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv3a) +initBFunc=initw.makeb(conv3a) + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv3b) +initBFunc=initw.makeb(conv3b) + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4a) +initBFunc=initw.makeb(conv4a) + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 
+filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4b) +initBFunc=initw.makeb(conv4b) + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5a) +initBFunc=initw.makeb(conv5a) + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5b) +initBFunc=initw.makeb(conv5b) + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[conv6a] +type=conv +inputs=pool3a,pool3b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +initWFunc=initw.makew(conv6a) +initBFunc=initw.makeb(conv6a) + +[conv6b] +type=conv +inputs=pool3b,pool3a +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +initWFunc=initw.makew(conv6b) +initBFunc=initw.makeb(conv6b) + +[fc2048a] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(fc2048a) +initBFunc=initw.makeb(fc2048a) + +[fc2048b] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(fc2048b) +initBFunc=initw.makeb(fc2048b) + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(fc2048ba) +initBFunc=initw.makeb(fc2048ba) + + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(fc2048bb) +initBFunc=initw.makeb(fc2048bb) + + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-141-2009-2012.cfg b/layers/layers-141-2009-2012.cfg new file mode 100644 index 0000000..a12b0ca --- /dev/null +++ b/layers/layers-141-2009-2012.cfg @@ -0,0 +1,381 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 +initWFunc=initw.makew(conv1a) +initBFunc=initw.makeb(conv1a) + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 +initWFunc=initw.makew(conv1b) +initBFunc=initw.makeb(conv1b) + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=32 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[conv2a] +type=conv +inputs=nails0,pool1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 
+initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv2a) +initBFunc=initw.makeb(conv2a) + +[conv2b] +type=conv +inputs=nails0,pool1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv2b) +initBFunc=initw.makeb(conv2b) + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv3a) +initBFunc=initw.makeb(conv3a) + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv3b) +initBFunc=initw.makeb(conv3b) + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4a) +initBFunc=initw.makeb(conv4a) + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4b) +initBFunc=initw.makeb(conv4b) + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5a) +initBFunc=initw.makeb(conv5a) + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5b) +initBFunc=initw.makeb(conv5b) + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[conv6a] +type=conv +inputs=pool3a,pool3b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +initWFunc=initw.makew(conv6a) +initBFunc=initw.makeb(conv6a) + +[conv6b] +type=conv +inputs=pool3b,pool3a +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +initWFunc=initw.makew(conv6b) +initBFunc=initw.makeb(conv6b) + +[fc2048a] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(fc2048a) +initBFunc=initw.makeb(fc2048a) + +[fc2048b] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(fc2048b) +initBFunc=initw.makeb(fc2048b) + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(fc2048ba) +initBFunc=initw.makeb(fc2048ba) + + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 
+initWFunc=initw.makew(fc2048bb) +initBFunc=initw.makeb(fc2048bb) + + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-141-2009-half.cfg b/layers/layers-141-2009-half.cfg new file mode 100644 index 0000000..33bbb81 --- /dev/null +++ b/layers/layers-141-2009-half.cfg @@ -0,0 +1,347 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=32 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[conv2a] +type=conv +inputs=nails0,pool1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,pool1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[conv6a] +type=conv +inputs=pool3a,pool3b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu + +[conv6b] +type=conv 
+inputs=pool3b,pool3a +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu + +[fc2048a] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=10184 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-141-2009.cfg b/layers/layers-141-2009.cfg new file mode 100644 index 0000000..1521e1a --- /dev/null +++ b/layers/layers-141-2009.cfg @@ -0,0 +1,381 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 +initWFunc=initw.makew(conv1a) +initBFunc=initw.makeb(conv1a) + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 +initWFunc=initw.makew(conv1b) +initBFunc=initw.makeb(conv1b) + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=32 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[conv2a] +type=conv +inputs=nails0,pool1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv2a) +initBFunc=initw.makeb(conv2a) + +[conv2b] +type=conv +inputs=nails0,pool1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv2b) +initBFunc=initw.makeb(conv2b) + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv3a) +initBFunc=initw.makeb(conv3a) + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv3b) +initBFunc=initw.makeb(conv3b) + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 
+channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4a) +initBFunc=initw.makeb(conv4a) + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4b) +initBFunc=initw.makeb(conv4b) + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5a) +initBFunc=initw.makeb(conv5a) + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5b) +initBFunc=initw.makeb(conv5b) + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[conv6a] +type=conv +inputs=pool3a,pool3b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +initWFunc=initw.makew(conv6a) +initBFunc=initw.makeb(conv6a) + +[conv6b] +type=conv +inputs=pool3b,pool3a +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +initWFunc=initw.makew(conv6b) +initBFunc=initw.makeb(conv6b) + +[fc2048a] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(fc2048a) +initBFunc=initw.makeb(fc2048a) + +[fc2048b] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(fc2048b) +initBFunc=initw.makeb(fc2048b) + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(fc2048ba) +initBFunc=initw.makeb(fc2048ba) + + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(fc2048bb) +initBFunc=initw.makeb(fc2048bb) + + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=10184 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-141.cfg b/layers/layers-141.cfg new file mode 100644 index 0000000..65794e5 --- /dev/null +++ b/layers/layers-141.cfg @@ -0,0 +1,347 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=32 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[conv2a] +type=conv +inputs=nails0,pool1a +filters=128,128 +padding=0,2 
+stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,pool1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[conv6a] +type=conv +inputs=pool3a,pool3b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu + +[conv6b] +type=conv +inputs=pool3b,pool3a +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu + +[fc2048a] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-145-half.cfg b/layers/layers-145-half.cfg new file mode 100644 index 0000000..b196fbf --- /dev/null +++ b/layers/layers-145-half.cfg @@ -0,0 +1,165 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[pool1a] +type=pool +pool=max 
+inputs=rnorm1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=128 +initW=0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5a] +type=conv +inputs=conv4a +filters=256 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=256 +neuron=relu + +[fc4096a] +type=fc +inputs=pool3a +outputs=4096 +initW=0.01 +initB=1 +neuron=relu +gpu=0 + +[hs1a] +type=hs +keep=0.5 +inputs=fc4096a + +[fc4096ba] +type=fc +inputs=hs1a +outputs=4096 +initW=0.01 +initB=1 +neuron=relu +gpu=0 + +[hs2a] +type=hs +keep=0.5 +inputs=fc4096ba + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a +initW=0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-145.cfg b/layers/layers-145.cfg new file mode 100644 index 0000000..ba578b0 --- /dev/null +++ b/layers/layers-145.cfg @@ -0,0 +1,308 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=48 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 
+initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-146-2009-tree.cfg b/layers/layers-146-2009-tree.cfg new file mode 100644 index 0000000..74778c6 --- /dev/null +++ b/layers/layers-146-2009-tree.cfg @@ -0,0 +1,336 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=48 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 
+filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[conv6a] +type=conv +inputs=pool3a,pool3b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu + +[conv6b] +type=conv +inputs=pool3b,pool3a +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu + +[fc2048a] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[concat] +type=concat +inputs=hs2a,hs2b + +[fc1000] +type=treefc +inputs=concat +initW=0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-146-2009.cfg b/layers/layers-146-2009.cfg new file mode 100644 index 0000000..aa4c1a7 --- /dev/null +++ b/layers/layers-146-2009.cfg @@ -0,0 +1,354 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 +initWFunc=initw.makew(conv1a) +initBFunc=initw.makeb(conv1a) + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 +initWFunc=initw.makew(conv1b) +initBFunc=initw.makeb(conv1b) + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=48 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv2a) +initBFunc=initw.makeb(conv2a) + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv2b) +initBFunc=initw.makeb(conv2b) + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] 
+type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv3a) +initBFunc=initw.makeb(conv3a) + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv3b) +initBFunc=initw.makeb(conv3b) + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4a) +initBFunc=initw.makeb(conv4a) + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4b) +initBFunc=initw.makeb(conv4b) + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5a) +initBFunc=initw.makeb(conv5a) + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5b) +initBFunc=initw.makeb(conv5b) + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[conv6a] +type=conv +inputs=pool3a,pool3b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu + +[conv6b] +type=conv +inputs=pool3b,pool3a +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu + +[fc2048a] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=10184 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-146-2011.cfg b/layers/layers-146-2011.cfg new file mode 100644 index 0000000..3da84e5 --- /dev/null +++ b/layers/layers-146-2011.cfg @@ -0,0 +1,390 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 +initWFunc=initw.makew(conv1a) +initBFunc=initw.makeb(conv1a) + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 +initWFunc=initw.makew(conv1b) 
+initBFunc=initw.makeb(conv1b) + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=48 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv2a) +initBFunc=initw.makeb(conv2a) + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv2b) +initBFunc=initw.makeb(conv2b) + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv3a) +initBFunc=initw.makeb(conv3a) + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv3b) +initBFunc=initw.makeb(conv3b) + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4a) +initBFunc=initw.makeb(conv4a) + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4b) +initBFunc=initw.makeb(conv4b) + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5a) +initBFunc=initw.makeb(conv5a) + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5b) +initBFunc=initw.makeb(conv5b) + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[conv6a] +type=conv +inputs=pool3a,pool3b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +initWFunc=initw.makew(conv6a) +initBFunc=initw.makeb(conv6a) + +[conv6b] +type=conv +inputs=pool3b,pool3a +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +initWFunc=initw.makew(conv6b) +initBFunc=initw.makeb(conv6b) + +[fc2048a] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] 
+type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1a] +type=fc +outputs=1024 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[fc1b] +type=fc +outputs=1024 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[fc1000a] +type=fc +outputs=10921 +inputs=fc1a,fc1b +initW=0.01,0.01 +gpu=0 + +[fc1000b] +type=fc +outputs=10922 +inputs=fc1a,fc1b +initW=0.01,0.01 +gpu=1 + +[concat] +type=concat +inputs=fc1000a,fc1000b + +#[fc1000] +#type=fc +#outputs=21843 +#inputs=fc1a,fc1b +#initW=0.01,0.01 +#gpu=0 + +[probs] +type=softmax +inputs=concat + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-146-2012-2009.cfg b/layers/layers-146-2012-2009.cfg new file mode 100644 index 0000000..f7aae1e --- /dev/null +++ b/layers/layers-146-2012-2009.cfg @@ -0,0 +1,366 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 +initWFunc=initw.makew(conv1a) +initBFunc=initw.makeb(conv1a) + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 +initWFunc=initw.makew(conv1b) +initBFunc=initw.makeb(conv1b) + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=48 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv2a) +initBFunc=initw.makeb(conv2a) + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv2b) +initBFunc=initw.makeb(conv2b) + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv3a) +initBFunc=initw.makeb(conv3a) + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv3b) +initBFunc=initw.makeb(conv3b) + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4a) +initBFunc=initw.makeb(conv4a) + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 
+channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4b) +initBFunc=initw.makeb(conv4b) + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5a) +initBFunc=initw.makeb(conv5a) + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5b) +initBFunc=initw.makeb(conv5b) + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[conv6a] +type=conv +inputs=pool3a,pool3b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +initWFunc=initw.makew(conv6a) +initBFunc=initw.makeb(conv6a) + +[conv6b] +type=conv +inputs=pool3b,pool3a +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +initWFunc=initw.makew(conv6b) +initBFunc=initw.makeb(conv6b) + +[fc2048a] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(fc2048a) +initBFunc=initw.makeb(fc2048a) + +[fc2048b] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(fc2048b) +initBFunc=initw.makeb(fc2048b) + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(fc2048ba) +initBFunc=initw.makeb(fc2048ba) + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(fc2048bb) +initBFunc=initw.makeb(fc2048bb) + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-146-2012-2011.cfg b/layers/layers-146-2012-2011.cfg new file mode 100644 index 0000000..f7aae1e --- /dev/null +++ b/layers/layers-146-2012-2011.cfg @@ -0,0 +1,366 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 +initWFunc=initw.makew(conv1a) +initBFunc=initw.makeb(conv1a) + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 +initWFunc=initw.makew(conv1b) +initBFunc=initw.makeb(conv1b) + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=48 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv2a) +initBFunc=initw.makeb(conv2a) + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=2 
+stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv2b) +initBFunc=initw.makeb(conv2b) + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv3a) +initBFunc=initw.makeb(conv3a) + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv3b) +initBFunc=initw.makeb(conv3b) + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4a) +initBFunc=initw.makeb(conv4a) + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4b) +initBFunc=initw.makeb(conv4b) + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5a) +initBFunc=initw.makeb(conv5a) + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5b) +initBFunc=initw.makeb(conv5b) + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[conv6a] +type=conv +inputs=pool3a,pool3b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +initWFunc=initw.makew(conv6a) +initBFunc=initw.makeb(conv6a) + +[conv6b] +type=conv +inputs=pool3b,pool3a +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +initWFunc=initw.makew(conv6b) +initBFunc=initw.makeb(conv6b) + +[fc2048a] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(fc2048a) +initBFunc=initw.makeb(fc2048a) + +[fc2048b] +type=fc +inputs=conv6a,conv6b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(fc2048b) +initBFunc=initw.makeb(fc2048b) + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(fc2048ba) +initBFunc=initw.makeb(fc2048ba) + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(fc2048bb) +initBFunc=initw.makeb(fc2048bb) + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b 
+initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-147.cfg b/layers/layers-147.cfg new file mode 100644 index 0000000..78955a2 --- /dev/null +++ b/layers/layers-147.cfg @@ -0,0 +1,307 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=48 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=2 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=2 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=1 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=4 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=1 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=4 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=2 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=2 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=0 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=11 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=0 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=11 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=2 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=2 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 
+inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-148.cfg b/layers/layers-148.cfg new file mode 100644 index 0000000..67cc7df --- /dev/null +++ b/layers/layers-148.cfg @@ -0,0 +1,272 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[pool2a] +type=pool +pool=max +inputs=conv2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=conv2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-149.cfg b/layers/layers-149.cfg new file mode 100644 index 0000000..9fe03a5 --- /dev/null +++ b/layers/layers-149.cfg @@ -0,0 +1,321 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data 
+dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=48 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[rnorm5a] +type=cmrnorm +inputs=conv5a +channels=128 +size=5 + +[rnorm5b] +type=cmrnorm +inputs=conv5b +channels=128 +size=5 + + +[pool3a] +type=pool +pool=max +inputs=rnorm5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=rnorm5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-150.cfg b/layers/layers-150.cfg new 
file mode 100644 index 0000000..ec4d224 --- /dev/null +++ b/layers/layers-150.cfg @@ -0,0 +1,308 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=48 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=160 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=pool1b +filters=160 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=160 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=160 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=160 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=160 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=160 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=160 + +[conv3a] +type=conv +inputs=pool2a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=160 +initW=0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=160 +initW=0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc4096a] +type=fc +inputs=pool3a +outputs=4096 +initW=0.01 +initB=1 +neuron=relu +gpu=0 + +[fc4096b] +type=fc +inputs=pool3b +outputs=4096 +initW=0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc4096a + +[hs1b] +type=hs +keep=0.5 +inputs=fc4096b + +[fc2048ba] +type=fc +inputs=hs1a +outputs=2048 +initW=0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b +outputs=2048 +initW=0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-153-4gpu.cfg b/layers/layers-153-4gpu.cfg new file mode 100644 index 0000000..f824308 --- /dev/null +++ 
b/layers/layers-153-4gpu.cfg @@ -0,0 +1,609 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=11 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=11 +sharedBiases=1 +gpu=1 + +[conv1c] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=11 +sharedBiases=1 +gpu=2 + +[conv1d] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=11 +sharedBiases=1 +gpu=3 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=32 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=32 +size=5 + +[rnorm1c] +type=cmrnorm +inputs=conv1c +channels=32 +size=5 + +[rnorm1d] +type=cmrnorm +inputs=conv1d +channels=32 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1c] +type=pool +pool=max +inputs=rnorm1c +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1d] +type=pool +pool=max +inputs=rnorm1d +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=32 +initW=0.01 +initB=1 +partialSum=9 +sharedBiases=1 +neuron=relu + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=32 +initW=0.01 +initB=1 +partialSum=9 +sharedBiases=1 +neuron=relu + +[conv2c] +type=conv +inputs=pool1c +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=32 +initW=0.01 +initB=1 +partialSum=9 +sharedBiases=1 +neuron=relu + +[conv2d] +type=conv +inputs=pool1d +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=32 +initW=0.01 +initB=1 +partialSum=9 +sharedBiases=1 +neuron=relu + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[rnorm2c] +type=cmrnorm +inputs=conv2c +channels=128 +size=5 + +[rnorm2d] +type=cmrnorm +inputs=conv2d +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[cnorm2c] +type=rnorm +inputs=rnorm2c +channels=128 +size=5 + +[cnorm2d] +type=rnorm +inputs=rnorm2d +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[pool2c] +type=pool +pool=max +inputs=cnorm2c +sizeX=3 +stride=2 +channels=128 + +[pool2d] +type=pool +pool=max +inputs=cnorm2d +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b,pool2c +filters=192,192,192 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b,pool2d +filters=192,192,192 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv3c] +type=conv +inputs=pool2c,pool2d,pool2a +filters=192,192,192 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=2 + +[conv3d] +type=conv +inputs=pool2c,pool2d,pool2b +filters=192,192,192 +padding=1,1,1 
+stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=3 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4c] +type=conv +inputs=conv3c +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4d] +type=conv +inputs=conv3d +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 + +[conv5c] +type=conv +inputs=conv4c +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 + +[conv5d] +type=conv +inputs=conv4d +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3c] +type=pool +pool=max +inputs=conv5c +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3d] +type=pool +pool=max +inputs=conv5d +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc1024a] +type=fc +inputs=pool3a,pool3b,pool3c +outputs=1024 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc1024b] +type=fc +inputs=pool3a,pool3b,pool3d +outputs=1024 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[fc1024c] +type=fc +inputs=pool3c,pool3d,pool3a +outputs=1024 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=2 + +[fc1024d] +type=fc +inputs=pool3c,pool3d,pool3b +outputs=1024 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=3 + +[hs1a] +type=hs +keep=0.5 +inputs=fc1024a + +[hs1b] +type=hs +keep=0.5 +inputs=fc1024b + +[hs1c] +type=hs +keep=0.5 +inputs=fc1024c + +[hs1d] +type=hs +keep=0.5 +inputs=fc1024d + +[fc1024ba] +type=fc +inputs=hs1a,hs1b,hs1c +outputs=1024 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc1024bb] +type=fc +inputs=hs1b,hs1a,hs1d +outputs=1024 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[fc1024bc] +type=fc +inputs=hs1c,hs1d,hs1a +outputs=1024 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=2 + +[fc1024bd] +type=fc +inputs=hs1c,hs1d,hs1b +outputs=1024 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=3 + +[hs2a] +type=hs +keep=0.5 +inputs=fc1024ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc1024bb + +[hs2c] +type=hs +keep=0.5 +inputs=fc1024bc + +[hs2d] +type=hs +keep=0.5 +inputs=fc1024bd + +[fc1000a] +type=fc +outputs=2546 +inputs=hs2a,hs2b,hs2c,hs2d +initW=0.01,0.01,0.01,0.01 +gpu=0 + +[fc1000b] +type=fc +outputs=2546 +inputs=hs2a,hs2b,hs2c,hs2d +initW=0.01,0.01,0.01,0.01 +gpu=1 + +[fc1000c] +type=fc +outputs=2546 +inputs=hs2a,hs2b,hs2c,hs2d +initW=0.01,0.01,0.01,0.01 +gpu=2 + +[fc1000d] +type=fc +outputs=2546 +inputs=hs2a,hs2b,hs2c,hs2d +initW=0.01,0.01,0.01,0.01 +gpu=3 + +[concat] +type=concat +inputs=fc1000a,fc1000b,fc1000c,fc1000d + +[probs] +type=softmax +inputs=concat +gpu=0 + +[logprob] +type=cost.logreg 
+inputs=labels,probs +gpu=0 diff --git a/layers/layers-153.cfg b/layers/layers-153.cfg new file mode 100644 index 0000000..ac407c2 --- /dev/null +++ b/layers/layers-153.cfg @@ -0,0 +1,308 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=11 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=11 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=48 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=9 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=9 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + 
+[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-166.cfg b/layers/layers-166.cfg new file mode 100644 index 0000000..39c9048 --- /dev/null +++ b/layers/layers-166.cfg @@ -0,0 +1,308 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=48 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +gpu=0 + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] 
+type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-167.cfg b/layers/layers-167.cfg new file mode 100644 index 0000000..2b90141 --- /dev/null +++ b/layers/layers-167.cfg @@ -0,0 +1,284 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=48 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[pool2a] +type=pool +pool=max +inputs=conv2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=conv2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-177.cfg b/layers/layers-177.cfg new file mode 100644 index 0000000..729572a --- /dev/null +++ b/layers/layers-177.cfg @@ 
-0,0 +1,317 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=48 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[hsconv4a] +type=hs +keep=0.8 +inputs=conv4a + +[hsconv4b] +type=hs +keep=0.8 +inputs=conv4b + +[conv5a] +type=conv +inputs=hsconv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=hsconv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git 
a/layers/layers-178.cfg b/layers/layers-178.cfg new file mode 100644 index 0000000..cc88d1b --- /dev/null +++ b/layers/layers-178.cfg @@ -0,0 +1,308 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=48 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[pool2a] +type=pool +pool=rand +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=rand +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=0 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg 
+inputs=labels,probs +gpu=0 diff --git a/layers/layers-183-4gpu.cfg b/layers/layers-183-4gpu.cfg new file mode 100644 index 0000000..fe27c2b --- /dev/null +++ b/layers/layers-183-4gpu.cfg @@ -0,0 +1,665 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=11 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=11 +sharedBiases=1 +gpu=1 + +[conv1c] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=11 +sharedBiases=1 +gpu=2 + +[conv1d] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=11 +sharedBiases=1 +gpu=3 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=48 +size=5 + +[rnorm1c] +type=cmrnorm +inputs=conv1c +channels=48 +size=5 + +[rnorm1d] +type=cmrnorm +inputs=conv1d +channels=48 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1c] +type=pool +pool=max +inputs=rnorm1c +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1d] +type=pool +pool=max +inputs=rnorm1d +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=9 +sharedBiases=1 +neuron=relu + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=9 +sharedBiases=1 +neuron=relu + +[conv2c] +type=conv +inputs=pool1c +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=9 +sharedBiases=1 +neuron=relu + +[conv2d] +type=conv +inputs=pool1d +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=9 +sharedBiases=1 +neuron=relu + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[rnorm2c] +type=cmrnorm +inputs=conv2c +channels=128 +size=5 + +[rnorm2d] +type=cmrnorm +inputs=conv2d +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[cnorm2c] +type=rnorm +inputs=rnorm2c +channels=128 +size=5 + +[cnorm2d] +type=rnorm +inputs=rnorm2d +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[pool2c] +type=pool +pool=max +inputs=cnorm2c +sizeX=3 +stride=2 +channels=128 + +[pool2d] +type=pool +pool=max +inputs=cnorm2d +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b,pool2c +filters=192,192,192 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b,pool2d +filters=192,192,192 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv3c] +type=conv +inputs=pool2c,pool2d,pool2a +filters=192,192,192 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 
+initW=0.03,0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=2 + +[conv3d] +type=conv +inputs=pool2c,pool2d,pool2b +filters=192,192,192 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=3 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4c] +type=conv +inputs=conv3c +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4d] +type=conv +inputs=conv3d +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 + +[conv5c] +type=conv +inputs=conv4c +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 + +[conv5d] +type=conv +inputs=conv4d +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3c] +type=pool +pool=max +inputs=conv5c +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3d] +type=pool +pool=max +inputs=conv5d +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[conv6a] +type=conv +inputs=pool3a,pool3b,pool3c +filters=128,128,128 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +gpu=0 + +[conv6b] +type=conv +inputs=pool3a,pool3b,pool3d +filters=128,128,128 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +gpu=1 + +[conv6c] +type=conv +inputs=pool3c,pool3d,pool3a +filters=128,128,128 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +gpu=2 + +[conv6d] +type=conv +inputs=pool3c,pool3d,pool3b +filters=128,128,128 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +gpu=3 + +[fc1024a] +type=fc +inputs=conv6a,conv6b,conv6c +outputs=1024 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc1024b] +type=fc +inputs=conv6a,conv6b,conv6d +outputs=1024 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[fc1024c] +type=fc +inputs=conv6c,conv6d,conv6a +outputs=1024 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=2 + +[fc1024d] +type=fc +inputs=conv6c,conv6d,conv6b +outputs=1024 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=3 + +[hs1a] +type=hs +keep=0.5 +inputs=fc1024a + +[hs1b] +type=hs +keep=0.5 +inputs=fc1024b + +[hs1c] +type=hs +keep=0.5 +inputs=fc1024c + +[hs1d] +type=hs +keep=0.5 +inputs=fc1024d + +[fc1024ba] +type=fc +inputs=hs1a,hs1b,hs1c +outputs=1024 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc1024bb] +type=fc +inputs=hs1b,hs1a,hs1d +outputs=1024 
+initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[fc1024bc] +type=fc +inputs=hs1c,hs1d,hs1a +outputs=1024 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=2 + +[fc1024bd] +type=fc +inputs=hs1c,hs1d,hs1b +outputs=1024 +initW=0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=3 + +[hs2a] +type=hs +keep=0.5 +inputs=fc1024ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc1024bb + +[hs2c] +type=hs +keep=0.5 +inputs=fc1024bc + +[hs2d] +type=hs +keep=0.5 +inputs=fc1024bd + +[fc1000a] +type=fc +outputs=2546 +inputs=hs2a,hs2b,hs2c,hs2d +initW=0.01,0.01,0.01,0.01 +gpu=0 + +[fc1000b] +type=fc +outputs=2546 +inputs=hs2a,hs2b,hs2c,hs2d +initW=0.01,0.01,0.01,0.01 +gpu=1 + +[fc1000c] +type=fc +outputs=2546 +inputs=hs2a,hs2b,hs2c,hs2d +initW=0.01,0.01,0.01,0.01 +gpu=2 + +[fc1000d] +type=fc +outputs=2546 +inputs=hs2a,hs2b,hs2c,hs2d +initW=0.01,0.01,0.01,0.01 +gpu=3 + +[concat] +type=concat +inputs=fc1000a,fc1000b,fc1000c,fc1000d + +[probs] +type=softmax +inputs=concat +gpu=0 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-184-4gpu.cfg b/layers/layers-184-4gpu.cfg new file mode 100644 index 0000000..57d2f2d --- /dev/null +++ b/layers/layers-184-4gpu.cfg @@ -0,0 +1,665 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=11 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=11 +sharedBiases=1 +gpu=1 + +[conv1c] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=11 +sharedBiases=1 +gpu=2 + +[conv1d] +type=conv +inputs=data +channels=3 +filters=48 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=11 +sharedBiases=1 +gpu=3 + +[rnorm1a] +type=cmrnorm +inputs=conv1a +channels=48 +size=5 + +[rnorm1b] +type=cmrnorm +inputs=conv1b +channels=48 +size=5 + +[rnorm1c] +type=cmrnorm +inputs=conv1c +channels=48 +size=5 + +[rnorm1d] +type=cmrnorm +inputs=conv1d +channels=48 +size=5 + +[pool1a] +type=pool +pool=max +inputs=rnorm1a +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=rnorm1b +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1c] +type=pool +pool=max +inputs=rnorm1c +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[pool1d] +type=pool +pool=max +inputs=rnorm1d +sizeX=3 +stride=2 +channels=48 +neuron=relu + +[conv2a] +type=conv +inputs=pool1a +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=9 +sharedBiases=1 +neuron=relu + +[conv2b] +type=conv +inputs=pool1b +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=9 +sharedBiases=1 +neuron=relu + +[conv2c] +type=conv +inputs=pool1c +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=9 +sharedBiases=1 +neuron=relu + +[conv2d] +type=conv +inputs=pool1d +filters=128 +padding=2 +stride=1 +filterSize=5 +channels=48 +initW=0.01 +initB=1 +partialSum=9 +sharedBiases=1 +neuron=relu + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=5 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=5 + +[rnorm2c] +type=cmrnorm +inputs=conv2c +channels=128 +size=5 + +[rnorm2d] +type=cmrnorm +inputs=conv2d +channels=128 +size=5 + +[cnorm2a] +type=rnorm +inputs=rnorm2a +channels=128 +size=5 + +[cnorm2b] +type=rnorm +inputs=rnorm2b +channels=128 +size=5 + +[cnorm2c] +type=rnorm +inputs=rnorm2c 
+channels=128 +size=5 + +[cnorm2d] +type=rnorm +inputs=rnorm2d +channels=128 +size=5 + +[pool2a] +type=pool +pool=max +inputs=cnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=cnorm2b +sizeX=3 +stride=2 +channels=128 + +[pool2c] +type=pool +pool=max +inputs=cnorm2c +sizeX=3 +stride=2 +channels=128 + +[pool2d] +type=pool +pool=max +inputs=cnorm2d +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b,pool2c +filters=192,192,192 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b,pool2d +filters=192,192,192 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv3c] +type=conv +inputs=pool2c,pool2d,pool2a +filters=192,192,192 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=2 + +[conv3d] +type=conv +inputs=pool2c,pool2d,pool2b +filters=192,192,192 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=3 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4c] +type=conv +inputs=conv3c +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4d] +type=conv +inputs=conv3d +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 + +[conv5c] +type=conv +inputs=conv4c +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 + +[conv5d] +type=conv +inputs=conv4d +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3c] +type=pool +pool=max +inputs=conv5c +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3d] +type=pool +pool=max +inputs=conv5d +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[conv6a] +type=conv +inputs=pool3a,pool3b,pool3c +filters=64,64,64 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +gpu=0 + +[conv6b] +type=conv +inputs=pool3a,pool3b,pool3d +filters=64,64,64 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +gpu=1 + +[conv6c] +type=conv +inputs=pool3c,pool3d,pool3a +filters=64,64,64 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +gpu=2 + +[conv6d] +type=conv 
+inputs=pool3c,pool3d,pool3b +filters=64,64,64 +padding=1,1,1 +stride=1,1,1 +filterSize=3,3,3 +channels=128,128,128 +initW=0.03,0.03,0.03 +initB=1 +partialSum=4 +neuron=relu +gpu=3 + +[fc1024a] +type=fc +inputs=conv6a,conv6b,conv6c,conv6d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc1024b] +type=fc +inputs=conv6a,conv6b,conv6c,conv6d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[fc1024c] +type=fc +inputs=conv6a,conv6b,conv6c,conv6d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=2 + +[fc1024d] +type=fc +inputs=conv6a,conv6b,conv6c,conv6d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=3 + +[hs1a] +type=hs +keep=0.5 +inputs=fc1024a + +[hs1b] +type=hs +keep=0.5 +inputs=fc1024b + +[hs1c] +type=hs +keep=0.5 +inputs=fc1024c + +[hs1d] +type=hs +keep=0.5 +inputs=fc1024d + +[fc1024ba] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc1024bb] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[fc1024bc] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=2 + +[fc1024bd] +type=fc +inputs=hs1a,hs1b,hs1c,hs1d +outputs=1024 +initW=0.01,0.01,0.01,0.01 +initB=1 +neuron=relu +gpu=3 + +[hs2a] +type=hs +keep=0.5 +inputs=fc1024ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc1024bb + +[hs2c] +type=hs +keep=0.5 +inputs=fc1024bc + +[hs2d] +type=hs +keep=0.5 +inputs=fc1024bd + +[fc1000a] +type=fc +outputs=2546 +inputs=hs2a,hs2b,hs2c,hs2d +initW=0.01,0.01,0.01,0.01 +gpu=0 + +[fc1000b] +type=fc +outputs=2546 +inputs=hs2a,hs2b,hs2c,hs2d +initW=0.01,0.01,0.01,0.01 +gpu=1 + +[fc1000c] +type=fc +outputs=2546 +inputs=hs2a,hs2b,hs2c,hs2d +initW=0.01,0.01,0.01,0.01 +gpu=2 + +[fc1000d] +type=fc +outputs=2546 +inputs=hs2a,hs2b,hs2c,hs2d +initW=0.01,0.01,0.01,0.01 +gpu=3 + +[concat] +type=concat +inputs=fc1000a,fc1000b,fc1000c,fc1000d + +[probs] +type=softmax +inputs=concat +gpu=0 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=0 diff --git a/layers/layers-2009-101.cfg b/layers/layers-2009-101.cfg new file mode 100644 index 0000000..d31e3ec --- /dev/null +++ b/layers/layers-2009-101.cfg @@ -0,0 +1,310 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=9 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=9 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=9 + +[rnorm2b] 
+type=cmrnorm +inputs=conv2b +channels=128 +size=9 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc10184] +type=fc +outputs=10184 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc10184 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-96-16k.cfg b/layers/layers-96-16k.cfg new file mode 100644 index 0000000..76c76b2 --- /dev/null +++ b/layers/layers-96-16k.cfg @@ -0,0 +1,321 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=9 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=9 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,64 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +groups=1,2 +randSparse=0,1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,64 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 
+partialSum=3 +sharedBiases=1 +groups=1,2 +randSparse=0,1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=9 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=9 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +groups=2,2 +randSparse=1,1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +groups=2,2 +randSparse=1,1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=256 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +groups=2 +randSparse=1 + +[conv4b] +type=conv +inputs=conv3b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=256 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +groups=2 +randSparse=1 + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=256 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=256 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=16649 +inputs=hs2a,hs2b +initW=0.001,0.001 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-98-16kinit.cfg b/layers/layers-98-16kinit.cfg new file mode 100644 index 0000000..f792f04 --- /dev/null +++ b/layers/layers-98-16kinit.cfg @@ -0,0 +1,357 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 +initWFunc=initw.makew(conv1a) +initBFunc=initw.makeb(conv1a) + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 +initWFunc=initw.makew(conv1b) +initBFunc=initw.makeb(conv1b) + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=9 + +[rnorm1b] +type=cmrnorm +inputs=pool1b 
+channels=32 +size=9 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,64 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +groups=1,2 +randSparse=0,1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv2a) +initBFunc=initw.makeb(conv2a) +initCFunc=initw.makec(conv2a) + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,64 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +groups=1,2 +randSparse=0,1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv2b) +initBFunc=initw.makeb(conv2b) +initCFunc=initw.makec(conv2b) + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=9 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=9 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +groups=2,2 +randSparse=1,1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv3a) +initBFunc=initw.makeb(conv3a) +initCFunc=initw.makec(conv3a) + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +groups=2,2 +randSparse=1,1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv3b) +initBFunc=initw.makeb(conv3b) +initCFunc=initw.makec(conv3b) + + +[conv4a] +type=conv +inputs=conv3a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=256 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +groups=2 +randSparse=1 +initWFunc=initw.makew(conv4a) +initBFunc=initw.makeb(conv4a) +initCFunc=initw.makec(conv4a) + + +[conv4b] +type=conv +inputs=conv3b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=256 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +groups=2 +randSparse=1 +initWFunc=initw.makew(conv4b) +initBFunc=initw.makeb(conv4b) +initCFunc=initw.makec(conv4b) + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=256 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5a) +initBFunc=initw.makeb(conv5a) + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=256 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5b) +initBFunc=initw.makeb(conv5b) + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(fc2048a) +initBFunc=initw.makeb(fc2048a) + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(fc2048b) +initBFunc=initw.makeb(fc2048b) + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(fc2048ba) +initBFunc=initw.makeb(fc2048ba) + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(fc2048bb) +initBFunc=initw.makeb(fc2048bb) + +[hs2a] +type=hs +keep=0.5 
+inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-99.cfg b/layers/layers-99.cfg new file mode 100644 index 0000000..f8e56ef --- /dev/null +++ b/layers/layers-99.cfg @@ -0,0 +1,314 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=9 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=9 + +[conv2a] +type=conv +inputs=nails0,rnorm1a,rnorm1b +filters=128,64,64 +padding=0,2,2 +stride=2,1,1 +filterSize=5,5,5 +channels=3,32,32 +initW=0.01,0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +randSparse=0,1,1 +groups=1,2,2 +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1a,rnorm1b +filters=128,64,64 +padding=0,2,2 +stride=2,1,1 +filterSize=5,5,5 +channels=3,32,32 +initW=0.01,0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +randSparse=0,1,1 +groups=1,2,2 +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=9 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=9 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b 
+outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/layers/layers-flickr-102-inet-init.cfg b/layers/layers-flickr-102-inet-init.cfg new file mode 100644 index 0000000..87f9854 --- /dev/null +++ b/layers/layers-flickr-102-inet-init.cfg @@ -0,0 +1,341 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 +initWFunc=initw.makew(conv1a) +initBFunc=initw.makeb(conv1a) + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 +initWFunc=initw.makew(conv1b) +initBFunc=initw.makeb(conv1b) + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=9 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=9 + +[conv2a] +type=conv +inputs=nails0,rnorm1a,rnorm1b +filters=128,64,64 +padding=0,2,2 +stride=2,1,1 +filterSize=5,5,5 +channels=3,32,32 +initW=0.01,0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +randSparse=0,1,1 +groups=1,2,2 +gpu=0 +initWFunc=initw.makew(conv2a) +initBFunc=initw.makeb(conv2a) +initCFunc=initw.makec(conv2a) + +[conv2b] +type=conv +inputs=nails0,rnorm1a,rnorm1b +filters=128,64,64 +padding=0,2,2 +stride=2,1,1 +filterSize=5,5,5 +channels=3,32,32 +initW=0.01,0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +randSparse=0,1,1 +groups=1,2,2 +gpu=1 +initWFunc=initw.makew(conv2b) +initBFunc=initw.makeb(conv2b) +initCFunc=initw.makec(conv2b) + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=9 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=9 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(conv3a) +initBFunc=initw.makeb(conv3a) + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(conv3b) +initBFunc=initw.makeb(conv3b) + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4a) +initBFunc=initw.makeb(conv4a) + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +initWFunc=initw.makew(conv4b) +initBFunc=initw.makeb(conv4b) + +[conv5a] +type=conv +inputs=conv4a 
+filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5a) +initBFunc=initw.makeb(conv5a) + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 +initWFunc=initw.makew(conv5b) +initBFunc=initw.makeb(conv5b) + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(fc2048a) +initBFunc=initw.makeb(fc2048a) + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(fc2048b) +initBFunc=initw.makeb(fc2048b) + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 +initWFunc=initw.makew(fc2048ba) +initBFunc=initw.makeb(fc2048ba) + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 +initWFunc=initw.makew(fc2048bb) +initBFunc=initw.makeb(fc2048bb) + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc10003] +type=fc +outputs=10003 +inputs=hs2a,hs2b +initW=0.01,0.01 +initB=-5 +neuron=logistic +gpu=1 + +[crossent] +type=cost.crossent2 +inputs=labels,fc10003 +gpu=1 diff --git a/layers/layers-flickr-102.cfg b/layers/layers-flickr-102.cfg new file mode 100644 index 0000000..df133d1 --- /dev/null +++ b/layers/layers-flickr-102.cfg @@ -0,0 +1,312 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=9 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=9 + +[conv2a] +type=conv +inputs=nails0,rnorm1a,rnorm1b +filters=128,64,64 +padding=0,2,2 +stride=2,1,1 +filterSize=5,5,5 +channels=3,32,32 +initW=0.01,0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +randSparse=0,1,1 +groups=1,2,2 +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1a,rnorm1b +filters=128,64,64 +padding=0,2,2 +stride=2,1,1 +filterSize=5,5,5 +channels=3,32,32 +initW=0.01,0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +randSparse=0,1,1 +groups=1,2,2 +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=9 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=9 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 
+sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc10003] +type=fc +outputs=10003 +inputs=hs2a,hs2b +initW=0.01,0.01 +initB=-5 +neuron=logistic +gpu=1 + +[crossent] +type=cost.crossent2 +inputs=labels,fc10003 +gpu=1 diff --git a/layers/layers-flickr-103.cfg b/layers/layers-flickr-103.cfg new file mode 100644 index 0000000..1f7f0ee --- /dev/null +++ b/layers/layers-flickr-103.cfg @@ -0,0 +1,308 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=9 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=9 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=9 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=9 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b 
+filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc10003] +type=fc +outputs=10003 +inputs=hs2a,hs2b +initW=0.01,0.01 +initB=-5 +neuron=logistic +gpu=1 + +[rcost] +type=cost.rflickr +inputs=labels,fc10003 +gpu=1 diff --git a/layers/layers-flickr-105.cfg b/layers/layers-flickr-105.cfg new file mode 100644 index 0000000..8ddb001 --- /dev/null +++ b/layers/layers-flickr-105.cfg @@ -0,0 +1,314 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=9 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=9 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,128 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +neuron=relu +gpu=1 + +[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=9 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=9 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max 
+inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=192,192 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + +[conv4b] +type=conv +inputs=conv3b +filters=192 +padding=1 +stride=1 +filterSize=3 +channels=192 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 + + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=192 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc10003] +type=fc +outputs=10003 +inputs=hs2a,hs2b +initW=0.01,0.01 +initB=-5 +neuron=logistic +gpu=1 + +[diff] +inputs=labels,fc10003 +type=eltsum +coeffs=1,-1 +gpu=1 + +[sqdiff] +type=cost.sum2 +inputs=diff +gpu=1 diff --git a/layers/layers-inet-5layer-conv94-2gpu.cfg b/layers/layers-inet-5layer-conv94-2gpu.cfg new file mode 100644 index 0000000..62bad30 --- /dev/null +++ b/layers/layers-inet-5layer-conv94-2gpu.cfg @@ -0,0 +1,321 @@ +[data] +type=data +dataIdx=0 + +[labels] +type=data +dataIdx=1 + +[blur0] +type=blur +inputs=data +stdev=4 +filterSize=9 +channels=3 +gpu=0 + +[nails0] +type=nailbed +inputs=blur0 +stride=4 +channels=3 + +[conv1a] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=0 + +[conv1b] +type=conv +inputs=data +channels=3 +filters=32 +padding=0 +stride=4 +filterSize=11 +initW=0.01 +partialSum=5 +sharedBiases=1 +gpu=1 + +[pool1a] +type=pool +pool=max +inputs=conv1a +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[pool1b] +type=pool +pool=max +inputs=conv1b +sizeX=3 +stride=2 +channels=32 +neuron=relu + +[rnorm1a] +type=cmrnorm +inputs=pool1a +channels=32 +size=9 + +[rnorm1b] +type=cmrnorm +inputs=pool1b +channels=32 +size=9 + +[conv2a] +type=conv +inputs=nails0,rnorm1a +filters=128,64 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +groups=1,2 +randSparse=0,1 +neuron=relu +gpu=0 + +[conv2b] +type=conv +inputs=nails0,rnorm1b +filters=128,64 +padding=0,2 +stride=2,1 +filterSize=5,5 +channels=3,32 +initW=0.01,0.01 +initB=1 +partialSum=3 +sharedBiases=1 +groups=1,2 +randSparse=0,1 +neuron=relu +gpu=1 + 
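+# Note on the wiring of this two-GPU net: conv1/conv2 form two parallel
+# towers ("a" on gpu=0, "b" on gpu=1); each conv2 tower sees the subsampled
+# image (nails0) plus only its own tower's rnorm1 output, with randSparse
+# connectivity on that second input. The towers first exchange information at
+# conv3, which reads pool2a and pool2b from both GPUs, and again at the fc layers.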
+[rnorm2a] +type=cmrnorm +inputs=conv2a +channels=128 +size=9 + +[rnorm2b] +type=cmrnorm +inputs=conv2b +channels=128 +size=9 + +[pool2a] +type=pool +pool=max +inputs=rnorm2a +sizeX=3 +stride=2 +channels=128 + +[pool2b] +type=pool +pool=max +inputs=rnorm2b +sizeX=3 +stride=2 +channels=128 + +[conv3a] +type=conv +inputs=pool2a,pool2b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +groups=2,2 +randSparse=1,1 +neuron=relu +gpu=0 + +[conv3b] +type=conv +inputs=pool2a,pool2b +filters=128,128 +padding=1,1 +stride=1,1 +filterSize=3,3 +channels=128,128 +initW=0.03,0.03 +partialSum=13 +sharedBiases=1 +groups=2,2 +randSparse=1,1 +neuron=relu +gpu=1 + +[conv4a] +type=conv +inputs=conv3a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=256 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +groups=2 +randSparse=1 + +[conv4b] +type=conv +inputs=conv3b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=256 +neuron=relu +initW=0.03 +initB=1 +partialSum=13 +sharedBiases=1 +groups=2 +randSparse=1 + +[conv5a] +type=conv +inputs=conv4a +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=256 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[conv5b] +type=conv +inputs=conv4b +filters=128 +padding=1 +stride=1 +filterSize=3 +channels=256 +initW=0.03 +initB=1 +partialSum=13 +groups=1 +randSparse=0 + +[pool3a] +type=pool +pool=max +inputs=conv5a +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[pool3b] +type=pool +pool=max +inputs=conv5b +sizeX=3 +stride=2 +channels=128 +neuron=relu + +[fc2048a] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048b] +type=fc +inputs=pool3a,pool3b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs1a] +type=hs +keep=0.5 +inputs=fc2048a + +[hs1b] +type=hs +keep=0.5 +inputs=fc2048b + +[fc2048ba] +type=fc +inputs=hs1a,hs1b +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=0 + +[fc2048bb] +type=fc +inputs=hs1b,hs1a +outputs=2048 +initW=0.01,0.01 +initB=1 +neuron=relu +gpu=1 + +[hs2a] +type=hs +keep=0.5 +inputs=fc2048ba + +[hs2b] +type=hs +keep=0.5 +inputs=fc2048bb + +[fc1000] +type=fc +outputs=1000 +inputs=hs2a,hs2b +initW=0.01,0.01 +gpu=1 + +[probs] +type=softmax +inputs=fc1000 + +[logprob] +type=cost.logreg +inputs=labels,probs +gpu=1 diff --git a/multisoft-normed.py b/multisoft-normed.py new file mode 100755 index 0000000..431e1e9 --- /dev/null +++ b/multisoft-normed.py @@ -0,0 +1,155 @@ +import sys +import numpy as np +import numpy.random as nr +from math import exp, log + +def sumprod_brute(elts, size, fixed = -1): + if size > len(elts): + return 0 + if fixed >= 0 and fixed < len(elts): + if size == 0: + return 0 + z = 0 + for s in xrange(size): + z += sumprod_brute(elts[:fixed], s) * sumprod_brute(elts[fixed+1:], size - 1 - s) + return z * exp(elts[fixed]) + if size == 0: + return 1 + return exp(elts[0]) * sumprod_brute(elts[1:], size - 1) + sumprod_brute(elts[1:], size) + +# Returns sum over all subsets of given size of product +# of exp of elements. +# Also returns, for each index, the said sum given that the element +# at that index is in the subset. 
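+# A small worked example of the quantity described above: with
+# elts = [a, b, c] and size = 2, the subsets of size 2 are {0,1}, {0,2}, {1,2},
+# so the normalizer is
+#   Z = e^(a+b) + e^(a+c) + e^(b+c),
+# and restricting to subsets that contain index 0 (fixed = 0) gives
+#   Z_0 = e^(a+b) + e^(a+c).
+# With fixed = -1, the routines below return, for each index i, the marginal
+#   y_i = Z_i / Z.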
+def sumprod(elts, size, fixed = -1): + N = len(elts) + B = np.zeros((size + 1, N + 1)) # Backward lattice + B[0, N] = 1 + + logBNorms = np.zeros(N + 1) + # Backward pass + for i in xrange(N - 1, -1, -1): + B[0, i] = (i >= size and i > fixed) * B[0, i + 1] # This can get quite small + for s in xrange(max(1, size - i), size + 1): + B[s, i] = B[s - 1, i + 1] * exp(elts[i]) + B[s, i + 1] * (fixed != i) + norm = B[:,i].sum() + B[:,i] /= norm + logBNorms[i] = log(norm) + logBNorms[i + 1] + + F = np.zeros((size + 1,)) # Forward column + F[0] = 1 + + # Forward pass + # Compute y_j for each j (marginal prob) + y = np.zeros(N) + logFNorm = -logBNorms[0] # Subtract log partition function + for i in xrange(1, N + 1): + for s in xrange(size, -1, -1): + if s < size: + y[i - 1] += F[s] * B[size - 1 - s, i] + if s > 0: + F[s] = F[s - 1] * exp(elts[i - 1]) + F[s] * (fixed != i - 1) + elif fixed == i - 1: + F[0] = 0 + norm = F.sum() + F /= norm + y[i - 1] *= exp(elts[i - 1] + logBNorms[i] + logFNorm) + logFNorm += log(norm) + return y + +# Computes log(exp(x) + exp(y)) +def logadd(x, y): + if x == -np.inf and y == -np.inf: + return -np.inf + M = max(x,y) + m = min(x,y) + diff = M - m + return M if diff >= 15 else M + log(1 + exp(-diff)) + + +# Returns sum over all subsets of given size of product +# of exp of elements. +# Also returns, for each index, the said sum given that the element +# at that index is in the subset. +def sumprod_logspace(elts, size, fixed = -1): + N = len(elts) + logB = -np.inf * np.ones((size + 1, N + 1)) # Backward lattice + + logB[0, :] = 0 + if fixed >= 0: + logB[0, :fixed + 1] = -np.inf + + # Backward pass + for i in xrange(N - 1, -1, -1): + for s in xrange(max(1, size - i), size + 1): + logB[s, i] = logadd(logB[s - 1, i + 1] + elts[i], logB[s, i + 1] if fixed != i else -np.inf) + + logF = -np.inf * np.ones((size + 1,)) # Forward column + logF[0] = 0 + + # Forward pass + # Compute y_j for each j (marginal prob) + logy = -np.inf * np.ones(N) + logFNorm = -logB[size, 0] # Subtract log partition function + for i in xrange(1, N + 1): + for s in xrange(size, -1, -1): + if s < size: + logy[i - 1] = logadd(logy[i - 1], logF[s] + logB[size - 1 - s, i]) + if s > 0: + logF[s] = logadd(logF[s - 1] + elts[i - 1], logF[s] if fixed != i - 1 else -np.inf) + elif fixed == i - 1: + logF[0] = -np.inf + + logy[i - 1] += elts[i - 1] + logFNorm + return np.exp(logy) + +# Checks the gradient with respect to the objective +# E = log(y_i) +# where y_i = z_i/Z and i = the index of the correct label +def check_grad(elts, size, correct=0): + eps = 0.01 + N = len(elts) + y = sumprod_logspace(elts, size) + Cy = sumprod_logspace(elts, size, fixed=correct) + + grad = Cy - y + print "Analytic gradient: " + print grad + + grad_num = np.zeros_like(grad) + for i in xrange(N): + tmp = elts[i] + elts[i] += eps + y_n = sumprod_logspace(elts, size) + grad_num[i] = (log(y_n[correct]) - log(y[correct])) / eps + elts[i] = tmp + print "Numeric gradient: " + print grad_num + +if __name__ == "__main__": + nr.seed(2) + N = 5 # The number of outputs in the softmax + size = 2 # The size of the multisoft set + fixed = -2 # Force this index to be on (negative = don't) + elts = nr.randn(N) + elts -= elts.max() + elts = np.array([-0.071459650993347, -0.517264485359192, -0.128548145294189, -0.113207340240479 ,0.000000000000000]) + print elts + + dp_y = sumprod_logspace(elts, size, fixed=fixed) + bf_Z = sumprod_brute(elts, size, fixed=fixed) + print "Brute force Z: %f" % bf_Z + + print "Brute force z/Z:" + bf_z = np.zeros(N) + for i 
in xrange(N): + for s in xrange(size): + bf_z[i] += sumprod_brute(elts[:i], s, fixed=fixed) * sumprod_brute(elts[i+1:], size - 1 - s, fixed=fixed-i-1) + bf_z[i] *= exp(elts[i]) + + print bf_z / bf_Z + + print "DP z/Z:" + print dp_y + + check_grad(elts, size, correct=3) \ No newline at end of file diff --git a/multisoft.py b/multisoft.py new file mode 100755 index 0000000..c018af0 --- /dev/null +++ b/multisoft.py @@ -0,0 +1,122 @@ +import sys +import numpy as np +import numpy.random as nr +from math import exp, log + +def sumprod_brute(elts, size, fixed = -1): + if size > len(elts): + return 0 + if fixed >= 0 and fixed < len(elts): + if size == 0: + return 0 + z = 0 + for s in xrange(size): + z += sumprod_brute(elts[:fixed], s) * sumprod_brute(elts[fixed+1:], size - 1 - s) + return z * exp(elts[fixed]) + if size == 0: + return 1 + return exp(elts[0]) * sumprod_brute(elts[1:], size - 1) + sumprod_brute(elts[1:], size) + +# Returns sum over all subsets of given size of product +# of exp of elements. +# Also returns, for each index, the said sum given that the element +# at that index is in the subset. +def sumprod(elts, size, fixed = -1): + N = len(elts) + B = np.zeros((size + 1, N + 1)) # Backward lattice + B[0, :] = 1 + if fixed >= 0: + B[0, :fixed+1] = 0 + logBNorms = np.zeros(N+2) + # Backward pass + for i in xrange(N - 1, -1, -1): + B[0,i] = B[0,i+1] + #B[1, i] = exp(LogBNorms[i+2] + elts[i]) + B[1, i + 1] * (fixed != i) + for s in xrange(1, size + 1): + B[s, i] = B[s - 1, i + 1] * exp(elts[i]) + B[s, i + 1] * (fixed != i) + norm = B[:,i].sum() + B[:,i] /= norm + logBNorms[i] = log(norm) + logBNorms[i+1] + + #print LogBNorms + # Log partition function + #print B + #print B * np.exp(LogBNorms) + logZ = log(B[size, 0]) + logBNorms[0] + #print logZ; sys.exit() + F = np.zeros((size + 1,)) # Forward column + F[0] = 1 + # Forward pass + # Compute z_j for each j (unnormalized prob) + z = np.zeros(N) + logFNorm = 0 + for i in xrange(1, N + 1): + for s in xrange(size, -1, -1): + if s < size: + z[i - 1] += F[s] * B[size - 1 - s, i] + if s > 0: + F[s] = F[s - 1] * exp(elts[i - 1]) + F[s] * (fixed != i - 1) + elif fixed == i - 1: + F[0] = 0 + norm = F.sum() + F /= norm + z[i - 1] *= exp(elts[i - 1] + logBNorms[i] + logFNorm - logZ) + logFNorm += log(norm) + return z, 1 + +# Checks the gradient with respect to the objective +# E = log(y_i) +# where y_i = z_i/Z and i = the index of the correct label +def check_grad(elts, size, correct=0): + eps = 0.01 + N = len(elts) + z, Z = sumprod(elts, size) + cz, CZ = sumprod(elts, size, fixed=correct) + + y = z / Z + Cy = cz / CZ + grad = Cy - y + print "Analytic gradient: " + print grad + + grad_num = np.zeros_like(grad) + for i in xrange(N): + tmp = elts[i] + elts[i] += eps + z, Z = sumprod(elts, size) + y_n = z / Z + grad_num[i] = (log(y_n[correct]) - log(y[correct])) / eps + elts[i] = tmp + print "Numeric gradient: " + print grad_num + +if __name__ == "__main__": + nr.seed(2) + N = 5 # The number of outputs in the softmax + size = 2 # The size of the multisoft set + fixed = 2 # Force this index to be on (negative = don't) + elts = nr.randn(N) + elts -= elts.max() + print elts + + dp_z, dp_Z = sumprod(elts, size, fixed=fixed) + bf_Z = sumprod_brute(elts, size, fixed=fixed) + print "Brute force Z: %f" % bf_Z + print "DP Z: %f" % dp_Z + + print "Brute force z/Z:" + bf_z = np.zeros(N) + for i in xrange(N): + for s in xrange(size): + bf_z[i] += sumprod_brute(elts[:i], s, fixed=fixed) * sumprod_brute(elts[i+1:], size - 1 - s, fixed=fixed-i-1) + bf_z[i] *= 
exp(elts[i]) + + print bf_z / bf_Z + + print "DP z/Z:" + print dp_z / dp_Z + + check_grad(elts, size, correct=1) + + + diff --git a/package.sh b/package.sh new file mode 100755 index 0000000..f1a8660 --- /dev/null +++ b/package.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +DEST=./cuda-convnet/trunk +PYTHON_MODULES=/home/spoon/dev/python_modules + +mkdir -p $DEST/src/common +mkdir -p $DEST/src/cudaconv2 +mkdir -p $DEST/src/nvmatrix +mkdir -p $DEST/include/common +mkdir -p $DEST/include/cudaconv2 +mkdir -p $DEST/include/nvmatrix +mkdir -p $DEST/example-layers + +cp src/*.cu $DEST/src +cp include/*.cuh $DEST/include + +cp ABOUT convdata.py convnet.py layer.py shownet.py $DEST/ + +cp $NVMATRIX_INCLUDE/*.cuh $DEST/include/nvmatrix +cp $NVMATRIX_INCLUDE/../src/nvmatrix*.cu $DEST/src/nvmatrix +cp $PYTHON_MODULES/util.py $PYTHON_MODULES/options.py $PYTHON_MODULES/ordereddict.py $PYTHON_MODULES/gpumodel.py $PYTHON_MODULES/data.py $DEST/ +cp $MYCPP_LIBS_INCLUDE/matrix.h $MYCPP_LIBS_INCLUDE/matrix_funcs.h $MYCPP_LIBS_INCLUDE/queue.h $MYCPP_LIBS_INCLUDE/thread.h $DEST/include/common +cp $MYCPP_LIBS_INCLUDE/matrix.cpp $DEST/src/common +cp $NVCONV2_INCLUDE/conv_util.cuh $NVCONV2_INCLUDE/cudaconv2.cuh $DEST/include/cudaconv2 +cp $NVCONV2_INCLUDE/../src/conv_util.cu $NVCONV2_INCLUDE/../src/filter_acts.cu $NVCONV2_INCLUDE/../src/img_acts.cu $NVCONV2_INCLUDE/../src/weight_acts.cu $DEST/src/cudaconv2 + +cp ./example-layers/*.cfg $DEST/example-layers +cp common-gcc-cuda-4.0.mk build.sh readme.html $DEST +cp Makefile-distrib $DEST/Makefile + diff --git a/pyInterface.cutemp b/pyInterface.cutemp new file mode 100755 index 0000000..f63ad18 --- /dev/null +++ b/pyInterface.cutemp @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include <${MODEL_NAME_LOWER}.cuh> + +using namespace std; +static ${MODEL_NAME}* model = NULL; + +static PyMethodDef _${MODEL_NAME}Methods[] = { { "initModel", initModel, METH_VARARGS }, + { "startBatch", startBatch, METH_VARARGS }, + { "finishBatch", finishBatch, METH_VARARGS }, + { "checkGradients", checkGradients, METH_VARARGS }, + { "startMultiviewTest", startMultiviewTest, METH_VARARGS }, + { "startFeatureWriter", startFeatureWriter, METH_VARARGS }, + { "syncWithHost", syncWithHost, METH_VARARGS }, + { NULL, NULL } +}; + +#if defined(_WIN64) || defined(_WIN32) +extern "C" __declspec(dllexport) void initpyconvnet() { + (void) Py_InitModule("pyconvnet", _ConvNetMethods); + import_array(); +} +#else +void INITNAME() { + (void) Py_InitModule(QUOTEME(MODELNAME), _ConvNetMethods); + import_array(); +} +#endif + +PyObject* initModel(PyObject *self, PyObject *args) { + assert(model == NULL); + +${INIT_VARS} + if (!PyArg_ParseTuple(args, "${ARG_STRING}", +${INIT_PARSE})) { + return NULL; + } +${MODEL_START} + model->start(); + return Py_BuildValue("i", 0); +} + +/* + * Starts training/testing on the given batch (asynchronous -- returns immediately). + */ +PyObject* startBatch(PyObject *self, PyObject *args) { + assert(model != NULL); + PyListObject* data; + int test = 0; + if (!PyArg_ParseTuple(args, "O!|i", + &PyList_Type, &data, + &test)) { + return NULL; + } + MatrixV& mvec = *getMatrixV((PyObject*)data); + + TrainingWorker* wr = new TrainingWorker(*model, *new CPUData(mvec), test); + model->getWorkerQueue().enqueue(wr); + return Py_BuildValue("i", 0); +} + +/* + * Starts testing on the given batch (asynchronous -- returns immediately). + */ +PyObject* startMultiviewTest(PyObject *self, PyObject *args) { + assert(model != NULL); + PyListObject* data; + int numViews, logregIdx; + if (!PyArg_ParseTuple(args, "O!ii", + &PyList_Type, &data, + &numViews, + &logregIdx)) { + return NULL; + } + MatrixV& mvec = *getMatrixV((PyObject*)data); + + MultiviewTestWorker* wr = new MultiviewTestWorker(*model, *new CPUData(mvec), numViews, logregIdx); + model->getWorkerQueue().enqueue(wr); + return Py_BuildValue("i", 0); +} + +PyObject* startFeatureWriter(PyObject *self, PyObject *args) { + assert(model != NULL); + PyListObject* data; + int layerIdx; + if (!PyArg_ParseTuple(args, "O!i", + &PyList_Type, &data, + &layerIdx)) { + return NULL; + } + MatrixV& mvec = *getMatrixV((PyObject*)data); + Matrix& ftrs = *mvec.back(); + mvec.pop_back(); + + FeatureWorker* wr = new FeatureWorker(*model, *new CPUData(mvec), ftrs, layerIdx); + model->getWorkerQueue().enqueue(wr); + return Py_BuildValue("i", 0); +} + +/* + * Waits for the trainer to finish training on the batch given to startBatch. 
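+ * Blocks until the worker reports that the batch is done, then returns a
+ * 2-tuple (costs, numCases): "costs" maps each cost's name to the list of
+ * values it produced for this batch, and "numCases" is the number of cases
+ * the cost was computed on.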
+ */ +PyObject* finishBatch(PyObject *self, PyObject *args) { + assert(model != NULL); + WorkResult* res = model->getResultQueue().dequeue(); + assert(res != NULL); + assert(res->getResultType() == WorkResult::BATCH_DONE); + + Cost& cost = res->getResults(); + PyObject* dict = PyDict_New(); + CostMap& costMap = cost.getCostMap(); + for (CostMap::const_iterator it = costMap.begin(); it != costMap.end(); ++it) { + PyObject* v = PyList_New(0); + for (vector::const_iterator iv = it->second->begin(); iv != it->second->end(); ++iv) { + PyObject* f = PyFloat_FromDouble(*iv); + PyList_Append(v, f); + } + PyDict_SetItemString(dict, it->first.c_str(), v); + } + + PyObject* retVal = Py_BuildValue("Ni", dict, cost.getNumCases()); + delete res; // Deletes cost too + return retVal; +} + +PyObject* checkGradients(PyObject *self, PyObject *args) { + assert(model != NULL); + PyListObject* data; + if (!PyArg_ParseTuple(args, "O!", + &PyList_Type, &data)) { + return NULL; + } + MatrixV& mvec = *getMatrixV((PyObject*)data); + + GradCheckWorker* wr = new GradCheckWorker(*model, *new CPUData(mvec)); + model->getWorkerQueue().enqueue(wr); + WorkResult* res = model->getResultQueue().dequeue(); + assert(res != NULL); + assert(res->getResultType() == WorkResult::BATCH_DONE); + delete res; + return Py_BuildValue("i", 0); +} + +/* + * Copies weight matrices from GPU to system memory. + */ +PyObject* syncWithHost(PyObject *self, PyObject *args) { + assert(model != NULL); + SyncWorker* wr = new SyncWorker(*model); + model->getWorkerQueue().enqueue(wr); + WorkResult* res = model->getResultQueue().dequeue(); + assert(res != NULL); + assert(res->getResultType() == WorkResult::SYNC_DONE); + + delete res; + return Py_BuildValue("i", 0); +} diff --git a/readme.html b/readme.html new file mode 100644 index 0000000..3c9d342 --- /dev/null +++ b/readme.html @@ -0,0 +1 @@ + diff --git a/run4.sh b/run4.sh new file mode 100755 index 0000000..29e781b --- /dev/null +++ b/run4.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +python convnet.py -f /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 --epochs=22 >> logs/layers-120-4gpu.log +python convnet.py -f /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 --layer-params=./layers/layer-params-120-4gpu-auto2.cfg --epochs=49 >> logs/layers-120-4gpu.log +python convnet.py -f /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 --layer-params=./layers/layer-params-120-4gpu-auto3.cfg --epochs=66 >> logs/layers-120-4gpu.log +python convnet.py -f /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 --layer-params=./layers/layer-params-120-4gpu-auto4.cfg --color-noise=0 --epochs=73 >> logs/layers-120-4gpu.log +python convnet.py -f /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 --layer-params=./layers/layer-params-120-4gpu-auto5.cfg --epochs=81 >> logs/layers-120-4gpu.log +python convnet.py -f /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 --layer-params=./layers/layer-params-120-4gpu-auto6.cfg --epochs=95 >> logs/layers-120-4gpu.log diff --git a/shownet.py b/shownet.py new file mode 100755 index 0000000..1019bb6 --- /dev/null +++ b/shownet.py @@ -0,0 +1,575 @@ + +import numpy +import sys +import getopt as opt +from util import * +from math import sqrt, ceil, floor +import os +from gpumodel import IGPUModel +import random as r +import numpy.random as nr +from convnet import ConvNet +from options import * +#import pygame as pg +import Image +#from pygame.locals import * +from time import sleep +#from collections import Counter +import gc + +#import cv +#this is important for capturing/displaying images 
+#from opencv import highgui as hg + +try: + import pylab as pl +except: + print "This script requires the matplotlib python library (Ubuntu/Fedora package name python-matplotlib). Please install it." + sys.exit(1) + +class ShowNetError(Exception): + pass + +class ShowConvNet(ConvNet): + def __init__(self, op, load_dic): + ConvNet.__init__(self, op, load_dic) + + def get_gpus(self): + self.need_gpu = self.op.get_value('show_preds') or self.op.get_value('write_features') \ + or self.op.get_value('show_data_grad') or self.op.get_value('webcam') or self.op.get_value('top5') \ + or self.op.get_value('show_maps') + if self.need_gpu: + ConvNet.get_gpus(self) + + def init_data_providers(self): + class Dummy: + def advance_batch(self): + pass + if self.need_gpu: + ConvNet.init_data_providers(self) + else: + self.train_data_provider = self.test_data_provider = Dummy() + + def import_model(self): + if self.need_gpu: + ConvNet.import_model(self) + + def init_model_state(self): + #ConvNet.init_model_state(self) + if self.op.get_value('show_maps'): + self.map_layer_idx = self.get_layer_idx(self.op.get_value('show_maps')) + if self.op.get_value('show_preds') or self.op.get_value('webcam'): + self.softmax_name = self.op.get_value('show_preds') or self.op.get_value('webcam') + if self.op.get_value('write_features'): + self.ftr_layer_name = self.op.get_value('write_features') + if self.op.get_value('top5'): + self.ftr_layer_idx = self.get_layer_idx(self.op.get_value('top5')) + if self.op.get_value('show_data_grad'): + self.data_layer_idx = self.get_layer_idx('data') + self.softmax_idx = self.get_layer_idx('probs') + for l in self.model_state['layers']: + if l['name'] != 'labels': + l['actsGradTarget'] = -1 + l['gradConsumer'] = True + if l['name'] != 'data': + l['conserveMem'] = True + + def init_model_lib(self): + if self.need_gpu: + ConvNet.init_model_lib(self) + + def plot_cost(self): + if self.show_cost not in self.train_outputs[0][0]: + raise ShowNetError("Cost function with name '%s' not defined by given convnet." 
% self.show_cost) + train_errors = [o[0][self.show_cost][self.cost_idx] for o in self.train_outputs] + test_errors = [o[0][self.show_cost][self.cost_idx] for o in self.test_outputs] + + numbatches = len(self.train_batch_range) + test_errors = numpy.row_stack(test_errors) + test_errors = numpy.tile(test_errors, (1, self.testing_freq)) + test_errors = list(test_errors.flatten()) + test_errors += [test_errors[-1]] * max(0,len(train_errors) - len(test_errors)) + test_errors = test_errors[:len(train_errors)] + + numepochs = len(train_errors) / float(numbatches) + pl.figure(1) + x = range(0, len(train_errors)) + pl.plot(x, train_errors, 'k-', label='Training set') + pl.plot(x, test_errors, 'r-', label='Test set') + pl.legend() + ticklocs = range(numbatches, len(train_errors) - len(train_errors) % numbatches + 1, numbatches) + epoch_label_gran = int(ceil(numepochs / 20.)) # aim for about 20 labels + epoch_label_gran = int(ceil(float(epoch_label_gran) / 10) * 10) # but round to nearest 10 + ticklabels = map(lambda x: str((x[1] / numbatches)) if x[0] % epoch_label_gran == epoch_label_gran-1 else '', enumerate(ticklocs)) + + pl.xticks(ticklocs, ticklabels) + pl.xlabel('Epoch') +# pl.ylabel(self.show_cost) + pl.title('%s[%d]' % (self.show_cost, self.cost_idx)) + + def make_filter_fig(self, filters, filter_start, fignum, _title, num_filters, combine_chans, FILTERS_PER_ROW=16): + MAX_ROWS = 24 + MAX_FILTERS = FILTERS_PER_ROW * MAX_ROWS + num_colors = filters.shape[0] + f_per_row = int(ceil(FILTERS_PER_ROW / float(1 if combine_chans else num_colors))) + filter_end = min(filter_start+MAX_FILTERS, num_filters) + filter_rows = int(ceil(float(filter_end - filter_start) / f_per_row)) + + filter_pixels = filters.shape[1] + filter_size = int(sqrt(filters.shape[1])) + fig = pl.figure(fignum) + fig.text(.5, .95, '%s %dx%d filters %d-%d' % (_title, filter_size, filter_size, filter_start, filter_end-1), horizontalalignment='center') + num_filters = filter_end - filter_start + if not combine_chans: + bigpic = n.zeros((filter_size * filter_rows + filter_rows + 1, filter_size*num_colors * f_per_row + f_per_row + 1), dtype=n.single) + else: + bigpic = n.zeros((3, filter_size * filter_rows + filter_rows + 1, filter_size * f_per_row + f_per_row + 1), dtype=n.single) + + for m in xrange(filter_start,filter_end ): + filter = filters[:,:,m] + y, x = (m - filter_start) / f_per_row, (m - filter_start) % f_per_row + if not combine_chans: + for c in xrange(num_colors): + filter_pic = filter[c,:].reshape((filter_size,filter_size)) + bigpic[1 + (1 + filter_size) * y:1 + (1 + filter_size) * y + filter_size, + 1 + (1 + filter_size*num_colors) * x + filter_size*c:1 + (1 + filter_size*num_colors) * x + filter_size*(c+1)] = filter_pic + else: + filter_pic = filter.reshape((3, filter_size,filter_size)) + bigpic[:, + 1 + (1 + filter_size) * y:1 + (1 + filter_size) * y + filter_size, + 1 + (1 + filter_size) * x:1 + (1 + filter_size) * x + filter_size] = filter_pic + + pl.xticks([]) + pl.yticks([]) + if not combine_chans: + pl.imshow(bigpic, cmap=pl.cm.gray, interpolation='nearest') + else: + bigpic = bigpic.swapaxes(0,2).swapaxes(0,1) + pl.imshow(bigpic, interpolation='nearest') + + def plot_filters(self): + FILTERS_PER_ROW = 16 + filter_start = 0 # First filter to show + if self.show_filters not in self.layers: + raise ShowNetError("Layer with name '%s' not defined by given convnet." 
% self.show_filters) + layer = self.layers[self.show_filters] + filters = layer['weights'][self.input_idx] +# filters = filters - filters.min() +# filters = filters / filters.max() + if layer['type'] == 'fc': # Fully-connected layer + num_filters = layer['outputs'] + channels = self.channels + filters = filters.reshape(channels, filters.shape[0]/channels, filters.shape[1]) + elif layer['type'] in ('conv', 'local'): # Conv layer + num_filters = layer['filters'] + channels = layer['filterChannels'][self.input_idx] + if layer['type'] == 'local': + filters = filters.reshape((layer['modules'], channels, layer['filterPixels'][self.input_idx], num_filters)) + filters = filters[:, :, :, self.local_plane] # first map for now (modules, channels, pixels) + filters = filters.swapaxes(0,2).swapaxes(0,1) + num_filters = layer['modules'] +# filters = filters.swapaxes(0,1).reshape(channels * layer['filterPixels'][self.input_idx], num_filters * layer['modules']) +# num_filters *= layer['modules'] + FILTERS_PER_ROW = layer['modulesX'] + else: + filters = filters.reshape(channels, filters.shape[0]/channels, filters.shape[1]) + + + # Convert YUV filters to RGB + if self.yuv_to_rgb and channels == 3: + R = filters[0,:,:] + 1.28033 * filters[2,:,:] + G = filters[0,:,:] + -0.21482 * filters[1,:,:] + -0.38059 * filters[2,:,:] + B = filters[0,:,:] + 2.12798 * filters[1,:,:] + filters[0,:,:], filters[1,:,:], filters[2,:,:] = R, G, B + combine_chans = not self.no_rgb and channels == 3 + + # Make sure you don't modify the backing array itself here -- so no -= or /= + if self.norm_filters: + #print filters.shape + filters = filters - n.tile(filters.reshape((filters.shape[0] * filters.shape[1], filters.shape[2])).mean(axis=0).reshape(1, 1, filters.shape[2]), (3, filters.shape[1], 1)) + filters = filters / n.sqrt(n.tile(filters.reshape((filters.shape[0] * filters.shape[1], filters.shape[2])).var(axis=0).reshape(1, 1, filters.shape[2]), (3, filters.shape[1], 1))) + #filters = filters - n.tile(filters.min(axis=0).min(axis=0), (3, filters.shape[1], 1)) + #filters = filters / n.tile(filters.max(axis=0).max(axis=0), (3, filters.shape[1], 1)) + #else: + filters = filters - filters.min() + filters = filters / filters.max() + + self.make_filter_fig(filters, filter_start, 2, 'Layer %s' % self.show_filters, num_filters, combine_chans, FILTERS_PER_ROW=FILTERS_PER_ROW) + + def plot_predictions(self): + data = self.get_next_batch(train=False)[2] # get a test batch + num_classes = self.test_data_provider.get_num_classes() + NUM_ROWS = 2 + NUM_COLS = 4 + NUM_IMGS = NUM_ROWS * NUM_COLS + NUM_TOP_CLASSES = min(num_classes, 5) # show this many top labels + + label_names = [lab.split(',')[0] for lab in self.test_data_provider.batch_meta['label_names']] + if self.only_errors: + preds = n.zeros((data[0].shape[1], num_classes), dtype=n.single) + else: + preds = n.zeros((NUM_IMGS, num_classes), dtype=n.single) + #rand_idx = nr.permutation(n.r_[n.arange(1), n.where(data[1] == 552)[1], n.where(data[1] == 795)[1], n.where(data[1] == 449)[1], n.where(data[1] == 274)[1]])[:NUM_IMGS] + rand_idx = nr.randint(0, data[0].shape[1], NUM_IMGS) + data[0] = n.require(data[0][:,rand_idx], requirements='C') + data[1] = n.require(data[1][:,rand_idx], requirements='C') +# data += [preds] + # Run the model + self.libmodel.startFeatureWriter(data, [preds], [self.softmax_name]) + self.finish_batch() + + fig = pl.figure(3, figsize=(12,9)) + fig.text(.4, .95, '%s test samples' % ('Mistaken' if self.only_errors else 'Random')) + if self.only_errors: + # what the net 
got wrong + err_idx = [i for i,p in enumerate(preds.argmax(axis=1)) if p not in n.where(data[1][:,i] > 0)[0]] + err_idx = r.sample(err_idx, min(len(err_idx), NUM_IMGS)) + data[0], data[1], preds = data[0][:,err_idx], data[1][:,err_idx], preds[err_idx,:] + + data[0] = self.test_data_provider.get_plottable_data(data[0]) + import matplotlib.gridspec as gridspec + import matplotlib.colors as colors + cconv = colors.ColorConverter() + gs = gridspec.GridSpec(NUM_ROWS*2, NUM_COLS, + width_ratios=[1]*NUM_COLS, height_ratios=[2,1]*NUM_ROWS ) + #print data[1] + for row in xrange(NUM_ROWS): + for col in xrange(NUM_COLS): + img_idx = row * NUM_COLS + col + if data[0].shape[0] <= img_idx: + break + pl.subplot(gs[(row * 2) * NUM_COLS + col]) + #pl.subplot(NUM_ROWS*2, NUM_COLS, row * 2 * NUM_COLS + col + 1) + pl.xticks([]) + pl.yticks([]) + img = data[0][img_idx,:,:,:] + pl.imshow(img, interpolation='lanczos') + show_title = data[1].shape[0] == 1 + true_label = [int(data[1][0,img_idx])] if show_title else n.where(data[1][:,img_idx]==1)[0] + #print true_label + #print preds[img_idx,:].shape + #print preds[img_idx,:].max() + true_label_names = [label_names[i] for i in true_label] + img_labels = sorted(zip(preds[img_idx,:], label_names), key=lambda x: x[0])[-NUM_TOP_CLASSES:] + #print img_labels + axes = pl.subplot(gs[(row * 2 + 1) * NUM_COLS + col]) + height = 0.5 + ylocs = n.array(range(NUM_TOP_CLASSES))*height + pl.barh(ylocs, [l[0] for l in img_labels], height=height, \ + color=['#ffaaaa' if l[1] in true_label_names else '#aaaaff' for l in img_labels]) + #pl.title(", ".join(true_labels)) + if show_title: + pl.title(", ".join(true_label_names), fontsize=15, fontweight='bold') + else: + print true_label_names + pl.yticks(ylocs + height/2, [l[1] for l in img_labels], x=1, backgroundcolor=cconv.to_rgba('0.65', alpha=0.5), weight='bold') + for line in enumerate(axes.get_yticklines()): + line[1].set_visible(False) + #pl.xticks([width], ['']) + #pl.yticks([]) + pl.xticks([]) + pl.ylim(0, ylocs[-1] + height) + pl.xlim(0, 1) + + def do_write_features(self): + if not os.path.exists(self.feature_path): + os.makedirs(self.feature_path) + next_data = self.get_next_batch(train=False) + b1 = next_data[1] + num_ftrs = self.layers[self.ftr_layer_name]['outputs'] + +# def showimg(img): +# pixels = img.shape[0] / 3 +# size = int(sqrt(pixels)) +# img = img.reshape((3,size,size)).swapaxes(0,2).swapaxes(0,1) +# pl.imshow(img, interpolation='nearest') +# pl.show() + while True: + batch = next_data[1] + data = next_data[2] + ftrs = n.zeros((data[0].shape[1], num_ftrs), dtype=n.single) + self.libmodel.startFeatureWriter(data, [ftrs], [self.ftr_layer_name]) + + # load the next batch while the current one is computing + next_data = self.get_next_batch(train=False) + self.finish_batch() + path_out = os.path.join(self.feature_path, 'data_batch_%d' % batch) +# print ftrs +# ftrs += self.train_data_provider.batch_meta['data_mean'].mean() +# ftrs /= 255 +# showimg(ftrs[1,:]); sys.exit(0) + + pickle(path_out, {'data': ftrs, 'labels': data[1]}) + print "Wrote feature file %s" % path_out + if next_data[1] == b1: + break + pickle(os.path.join(self.feature_path, 'batches.meta'), {'source_model':self.load_file, + 'num_vis':num_ftrs}) + + def do_top5(self): + num_classes = self.test_data_provider.get_num_classes() + nv = self.train_data_provider.num_views + + next_data = self.get_next_batch(train=False) + batch = next_data[1] + data = next_data[2] + print data[0].shape + num_cases = data[0].shape[1] / nv + print "num cases: %d" % num_cases + 
ftrs = [n.zeros((num_cases, num_classes), dtype=n.single) for i in xrange(2)] + for v in xrange(self.train_data_provider.num_views): + vdata = [d[:,v*num_cases:(v+1)*num_cases] for d in data] + [ftrs[1]] + print [d.shape for d in vdata] + self.libmodel.startFeatureWriter(vdata, self.ftr_layer_idx) + self.finish_batch() + ftrs[0] += ftrs[1] + ftrs = ftrs[0] + print ftrs.max() + print "Batch %d top5 error: i dunno" % batch + print ftrs + labels = data[1][:,:num_cases].astype(n.int32) + print labels, labels.shape + v = 0 + for m in xrange(5): + maxlocs = ftrs.argmax(axis=1) + v += (maxlocs == labels).sum() + ftrs[n.arange(ftrs.shape[0]),maxlocs] = 0 + print v + + # NOTE: THIS ROUTINE APPLIES RELU NONLINAERITY TO MAPS + # Change this if you're not actually using relu units + def do_showmaps(self): + NUM_MAPS = 16 + NUM_IMGS = 12 + nr.seed(87213) + data = self.get_next_batch(train=False)[2] + rand_idx = nr.randint(0, data[0].shape[1], NUM_IMGS) + data[0] = n.require(data[0][:,rand_idx], requirements='C') + data[1] = n.require(data[1][:,rand_idx], requirements='C') + cases = data[0].shape[1] + ldic = dict([(l['name'], l) for l in self.layers]) + print ldic.keys() + num_ftrs = self.layers[self.map_layer_idx]['outputs'] + map_size = self.layers[self.map_layer_idx]['modulesX'] if 'modulesX' in self.layers[self.map_layer_idx] else self.layers[self.map_layer_idx]['outputsX'] + num_maps = num_ftrs / map_size**2 + ftrs = n.zeros((data[0].shape[1], num_ftrs), dtype=n.single) + + self.libmodel.startFeatureWriter(data + [ftrs], self.map_layer_idx) + self.finish_batch() + + fig = pl.figure(5) + fig.text(.4, .95, 'Layer %s feature maps' % self.show_maps) + + data[0] = self.test_data_provider.get_plottable_data(data[0]) + # This map will have size (cases, num_maps, map_size, map_size) + print ftrs.shape + ftrs = ftrs.reshape(cases, num_maps, map_size, map_size) + print ftrs.min(), ftrs.max() + print ftrs.shape + ftrs[ftrs<0] = 0 + ftrs -= ftrs.min() + ftrs /= ftrs.max() + rand_idx = nr.permutation(range(NUM_MAPS))[:ftrs.shape[1]] + ftrs = ftrs[:,rand_idx,:,:] +# ftrs = self.test_data_provider.get_plottable_data(ftrs.T, add_mean=False) + + for i in xrange(NUM_IMGS): + pl.subplot(NUM_IMGS, NUM_MAPS + 1, i * (NUM_MAPS + 1) + 1) + + pl.xticks([]) + pl.yticks([]) + img = data[0][i,:,:,:] + pl.imshow(img, interpolation='lanczos') +# return + for m in xrange(NUM_MAPS): + pl.subplot(NUM_IMGS, NUM_MAPS + 1, i * (NUM_MAPS + 1) + m + 2) + pl.xticks([]) + pl.yticks([]) + img = ftrs[i,m, :,:] + pl.imshow(img, cmap=pl.cm.gray, interpolation='nearest') + + def do_show_data_grad(self): + NUM_ROWS = 2 + NUM_COLS = 4 + NUM_IMGS = NUM_ROWS * NUM_COLS + + data = self.get_next_batch(train=False)[2] + rand_idx = nr.randint(0, data[0].shape[1], NUM_IMGS) + data[0] = n.require(data[0][:,rand_idx], requirements='C') + data[1] = n.require(data[1][:,rand_idx], requirements='C') + + label_names = [lab.split(',')[0] for lab in self.test_data_provider.batch_meta['label_names']] + data_dim = self.layers[self.data_layer_idx]['outputs'] + + grads = n.zeros((data[0].shape[1], data_dim), dtype=n.single) + self.libmodel.startDataGrad(data + [grads], self.data_layer_idx, self.softmax_idx) + self.finish_batch() + + fig = pl.figure(4) + fig.text(.4, .95, 'Data gradients') + print grads.shape, data[0].shape + + grads = self.test_data_provider.get_plottable_data(grads.T, add_mean=False) +# grads -= grads.min() +# grads /= grads.max() +# grads[grads<0] = 0; +# grads[grads>0] = 0; grads = -grads; + data[0] = 
self.test_data_provider.get_plottable_data(data[0]) + for row in xrange(NUM_ROWS): + for col in xrange(NUM_COLS): + img_idx = row * NUM_COLS + col + if data[0].shape[0] <= img_idx: + break + pl.subplot(NUM_ROWS*2, NUM_COLS, row * 2 * NUM_COLS + col + 1) + pl.xticks([]) + pl.yticks([]) + img = data[0][img_idx,:,:,:] + pl.imshow(img, interpolation='nearest') + true_label = int(data[1][0,img_idx]) + #true_labels = set(label_names[l] for l in list(n.where(data[1][:,img_idx] > 0)[0])) + + pl.subplot(NUM_ROWS*2, NUM_COLS, (row * 2 + 1) * NUM_COLS + col + 1) + + #pl.title(", ".join(true_labels)) + pl.title(label_names[true_label]) + img = grads[img_idx,:] + # Suppress small grads + img -= img.mean() + s = n.sqrt(img.var()) + img[n.abs(img)<3*s] = 0 + img -= img.min() + img /= img.max() + pl.imshow(img, interpolation='nearest') + + def do_webcam(self): + + num_classes = self.test_data_provider.get_num_classes() + label_names = [lab.split(',')[0] for lab in self.test_data_provider.batch_meta['label_names']] + camera = hg.cvCreateCameraCapture(1) + #highgui.cvSetCaptureProperty(camera, highgui.CV_CAP_PROP_FRAME_WIDTH, 320 ); + #highgui.cvSetCaptureProperty(camera, highgui.CV_CAP_PROP_FRAME_HEIGHT, 240 ); + + def get_image(): + im = hg.cvQueryFrame(camera) + # Add the line below if you need it (Ubuntu 8.04+) + # im = cv.cvGetMat(im) + #convert Ipl image to PIL image + return cv.adaptors.Ipl2NumPy(im) + +# fps = 30.0 + frames_per_run = 4 + frames = 0 + + pg.init() + pg.display.set_mode((224,224)) + pg.display.set_caption("WebCam Demo") + screen = pg.display.get_surface() + + images = n.zeros((self.test_data_provider.get_data_dims(), 32), dtype=n.single) + labels = n.zeros((1, 32), dtype=n.single) # dummy + preds = [n.zeros((32, num_classes), dtype=n.single) for i in xrange(2)] + preds_idx = 0 + while True: + im = get_image() + images[:,0:28] = images[:,4:] + cropped = im[128:352,208:432,:] + cropped_swapped = cropped.swapaxes(0,2).swapaxes(1,2) + images[:,28] = cropped_swapped.reshape((self.test_data_provider.get_data_dims(),)) + images[:,29] = cropped_swapped[:,:,::-1].reshape((self.test_data_provider.get_data_dims(),)) + + cropped = im[16:464,96:544,:] + im = cv.adaptors.NumPy2PIL(cropped) + cropped = cv.adaptors.PIL2NumPy(im.resize((224,224))) + cropped_swapped = cropped.swapaxes(0,2).swapaxes(1,2) + images[:,30] = cropped_swapped.reshape((self.test_data_provider.get_data_dims(),)) + images[:,31] = cropped_swapped[:,:,::-1].reshape((self.test_data_provider.get_data_dims(),)) + + im = cv.adaptors.NumPy2PIL(cropped) + pg_img = pg.image.frombuffer(im.tostring(), im.size, im.mode) + screen.blit(pg_img, (0,0)) + pg.display.flip() + + images[:,28:] -= self.test_data_provider.data_mean_crop + + if frames % frames_per_run == 0 and frames >= 32: # Run convnet + if frames - frames_per_run >= 32: # Wait for last batch to finish, if it hasn't yet + self.finish_batch() + p = preds[1 - preds_idx].mean(axis=0) + m = p.argmax() +# m = Counter(preds[1 - preds_idx].argmax(axis=1)).most_common(1)[0][0] + print "Label: %s (%.2f)" % (label_names[m] if p[m] > 0.0 else "<>", p[m]) +# ent = -(n.log(p) * p).sum(axis=0) +# print "Label: %s (entropy: %.2f)" % (label_names[m], ent) +# print "Label: %s " % (label_names[m]) + + # Run the model + self.libmodel.startFeatureWriter([images, labels, preds[preds_idx]], self.softmax_idx) + preds_idx = 1 - preds_idx + + + frames += 1 +# sleep(1.0 / fps) + + + + def start(self): + self.op.print_values() + if self.show_cost: + self.plot_cost() + if self.show_filters: + 
self.plot_filters() + if self.show_preds: + self.plot_predictions() + if self.write_features: + self.do_write_features() + if self.show_data_grad: + self.do_show_data_grad() + if self.webcam: + self.do_webcam() + if self.top5: + self.do_top5() + if self.show_maps: + self.do_showmaps() + pl.show() + sys.exit(0) + + @classmethod + def get_options_parser(cls): + op = ConvNet.get_options_parser() + for option in list(op.options): + if option not in ('gpu', 'load_file', 'train_batch_range', 'test_batch_range', 'multiview_test', 'data_path', 'logreg_name', 'pca_noise', 'scalar_mean'): + op.delete_option(option) + op.add_option("show-cost", "show_cost", StringOptionParser, "Show specified objective function", default="") + op.add_option("show-filters", "show_filters", StringOptionParser, "Show learned filters in specified layer", default="") + op.add_option("norm-filters", "norm_filters", BooleanOptionParser, "Individually normalize filters shown with --show-filters", default=0) + op.add_option("input-idx", "input_idx", IntegerOptionParser, "Input index for layer given to --show-filters", default=0) + op.add_option("cost-idx", "cost_idx", IntegerOptionParser, "Cost function return value index for --show-cost", default=0) + op.add_option("no-rgb", "no_rgb", BooleanOptionParser, "Don't combine filter channels into RGB in layer given to --show-filters", default=False) + op.add_option("yuv-to-rgb", "yuv_to_rgb", BooleanOptionParser, "Convert RGB filters to YUV in layer given to --show-filters", default=False) + op.add_option("channels", "channels", IntegerOptionParser, "Number of channels in layer given to --show-filters (fully-connected layers only)", default=0) + op.add_option("show-preds", "show_preds", StringOptionParser, "Show predictions made by given softmax on test set", default="") + op.add_option("only-errors", "only_errors", BooleanOptionParser, "Show only mistaken predictions (to be used with --show-preds)", default=False, requires=['show_preds']) + op.add_option("write-features", "write_features", StringOptionParser, "Write test data features from given layer", default="", requires=['feature-path']) + op.add_option("feature-path", "feature_path", StringOptionParser, "Write test data features to this path (to be used with --write-features)", default="") + op.add_option("show-data-grad", "show_data_grad", BooleanOptionParser, "Show data gradient in given data layer", default=False) + op.add_option("webcam", "webcam", StringOptionParser, "Show webcam demo with given softmax layer's predictions", default="") + op.add_option("local-plane", "local_plane", IntegerOptionParser, "Local plane to show", default=0) + op.add_option("top5", "top5", StringOptionParser, "Compute top5 test error from given layer", default=False) + op.add_option("show-maps", "show_maps", StringOptionParser, "Show feature maps in given layer", default="") + + + op.options['load_file'].default = None + return op + +if __name__ == "__main__": + #nr.seed(6) + try: + op = ShowConvNet.get_options_parser() + op, load_dic = IGPUModel.parse_options(op) + model = ShowConvNet(op, load_dic) + model.start() + except (UnpickleError, ShowNetError, opt.GetoptError), e: + print "----------------" + print "Error:" + print e + diff --git a/src/convnet.cu b/src/convnet.cu new file mode 100644 index 0000000..1e0ba06 --- /dev/null +++ b/src/convnet.cu @@ -0,0 +1,594 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace std; + +/* + * ======================= + * ConvNet + * ======================= + */ +ConvNet::ConvNet(PyObject* layerParams, intv& deviceIDs, vector& deviceCPUs, int minibatchSize, int weightUpdateFreq) : Thread(false) { + _weightUpdateFreq = weightUpdateFreq; + _numBwdMiniPasses = 0; + _deviceIDs = &deviceIDs; + _deviceCPUs = &deviceCPUs; + _data = NULL; + _trainingProgress = 0; + _sync = new ThreadSynchronizer(deviceIDs.size() + 1); + seti pipeSet; + pipeSet.insert(deviceIDs.begin(), deviceIDs.end()); + _pd = new PipeDispenserNonBlocking(pipeSet); + PyObject* layerList = PyDict_Values(layerParams); + + // Data layers live on the manager thread (in CPU memory) + for (int i = 0; i < PyList_GET_SIZE(layerList); i++) { + PyObject* paramsDict = PyList_GET_ITEM(layerList, i); + string layerType = pyDictGetString(paramsDict, "type"); + if (layerType == "data") { + DataLayer* d = new DataLayer(NULL, paramsDict); + _dataLayers.push_back(d); + _layerMap[d->getName()] = d; + } + } + + // Initialize GPU worker threads + for (int d = 0; d < deviceIDs.size(); ++d) { + ConvNetGPU* cng = new ConvNetGPU(layerList, deviceIDs[d], *deviceCPUs[d], this); + + _convNetThreads.push_back(cng); + for (map::iterator it = cng->getLayerMap().begin(); it != cng->getLayerMap().end(); ++it) { + _layerMap[it->first] = it->second; + } + } + // Connect forward/backward links in graph + for (map::iterator it = _layerMap.begin(); it != _layerMap.end(); ++it) { + PyObject* paramsDict = PyDict_GetItemString(layerParams, it->first.c_str()); + PyObject* inputList = PyDict_GetItemString(paramsDict, "inputs"); + if (inputList != NULL) { + for (int i = 0; i < PyList_GET_SIZE(inputList); i++) { + string inputName = PyString_AsString(PyList_GetItem(inputList, i)); + it->second->addPrev(_layerMap[inputName]); + _layerMap[inputName]->addNext(it->second); + } + } + } + + _numFwdTerminal = 0; + // Execute post-initialization stuff + for (map::iterator it = _layerMap.begin(); it != _layerMap.end(); ++it) { + it->second->postInit(); + _numFwdTerminal += it->second->getNext().size() == 0; // Number of terminal nodes going 
forward + } + // Find and count the terminal nodes in the backward pass + set visited, terminal; + for (int t = 0; t < _convNetThreads.size(); t++) { + vector& cl = _convNetThreads[t]->getCostLayers(); + for (int c = 0; c < cl.size(); c++) { + findBwdTerminal(*cl[c], visited, terminal); + } + } + _numBwdTerminal = terminal.size(); +// printf("num fwd terminals: %d, back terminals:\n", _numFwdTerminal); +// for (set::iterator it = terminal.begin(); it != terminal.end(); ++it) { +// printf("%s\n", (*it).c_str()); +// } + _dp = new DataProvider(minibatchSize); + + Py_DECREF(layerList); + assert(_weightUpdateFreq > 0); +} + +void ConvNet::findBwdTerminal(Layer& l, set& visited, set &terminal) { + if (visited.count(l.getName()) == 0) { + visited.insert(l.getName()); + if (l.isGradConsumer()) { + bool hasPrevConsumer = false; + for (int i = 0; i < l.getPrev().size(); i++) { + hasPrevConsumer |= l.getPrev()[i]->isGradConsumer(); + } + if (!hasPrevConsumer || !l.isGradProducer()) { + terminal.insert(l.getName()); + l.setBwdTerminal(true); + } else if (l.isGradProducer()) { + for (int i = 0; i < l.getPrev().size(); i++) { + if (l.getPrev()[i]->isGradConsumer()) { + findBwdTerminal(*l.getPrev()[i], visited, terminal); + } + } + } + } + } +} + +void* ConvNet::run() { + // The manager thread defaults to using the GPU of the first worker. + // Put more logic here if this is inappropriate. + NVMatrix::setDeviceID(_convNetThreads[0]->getDeviceID()); + for (int t = 0; t < _convNetThreads.size(); t++) { + _convNetThreads[t]->start(); + } + copyToGPU(); + while (true) { + Worker* worker = _workerQueue.dequeue(); + worker->run(); + delete worker; + } + return NULL; +} + +Queue& ConvNet::getWorkerQueue() { + return _workerQueue; +} + +Queue& ConvNet::getResultQueue() { + return _resultQueue; +} + +DataProvider& ConvNet::getDataProvider() { + return *_dp; +} + +Layer& ConvNet::operator[](string& name) { + return *_layerMap[name]; +} + +Layer& ConvNet::getLayer(string& name) { + return *_layerMap[name]; +} + +void ConvNet::sendMessage(MESSAGES msg, bool sync) { + for (int i = 0; i < _convNetThreads.size(); i++) { + _convNetThreads[i]->enqueueMessage(new Message(msg)); + if (sync) { + _convNetThreads[i]->enqueueMessage(new Message(SYNC)); + } + } + + if (sync) { + _sync->sync(); + } +} + +void ConvNet::copyToCPU() { + sendMessage(COPY_TO_CPU, true); +} + +void ConvNet::copyToGPU() { + sendMessage(COPY_TO_GPU, false); +} + +void ConvNet::updateWeights() { + sendMessage(UPDATE_WEIGHTS, true); +} + +void ConvNet::reset() { + sendMessage(RESET, false); +} + +void ConvNet::fprop(PASS_TYPE passType) { + assert(_data != NULL); + reset(); + for (int i = 0; i < _dataLayers.size(); i++) { + _dataLayers[i]->startFprop(*_data, passType); + } + waitForTerminals(_numFwdTerminal, FPROP_TERMINAL); +} + +void ConvNet::fprop(CPUData& data, PASS_TYPE passType) { + if (&data != _data) { + delete _data; + } + _data = &data; + fprop(passType); +} + +void ConvNet::fprop(int miniIdx, PASS_TYPE passType) { + delete _data; + reset(); + if (miniIdx == 0 || miniIdx != _dataLayers[0]->getBufferMinibatchIdx()) { + _data = &_dp->getMinibatch(miniIdx); + for (int i = 0; i < _dataLayers.size(); i++) { + _dataLayers[i]->startFprop(*_data, passType); + } + } else { + _data = _dataLayers[0]->getBufferData(); + for (int i = 0; i < _dataLayers.size(); i++) { + _dataLayers[i]->startFpropFromBuffer(passType); + } + } + CPUData* nextData = miniIdx + 1 == _dp->getNumMinibatches() ? 
NULL : &_dp->getMinibatch(miniIdx + 1); + if (nextData != NULL) { + for (int i = 0; i < _dataLayers.size(); i++) { + _dataLayers[i]->setBuffer(*nextData, miniIdx + 1); + } + } + waitForTerminals(_numFwdTerminal, FPROP_TERMINAL); +} + +void ConvNet::bprop(PASS_TYPE passType) { + // Weights are updated when this is zero + _numBwdMiniPasses = (_numBwdMiniPasses + 1) % _weightUpdateFreq; + for (int i = 0; i < _convNetThreads.size(); i++) { + _convNetThreads[i]->enqueueMessage(new BpropStartMessage(passType));; + } + waitForTerminals(_numBwdTerminal, BPROP_TERMINAL); + reset(); +} + +void ConvNet::waitForTerminals(int numMsgs, MESSAGES msg) { + int terminalsDone = 0; + while(terminalsDone++ < numMsgs) { + Message* m = _msgQueue.dequeue(); + assert(m->getMessageType() == msg); + delete m; + } +} + +// Same as getCost() but adds results to given cost and returns it +Cost& ConvNet::getCost(Cost& cost) { + Cost &tmp = getCost(); + cost += tmp; + delete &tmp; + return cost; +} + +Cost& ConvNet::getCost() { + Cost &tmp = *new Cost(_data->getNumCases()); + for (int i = 0; i < _convNetThreads.size(); i++) { + Cost& tmp2 = _convNetThreads[i]->getCost(_data->getNumCases()); + tmp |= tmp2; + delete &tmp2; + } + return tmp; +} + +double ConvNet::getCostValue() { + Cost& cost = getCost(); + double val = cost.getValue(); + delete &cost; + return val; +} + +Queue& ConvNet::getMessageQueue() { + return _msgQueue; +} + +int ConvNet::getDeviceID(int gpuIdx) { + if (gpuIdx < 0) { + return -1; + } + return _deviceIDs->at(gpuIdx); +} + +intv& ConvNet::getDeviceIDs() { + return *_deviceIDs; +} + +ThreadSynchronizer& ConvNet::getSync() { + return *_sync; +} + +PipeDispenser& ConvNet::getPipeDispenser() { + return *_pd; +} + +void ConvNet::syncWithChildren() { + sendMessage(SYNC, false); + _sync->sync(); +} + +int ConvNet::getWeightUpdateFreq() { + return _weightUpdateFreq; +} + +int ConvNet::getNumBwdMiniPasses() { + return _numBwdMiniPasses; +} + +int ConvNet::getMinibatchSize() { + return _dp->getMinibatchSize(); +} + +void ConvNet::setTrainingProgress(double progress) { + _trainingProgress = progress; +} + +double ConvNet::getTrainingProgress() const { + return _trainingProgress; +} + +/* + * Gradient checking stuff + */ +void ConvNet::checkGradients() { + _numFailures = 0; + _numTests = 0; + fprop(0, PASS_GC); + _baseErr = getCostValue(); + bprop(PASS_GC); + + for (map::iterator it = _layerMap.begin(); it != _layerMap.end(); ++it) { + if (it->second->getDeviceID() >= 0) { + NVMatrix::setDeviceID(it->second->getDeviceID()); + it->second->checkGradients(); + } + } + NVMatrix::setDeviceID(_convNetThreads[0]->getDeviceID()); + + cout << "------------------------" << endl; + if (_numFailures > 0) { + cout << _numFailures << "/" << _numTests << " TESTS FAILED" << endl; + } else { + cout << "ALL " << _numTests << " TESTS PASSED" << endl; + } +} + +/* + * name: weight matrix name + * eps: finite difference step + */ +bool ConvNet::checkGradient(const string& name, float eps, Weights& weights) { + Matrix numGrad(weights.getNumRows(), weights.getNumCols()); + Matrix diff(numGrad); + numGrad.apply(Matrix::ZERO); + Matrix weightsCPU; + + weights.getW().copyToHost(weightsCPU, true); + + for(int i = 0; i < weights.getNumRows(); i++) { + for (int j = 0; j < weights.getNumCols(); j++) { + float v = weightsCPU(i,j); + weightsCPU(i,j) += eps; + weights.getW().copyFromHost(weightsCPU); + weightsCPU(i,j) = v; + fprop(PASS_GC); + double err = getCostValue(); + numGrad(i,j) = (err - _baseErr) / (_data->getNumCases() * eps); + if 
(isnan(numGrad(i,j)) || isinf(numGrad(i,j))) { + cout << "Numerical computation produced nan or inf when checking '" << name << "': " << numGrad(i,j) << endl; + cout << "Consider reducing the sizes of the weights or finite difference steps." << endl; + cout << "Exiting." << endl; + exit(1); + } + weights.getW().copyFromHost(weightsCPU); + } + } + Matrix gradCPU; + weights.getGrad().copyToHost(gradCPU, true); + gradCPU.scale(-1.0 / _data->getNumCases()); + float analNorm = gradCPU.norm(); + float numNorm = numGrad.norm(); + numGrad.subtract(gradCPU, diff); + float relErr = diff.norm() / analNorm; + bool fail = relErr >= GC_REL_ERR_THRESH; + if (fail || !GC_SUPPRESS_PASSES) { + cout << "========================" << endl; + printf("(%s) %s GRADIENT CHECK\n", fail ? "****FAIL****" : "PASS", name.c_str()); + cout << "========================" << endl; + cout << "Analytic:" << endl; + gradCPU.print(0, 6, 0, 4); + cout << "Numeric:" << endl; + numGrad.print(0, 6, 0, 4); + printf("Analytic norm: %e\n", analNorm); + printf("Numeric norm: %e\n", numNorm); + printf("Relative error: %e\n", relErr); + } + _numTests++; + _numFailures += fail; + return fail; +} + +/* + * ======================= + * ConvNetGPU + * ======================= + */ +ConvNetGPU::ConvNetGPU(PyObject* layerList, int deviceID, intv& deviceCPUs, ConvNet* convNet) + : Thread(false, deviceCPUs), _deviceID(deviceID), _convNet(convNet) { + try { + int numLayers = PyList_GET_SIZE(layerList); + + for (int i = 0; i < numLayers; i++) { + PyObject* paramsDict = PyList_GET_ITEM(layerList, i); + int layerDeviceID = convNet->getDeviceID(pyDictGetInt(paramsDict, "gpu")); + if (layerDeviceID == _deviceID) { + initLayer(paramsDict); + } + } + } catch (string& s) { + cout << "Error creating ConvNet: " << s << endl; + exit(1); + } +} + +void ConvNetGPU::initLayer(PyObject* paramsDict) { + string type = pyDictGetString(paramsDict, "type"); + string name = pyDictGetString(paramsDict, "name"); + if (type == "fc") { + _layerMap[name] = new FCLayer(this, paramsDict, false, true); + } else if (type == "treefc") { + _layerMap[name] = new TreeFCLayer(this, paramsDict); + } else if (type == "conv") { + _layerMap[name] = new ConvLayer(this, paramsDict); + } else if (type == "local") { + _layerMap[name] = new LocalUnsharedLayer(this, paramsDict); + } else if (type == "pool") { + _layerMap[name] = &PoolLayer::makePoolLayer(this, paramsDict); + } else if (type == "rnorm") { + _layerMap[name] = new ResponseNormLayer(this, paramsDict); + } else if (type == "cmrnorm") { + _layerMap[name] = new CrossMapResponseNormLayer(this, paramsDict); + } else if (type == "cnorm") { + _layerMap[name] = new ContrastNormLayer(this, paramsDict); + } else if (type == "softmax") { + _layerMap[name] = new SoftmaxLayer(this, paramsDict); + } else if (type == "eltsum") { + _layerMap[name] = new EltwiseSumLayer(this, paramsDict); + } else if (type == "eltmax") { + _layerMap[name] = new EltwiseMaxLayer(this, paramsDict); + } else if (type == "neuron") { + _layerMap[name] = new NeuronLayer(this, paramsDict); + } else if (type == "nailbed") { + _layerMap[name] = new NailbedLayer(this, paramsDict); + } else if (type == "blur") { + _layerMap[name] = new GaussianBlurLayer(this, paramsDict); + } else if (type == "href") { + _layerMap[name] = new HorizontalReflectionLayer(this, paramsDict); + } else if (type == "resize") { + _layerMap[name] = new ResizeLayer(this, paramsDict); + } else if (type == "rgb2yuv") { + _layerMap[name] = new RGBToYUVLayer(this, paramsDict); + } else if (type == 
"rgb2lab") { + _layerMap[name] = new RGBToLABLayer(this, paramsDict); + } else if (type == "rscale") { + _layerMap[name] = new RandomScaleLayer(this, paramsDict); + } else if (type == "concat") { + _layerMap[name] = new ConcatenationLayer(this, paramsDict); + } else if (type == "hs") { + _layerMap[name] = new HiddenSexLayer(this, paramsDict); + } else if (strncmp(type.c_str(), "cost.", 5) == 0) { + CostLayer *c = &CostLayer::makeCostLayer(this, type, paramsDict); + _layerMap[name] = c; + _costs.push_back(c); + } else { + throw string("Unknown layer type ") + type; + } +} + +/* + * This executes in a new CPU thread so it's OK to initialize CUDA stuff here. + */ +void ConvNetGPU::initCuda() { + NVMatrix::setDeviceID(_deviceID); + checkCudaErrors(cudaDeviceSetCacheConfig(cudaFuncCachePreferShared)); + for (int i = 0; i < _convNet->getDeviceIDs().size(); i++) { + int d = _convNet->getDeviceID(i); + if (d != _deviceID) { + if (NVMatrix::canAccessDevice(_deviceID, d)) { + printf("Enabling peer access %d --> %d\n", NVMatrix::getDeviceID(), d); + checkCudaErrors(cudaDeviceEnablePeerAccess(d, 0)); + } else { + printf("No peer access %d --> %d\n", _deviceID, d); + } + } + } + NVMatrix::initCublas(); + NVMatrix::initRandom(); + srand(time(0)); +} + +void* ConvNetGPU::run() { + initCuda(); + + while (true) { + Message* m = _msgQueue.dequeue(); + if (m->getMessageType() == FPROP_READY) { + FpropMessage* msg = static_cast(m); + _layerMap[msg->getToLayer()]->fprop(msg->getPassType()); + } else if (m->getMessageType() == BPROP_READY) { + BpropMessage* msg = static_cast(m); + _layerMap[msg->getToLayer()]->incRcvdBInputMsgs(); + _layerMap[msg->getToLayer()]->bprop(msg->getPassType()); + } else if (m->getMessageType() == BPROP_START) { + BpropStartMessage* msg = static_cast(m); + for (int i = 0; i < _costs.size(); i++) { + dynamic_cast(_costs[i])->bprop(msg->getPassType()); + } + } else if (m->getMessageType() == SYNC) { + _convNet->getSync().sync(); + } else if (m->getMessageType() == COPY_TO_CPU) { + for (map::iterator it = _layerMap.begin(); it != _layerMap.end(); ++it) { + it->second->copyToCPU(); + } + } else if (m->getMessageType() == COPY_TO_GPU) { + for (map::iterator it = _layerMap.begin(); it != _layerMap.end(); ++it) { + it->second->copyToGPU(); + } + } else if (m->getMessageType() == RESET) { + for (map::iterator it = _layerMap.begin(); it != _layerMap.end(); ++it) { + it->second->reset(); + } + } else if (m->getMessageType() == UPDATE_WEIGHTS) { + for (map::iterator it = _layerMap.begin(); it != _layerMap.end(); ++it) { + it->second->updateWeights(); + } + } else if (m->getMessageType() == RUNME) { + RunMeMessage* msg = static_cast(m); + msg->run(); + } + delete m; + } + return NULL; +} + +Cost& ConvNetGPU::getCost(int numCases) { + return *new Cost(numCases, _costs); +} + +Layer& ConvNetGPU::operator[](string& name) { + return *_layerMap[name]; +} + +Layer& ConvNetGPU::getLayer(string& name) { + return *_layerMap[name]; +} + +int ConvNetGPU::getDeviceID() { + return _deviceID; +} + +Queue& ConvNetGPU::getMessageQueue() { + return _msgQueue; +} + +void ConvNetGPU::enqueueMessage(Message* msg) { + getMessageQueue().enqueue(msg); +} + +vector& ConvNetGPU::getCostLayers() { + return _costs; +} + +map& ConvNetGPU::getLayerMap() { + return _layerMap; +} + +ConvNet& ConvNetGPU::getConvNet() { + return *_convNet; +} diff --git a/src/cost.cu b/src/cost.cu new file mode 100644 index 0000000..58f9230 --- /dev/null +++ b/src/cost.cu @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky 
(akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +using namespace std; + +/* + * ===================== + * Cost + * ===================== + */ + +Cost::Cost(int numCases) : _numCases(numCases) { +} + +Cost::Cost(int numCases, vector& costs) : _numCases(numCases) { + for (vector::iterator it = costs.begin(); it != costs.end(); ++it) { + _costMap[(*it)->getName()] = &(*it)->getCost(); + _costCoeffMap[(*it)->getName()] = (*it)->getCoeff(); + } +} + +int Cost::getNumCases() { + return _numCases; +} + +doublev& Cost::operator [](const string s) { + return *_costMap[s]; +} + +CostMap& Cost::getCostMap() { + return _costMap; +} + +CostCoeffMap& Cost::getCostCoeffMap() { + return _costCoeffMap; +} + +double Cost::getValue() { + double val = 0; + for (CostMap::iterator it = _costMap.begin(); it != _costMap.end(); ++it) { + val += _costCoeffMap[it->first] * it->second->at(0); + } + return val; +} + +Cost& Cost::operator += (Cost& er) { + CostMap& otherMap = er.getCostMap(); + CostCoeffMap& otherCoeffMap = er.getCostCoeffMap(); + for (CostMap::const_iterator it = otherMap.begin(); it != otherMap.end(); ++it) { + if (_costMap.count(it->first) == 0) { + _costMap[it->first] = new doublev(); + _costCoeffMap[it->first] = otherCoeffMap[it->first]; + } + + vector& myVec = *_costMap[it->first]; + vector& otherVec = *otherMap[it->first]; + assert(myVec.size() == 0 || myVec.size() == otherVec.size()); + for (int i = 0; i < otherVec.size(); i++) { + if (myVec.size() <= i) { + myVec.push_back(0); + } + myVec[i] += otherVec[i]; + } + } + _numCases += er.getNumCases(); + return *this; +} + +// Merge costs in er into this cost +Cost& Cost::operator |= (Cost& er) { + assert(er.getNumCases() == getNumCases()); + CostMap& otherMap = er.getCostMap(); + CostCoeffMap& otherCoeffMap = er.getCostCoeffMap(); + for (CostMap::const_iterator it = otherMap.begin(); it != otherMap.end(); ++it) { + assert(_costMap.count(it->first) == 0); + + _costMap[it->first] = new doublev(); + _costCoeffMap[it->first] = otherCoeffMap[it->first]; + + vector& myVec = *_costMap[it->first]; + vector& otherVec = *otherMap[it->first]; + myVec.insert(myVec.begin(), otherVec.begin(), otherVec.end()); + } + return *this; +} + +Cost& 
Cost::operator /= (const double v) { + for (CostMap::const_iterator it = _costMap.begin(); it != _costMap.end(); ++it) { + for (doublev::iterator it2 = it->second->begin(); it2 != it->second->end(); ++it2) { + *it2 /= v; + } + } + return *this; +} + +Cost::~Cost() { + for (CostMap::const_iterator it = _costMap.begin(); it != _costMap.end(); ++it) { + delete it->second; + } +} \ No newline at end of file diff --git a/src/cpuCNN.cu b/src/cpuCNN.cu new file mode 100644 index 0000000..f156593 --- /dev/null +++ b/src/cpuCNN.cu @@ -0,0 +1,65 @@ + + + +#include "softmaxtree.cuh" +/* + * weights: (numNodes, numFeatures) + * targets: (numNodes, numFeatures) + * + */ +void cpuSoftmaxTreeFwd(float* weights, float* targets, const int numFeatures, SoftmaxTree& tree) { + for (int d = 0; d <= tree.getDepth(); ++d) { + for (SoftmaxNodeV::iterator it = tree.getNodesAtDepth(d).begin(); it!= tree.getNodesAtDepth(d).end(); ++it) { + SoftmaxNode& node = **it; + SoftmaxNode* parent = node.getParent(); + for (int f = 0; f < numFeatures; ++f) { + targets[node.getLabel() * numFeatures + f] = weights[node.getLabel() * numFeatures + f] + + (parent == NULL ? 0 : targets[parent->getLabel() * numFeatures + f]); + } + } + } +} + +/* + * grads: (numNodes, numFeatures) + * + */ +void cpuSoftmaxTreeBwd(float* grads, const int numFeatures, SoftmaxTree& tree) { + for (int h = 1; h <= tree.getHeight(); ++h) { + for (SoftmaxNodeV::iterator it = tree.getNodesAtHeight(h).begin(); it!= tree.getNodesAtHeight(h).end(); ++it) { + SoftmaxNode& node = **it; + for (int f = 0; f < numFeatures; ++f) { + grads[node.getLabel() * numFeatures + f] = 0; + } + for (SoftmaxNodeV::iterator itc = node.getChildren().begin(); itc!= node.getChildren().end(); ++itc) { + SoftmaxNode& child = **itc; + for (int f = 0; f < numFeatures; ++f) { + grads[node.getLabel() * numFeatures + f] += grads[child.getLabel() * numFeatures + f]; + } + } + } + } +} + +/* + * weights: (numNodes, numFeatures) + * weightsInc: (numNodes, numFeatures) + * weightsGrad: (numNodes, numFeatures) + * nodeSizes: numNodes-array whose ith element gives number of leaves under + * node with label i. + */ +void cpuSoftmaxTreeUpdateWeights(float* weights, float* weightsInc, float* weightsGrad, + const int numFeatures, float eps, const float mom, float wc, SoftmaxTree& tree) { + for (int d = 0; d <= tree.getDepth(); d++) { + for (SoftmaxNodeV::iterator it = tree.getNodesAtDepth(d).begin(); it!= tree.getNodesAtDepth(d).end(); ++it) { + SoftmaxNode& node = **it; + float w = wc / node.getSize(); + float e = eps;// * sqrt(node.getSize()); + for (int f = 0; f < numFeatures; ++f) { + weightsInc[node.getLabel() * numFeatures + f] = mom * weightsInc[node.getLabel() * numFeatures + f] + + e * (weightsGrad[node.getLabel() * numFeatures + f] - w * weights[node.getLabel() * numFeatures + f]); + weights[node.getLabel() * numFeatures + f] += weightsInc[node.getLabel() * numFeatures + f]; + } + } + } +} diff --git a/src/data.cu b/src/data.cu new file mode 100644 index 0000000..7829398 --- /dev/null +++ b/src/data.cu @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +using namespace std; + +DataProvider::DataProvider(int minibatchSize) : + _minibatchSize(minibatchSize), _hData(NULL) { + +} + +void DataProvider::clearData() { + delete _hData; + _hData = NULL; +} + +void DataProvider::setData(CPUData& hData) { + // This is now deleted by the DataWorker's destructor +// delete _hData; // Delete old CPU matrices + + _hData = &hData; +} + +CPUData& DataProvider::getMinibatch(int idx) { + assert(idx >= 0 && idx < getNumMinibatches()); + return getDataSlice(idx * _minibatchSize, (idx + 1) * _minibatchSize); +} + +CPUData& DataProvider::getDataSlice(int startCase, int endCase) { + assert(_hData != NULL); + assert(_hData->getNumCases() > 0); + endCase = min(_hData->getNumCases(), endCase); + // TODO: maintain these matrices, no point re-creating them all the time + MatrixV& miniData = *new MatrixV(); + + for (int i = 0; i < _hData->getData().size(); i++) { + // NOTE: if hData is transposed, then the output minibatch matrix + // can be a view. No need to allocate new CPU memory here. Might + // want to look into optimizing that in the future, though it's + // unlikely to be a big deal. 
+ if (_hData->isTrans()) { + miniData.push_back(&(*_hData)[i].sliceCols(startCase, endCase)); + } else { + miniData.push_back(new Matrix()); + (*_hData)[i].sliceCols(startCase, endCase, *miniData.back()); + } + } + return *new CPUData(&miniData); +} + +int DataProvider::getNumMinibatches() { + assert(_hData != NULL); + assert(_hData->getNumCases() > 0); + return DIVUP(_hData->getNumCases(), _minibatchSize); +} + +int DataProvider::getMinibatchSize() { + return _minibatchSize; +} + +int DataProvider::getNumCases() { + assert(_hData != NULL); + assert(_hData->getNumCases() > 0); + return _hData->getNumCases(); +} + +int DataProvider::getNumCasesInMinibatch(int idx) { + assert(_hData != NULL); + assert(_hData->getNumCases() > 0); + assert(idx >= 0 && idx < getNumMinibatches()); + return min(_minibatchSize, max(0, _hData->getNumCases() - idx * _minibatchSize)); +} diff --git a/src/hostmem.cu b/src/hostmem.cu new file mode 100644 index 0000000..7bfd194 --- /dev/null +++ b/src/hostmem.cu @@ -0,0 +1,34 @@ +#include + +PinnedHostMem::PinnedHostMem() : _numBytes(0), _data(NULL) { + +} + +PinnedHostMem::~PinnedHostMem() { + if (_numBytes > 0) { + checkCudaErrors(cudaFreeHost(_data)); + } +} + +void PinnedHostMem::resize(uint bytes) { + if (_numBytes != bytes) { + if (_numBytes > 0) { + checkCudaErrors(cudaFreeHost(_data)); + } + checkCudaErrors(cudaHostAlloc(&_data, bytes, cudaHostAllocPortable)); + _numBytes = bytes; + } +} + +void PinnedHostMem::copyFrom(void* src, uint bytes) { + resize(bytes); + checkCudaErrors(cudaMemcpy(_data, src, bytes, cudaMemcpyDefault)); +} + +void PinnedHostMem::copyTo(void* dst) { + checkCudaErrors(cudaMemcpy(dst, _data, _numBytes, cudaMemcpyDefault)); +} + +void* PinnedHostMem::getData() { + return _data; +} diff --git a/src/layer.cu b/src/layer.cu new file mode 100644 index 0000000..3023423 --- /dev/null +++ b/src/layer.cu @@ -0,0 +1,2002 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +/* + * ======================= + * Layer + * ======================= + */ + +Layer::Layer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool trans) : + _convNetGPU(convNetGPU), _trans(trans) { + _name = pyDictGetString(paramsDict, "name"); + _type = pyDictGetString(paramsDict, "type"); + + _numGradProducersNext = 0; + _foundGradConsumers = false; + _gradConsumer = pyDictGetInt(paramsDict, "gradConsumer"); + _actsTarget = pyDictGetInt(paramsDict, "actsTarget"); + _actsGradTarget = pyDictGetInt(paramsDict, "actsGradTarget"); + _conserveMem = pyDictGetInt(paramsDict, "conserveMem"); + _numOutputs = pyDictGetInt(paramsDict, "outputs"); + _deviceID = _convNetGPU == NULL ? -1 : _convNetGPU->getDeviceID(); // DataLayer doesnt have a device ID + + _bwdTerminal = false; + _rcvdBInputMsgs = 0; + + PyObject* quantF = PyDict_GetItemString(paramsDict, "quantF"); + PyObject* quantB = PyDict_GetItemString(paramsDict, "quantB"); + _fwdQuantizer = &Quantizer::make(quantF); + _bwdQuantizer = &Quantizer::make(quantB); +} + +void Layer::shuffle(intv& v) { + for (int i = 0; i < v.size(); ++i) { + int r1 = rand() % v.size(); + int r2 = rand() % v.size(); + int tmp = v[r1]; + v[r1] = v[r2]; + v[r2] = tmp; + } +} + +void Layer::fpropNext(PASS_TYPE passType) { + set devices; // The set of devices onto which I have copied my output + // If I must copy my outputs to a GPU on another PCH, make sure + // I copy them to host memory. + for (intv::iterator it = _nextDeviceIDs.begin(); it != _nextDeviceIDs.end(); ++it) { + int d = *it; + if (!NVMatrix::canAccessDevice(_deviceID, d)) { + _fwdQuantizer->quantize(*_outputs[_deviceID], _hostMemFwd); + break; + } + } + // Sync so that we don't send out messages about data that hasn't been formed yet + if (_nextDeviceIDs.size() > 1 || (_nextDeviceIDs.size() == 1 && _nextDeviceIDs[0] != _deviceID)) { + cudaDeviceSynchronize(); + } + for (int i = 0; i < _next.size(); i++) { + int d = _next[i]->getDeviceID(); + if (d != _deviceID && devices.count(d) == 0) { + // Copy my output to next layer's GPU + if (NVMatrix::canAccessDevice(_deviceID, d)) { + // Clone the matrix because the next layers in this thread may transpose it and stuff + _next[i]->getConvNetGPU().enqueueMessage(new CopyMessage(&_outputs[_deviceID]->getClone(), _outputs[d])); + } else { // This will go through host + _next[i]->getConvNetGPU().enqueueMessage(new DequantizeMessage(_fwdQuantizer, _outputs[d])); + } + + devices.insert(d); + } + // Inform the next layer that my output is ready + _next[i]->getConvNetGPU().enqueueMessage(new FpropMessage(_name, _next[i]->getName(), passType)); + } +} + +void Layer::truncBwdActs() { + // Only truncate actsGrad if I own it + if (_conserveMem && _actsGradTarget < 0) { + for (map::iterator it = _actsGrad.begin(); it != _actsGrad.end(); ++it) { + getActsGrad(it->first).truncate(); + } + } + if (_conserveMem) { + getActs().truncate(); + for (map::iterator it = _outputs.begin(); it != _outputs.end(); ++it) { + NVMatrix::setDeviceID(it->first); + getActs(it->first).truncate(); + } + NVMatrix::setDeviceID(_deviceID); + } +} + +void Layer::fprop(PASS_TYPE passType) { + _rcvdFInputs++; + if (_rcvdFInputs == _prev.size()) { + assert(_deviceID == NVMatrix::getDeviceID()); + NVMatrixV v; + for (int i = 0; i < _prev.size(); i++) { + v.push_back(&_prev[i]->getActs(_deviceID)); + } + fprop(v, passType); + } +} + +void Layer::fprop(NVMatrix& v, PASS_TYPE passType) { + 
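+    // Convenience overload for a single input matrix: it is wrapped in an NVMatrixV
+    // and handed to the general fprop(NVMatrixV&, PASS_TYPE) defined just below.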
NVMatrixV vl; + vl.push_back(&v); + fprop(vl, passType); +} + +void Layer::fprop(NVMatrixV& v, PASS_TYPE passType) { + assert(v.size() == _prev.size()); + _inputs.clear(); + _inputs.insert(_inputs.begin(), v.begin(), v.end()); + _outputs[_deviceID] = _actsTarget < 0 ? _outputs[_deviceID] : _inputs[_actsTarget]; + _rcvdFInputs = _prev.size(); + for (NVMatrixV::iterator it = v.begin(); it != v.end(); ++it) { + (*it)->transpose(_trans); + } + getActs().transpose(_trans); + + // First do fprop on the input whose acts matrix I'm sharing, if any + if (_actsTarget >= 0) { + fpropActs(_actsTarget, 0, passType); + } + // Then add the rest of the inputs to that + for (int i = 0; i < _prev.size(); i++) { + if (i != _actsTarget) { + fpropActs(i, _actsTarget >= 0 || i > 0, passType); + } + } + fpropNext(passType); +} + +void Layer::bprop(PASS_TYPE passType) { + //printf("layer %s got bprop, total rcvd %d/%d\n", _name.c_str(), getRcvdBInputs(_deviceID) + _rcvdBInputMsgs, _numGradProducersNext); + if (getRcvdBInputs(_deviceID) + _rcvdBInputMsgs == _numGradProducersNext) { + // Sum the contributions to my activity gradients from each GPU + // and store the result in this matrix called v. +#ifdef USE_PD + NVMatrix* v = &getActsGrad(); + v->transpose(_trans); + bool doCopy = getRcvdBInputs(_deviceID) == 0; + seti interested; + interested.insert(_nextDeviceIDs.begin(), _nextDeviceIDs.end()); + interested.erase(_deviceID); + while (interested.size() > 0) { + int d = getPipeDispenser().getPipe(interested); + //cout << _name << " got pipe " << d << endl; + interested.erase(d); + NVMatrix& mat = getActsGrad(d); // This lives on the other device + mat.transpose(_trans); + if (mat.getNumElements() != 0 && getRcvdBInputs(d) > 0) { + if (!NVMatrix::canAccessDevice(_deviceID, d)) { + // Copy the gradients produced by device d from his GPU to CPU memory + // (since a direct GPU-GPU copy is impossible in this case) + + NVMatrix::setDeviceID(d); + _bwdQuantizer->quantize(mat, _hostMemBwd); + // I have verified that synchronization *is* necessary here. + // Even without any explicit mention of streams, kernel calls + // from the same host thread on different devices execute simultaneously. + cudaDeviceSynchronize(); + getPipeDispenser().freePipe(d); + NVMatrix::setDeviceID(_deviceID); +// getPipeDispenser().getPipe(_deviceID); + _bwdQuantizer->dequantize(*v, !doCopy, 1); +// cudaDeviceSynchronize(); +// getPipeDispenser().freePipe(_deviceID); + } else { +// getPipeDispenser().getPipe(_deviceID); + v->add(mat, !doCopy, 1); + cudaDeviceSynchronize(); +// getPipeDispenser().freePipe(_deviceID); + getPipeDispenser().freePipe(d); + } + doCopy = false; + } else { + getPipeDispenser().freePipe(d); + } + } +#else + NVMatrix* v = NULL; + bool skipMine = false; + for (intv::iterator it = _nextDeviceIDs.begin(); it != _nextDeviceIDs.end(); ++it) { + int d = *it; + if (d == _deviceID && skipMine) { + continue; + } + NVMatrix& mat = getActsGrad(d); // This lives on the other device + mat.transpose(_trans); + if (mat.getNumElements() != 0 && getRcvdBInputs(d) > 0) { + bool doCopy = v == NULL && getRcvdBInputs(_deviceID) == 0; + if (v == NULL) { + v = &getActsGrad(); + // We have handled _actsGrad[_deviceID] so we don't + // have to consider it in the remainder of the loop. 
+ skipMine = true; + v->transpose(_trans); + } + if (d != _deviceID) { + if (!NVMatrix::canAccessDevice(_deviceID, d)) { + // Copy the gradients produced by device d from his GPU to CPU memory + // (since a direct GPU-GPU copy is impossible in this case) + + NVMatrix::setDeviceID(d); + _bwdQuantizer->quantize(mat, _hostMemBwd); + // I have verified that synchronization *is* necessary here. + // Even without any explicit mention of streams, kernel calls + // from the same host thread on different devices execute simultaneously. + cudaDeviceSynchronize(); + NVMatrix::setDeviceID(_deviceID); + + _bwdQuantizer->dequantize(*v, !doCopy, 1); + } else { + v->add(mat, !doCopy, 1); + } + } + } + + } +#endif + // Increment so we never hit this code again + incRcvdBInputs(_deviceID); + // Cost layers won't have any actual actGrads, so just pass some + // empty matrix rather than passing NULL (which would cause a segfault) + bprop(v == NULL ? getActsGrad() : *v, passType); + + if (_bwdTerminal) { + // I am a terminal node, so let the parent know that I'm done. + cudaDeviceSynchronize(); + _convNetGPU->getConvNet().getMessageQueue().enqueue(new Message(BPROP_TERMINAL)); + } + } +} + +void Layer::bprop(NVMatrix& v, PASS_TYPE passType) { + v.transpose(_trans); + assert(_deviceID == NVMatrix::getDeviceID()); + for (int i = 0; i < _prev.size(); i++) { + _inputs[i]->transpose(_trans); + _prev[i]->getActsGrad().transpose(_trans); + } + getActs().transpose(_trans); + bpropCommon(v, passType); + + if (isGradProducer()) { + // First propagate activity gradient to all layers whose activity + // gradient matrix I'm definitely not sharing. + for (int i = 0; i < _prev.size(); i++) { + if (_prev[i]->isGradConsumer() && isGradProducer(_prev[i]->getName()) && _actsGradTarget != i) { + bpropActs(v, i, _prev[i]->getRcvdBInputs(_deviceID) > 0 ? 1 : 0, passType); + _prev[i]->incRcvdBInputs(_deviceID); + } + } + // Then propagate activity gradient to the layer whose activity gradient + // matrix I'm sharing, if any. + if (_actsGradTarget >= 0 && _prev[_actsGradTarget]->isGradConsumer() && isGradProducer(_prev[_actsGradTarget]->getName())) { + bpropActs(v, _actsGradTarget, _prev[_actsGradTarget]->getRcvdBInputs(_deviceID) > 0 ? 1 : 0, passType); + _prev[_actsGradTarget]->incRcvdBInputs(_deviceID); + } + } + truncBwdActs(); + + // This is necessary because the kernel calls that compute my backward acts + // execute asynchronously. Therefore I don't want to tell other threads that I've + // comptued bprop activities for them when in fact I've only called a function which + // will eventually compute them. + bool synced = false; + + if (isGradProducer()) { + // First notify other threads that I have output for them. + // This is a separate loop from the one below because I don't + // want to do any more computation before telling the other threads + // that they can proceed. 
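+    // Cross-device consumers below are notified with a BpropMessage (after a
+    // cudaDeviceSynchronize, so the gradients they will read really are ready);
+    // same-device consumers are then bprop'd directly in this thread.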
+ for (int i = 0; i < _prev.size(); i++) { + if (_prev[i]->isGradConsumer() && isGradProducer(_prev[i]->getName()) && _prev[i]->getDeviceID() != _deviceID) { + if (!synced) { + cudaDeviceSynchronize(); + synced = true; + } + _prev[i]->getConvNetGPU().enqueueMessage(new BpropMessage(_name, _prev[i]->getName(), passType)); + } + } + + for (int i = 0; i < _prev.size(); i++) { + if (_prev[i]->isGradConsumer() && isGradProducer(_prev[i]->getName()) && _prev[i]->getDeviceID() == _deviceID) { + _prev[i]->bprop(passType); + } + } + } +} + +void Layer::reset() { + _rcvdFInputs = 0; + _rcvdBInputMsgs = 0; + for (map::iterator it = _rcvdBInputs.begin(); it != _rcvdBInputs.end(); ++it) { + it->second = 0; + } +} + +int Layer::getNumCases(NVMatrix& v) { + int numCases = _convNetGPU->getConvNet().getWeightUpdateFreq() == 1 ? (_trans ? v.getNumRows() : v.getNumCols()) + : _convNetGPU->getConvNet().getWeightUpdateFreq() * _convNetGPU->getConvNet().getMinibatchSize(); + return numCases; +} + +int Layer::incRcvdBInputMsgs() { + return ++_rcvdBInputMsgs; +} + +string& Layer::getName() { + return _name; +} + +string& Layer::getType() { + return _type; +} + +int Layer::getRcvdFInputs() { + return _rcvdFInputs; +} +int Layer::getRcvdBInputs(int deviceID) { + return _rcvdBInputs[deviceID]; +} + +// TODO: make sure all this stuff is thread-safe +// it seems like it shouldn't be a problem for multiple threads to +// simultaneously increment different elements of a map, +// as long as no one also tried to insert/delete stuff. +int Layer::incRcvdBInputs(int deviceID) { + return ++_rcvdBInputs[deviceID]; +} + +void Layer::addNext(Layer* l) { + _next.push_back(l); + // Insert into a random position in _nextDeviceIDs + // so that the backward message order is randomized + // and good in expectation. + if (count(_nextDeviceIDs.begin(), _nextDeviceIDs.end(), l->getDeviceID()) == 0) { + int pos = rand() % (_nextDeviceIDs.size() + 1); + _nextDeviceIDs.insert(_nextDeviceIDs.begin() + pos, l->getDeviceID()); +// _nextDeviceIDs.push_back(l->getDeviceID()); + } +} + +void Layer::addPrev(Layer* l) { + _prev.push_back(l); +} + +bool Layer::hasGradProducerNext(string& layerName) { + bool b = _next.size() == 0; + for (int i = 0; i < _next.size(); i++) { + b |= _next[i]->hasGradProducerNext(_name); + } + return b && isGradProducer(layerName); +} + +void Layer::postInit() { +// _outputs = _actsTarget < 0 ? new NVMatrix() : &_prev[_actsTarget]->getActs(); + _outputs[_deviceID] = _actsTarget < 0 ? new NVMatrix() : NULL; + _actsGrad[_deviceID] = _actsGradTarget < 0 ? new NVMatrix() : &_prev[_actsGradTarget]->getActsGrad(_deviceID); + for (int i = 0; i < _next.size(); ++i) { + _numGradProducersNext += _next[i]->hasGradProducerNext(_name); + int d = _next[i]->getDeviceID(); + if (_actsGrad.count(d) == 0) { + _actsGrad[d] = new NVMatrix(); + _rcvdBInputs[d] = 0; + _outputs[d] = new NVMatrix(); + } + } +} + +// Does this layer, or some layer below it, need the gradient +// for parameter updates? +// Only weight layers should be grad consumers themselves. +bool Layer::isGradConsumer() { + if (!_foundGradConsumers) { + for (int i = 0; i < _prev.size(); i++) { + _gradConsumer |= _prev[i]->isGradConsumer(); + } + _foundGradConsumers = true; + } + return _gradConsumer; +} + +// Does this layer produce gradient for layers below? 
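+// The base implementation answers yes. A subclass that never back-propagates a
+// gradient can override this (or the by-name variant below) to return false,
+// which is what lets findBwdTerminal() above mark it as a backward-pass terminal.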
+bool Layer::isGradProducer() { + return true; +} + +bool Layer::isGradProducer(string& layerName) { + return isGradProducer(); +} + +vector& Layer::getPrev() { + return _prev; +} + +vector& Layer::getNext() { + return _next; +} + +NVMatrix& Layer::getActs() { + return getActs(getDeviceID()); +} + +NVMatrix& Layer::getActs(int deviceID) { + assert(_outputs.count(deviceID) > 0); + return *_outputs[deviceID]; +} + +NVMatrix& Layer::getActsGrad(int deviceID) { + assert(_actsGrad.count(deviceID) > 0); + return *_actsGrad[deviceID]; +} + +NVMatrix& Layer::getActsGrad() { + return getActsGrad(NVMatrix::getDeviceID()); +} + +int Layer::getDeviceID() { + return _deviceID; +} + +ConvNetGPU& Layer::getConvNetGPU() { + assert(_convNetGPU != NULL); + return *_convNetGPU; +} + +ConvNet& Layer::getConvNet() { + return getConvNetGPU().getConvNet(); +} + +PipeDispenser& Layer::getPipeDispenser() { + return getConvNet().getPipeDispenser(); +} + +void Layer::setBwdTerminal(bool t) { + _bwdTerminal = t; +} + +/* + * ======================= + * NeuronLayer + * ======================= + */ +NeuronLayer::NeuronLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) + : Layer(convNetGPU, paramsDict, true) { + PyObject* neuronDict = PyDict_GetItemString(paramsDict, "neuron"); + _neuronType = pyDictGetString(neuronDict, "type"); + _neuron = &Neuron::makeNeuron(neuronDict); +} + +void NeuronLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + // Special optimization for cross-entropy objective with logistic units. + // Better to just compute the input gradient in one go to avoid division by small numbers. + bool doCrossEntGrad = _neuronType == "logistic" && _next.size() == 1 && _next[0]->getType() == "cost.crossent2" && _next[0]->getDeviceID() == _deviceID; + if (doCrossEntGrad) { + NVMatrix& labels = _next[0]->getPrev()[0]->getActs(_deviceID); + float gradCoeff = dynamic_cast(_next[0])->getCoeff(); + labels.transpose(_trans); + if (scaleTargets == 0) { + getActs().add(labels, -gradCoeff, gradCoeff, _prev[0]->getActsGrad()); + } else { + getActs().applyTernary(AddGradientBinaryOperator(NVMatrixBinaryOps::WeightedAdd(-gradCoeff, gradCoeff)), labels, _prev[0]->getActsGrad(), _prev[0]->getActsGrad()); + } + } else { + _neuron->computeInputGrad(v, _prev[0]->getActsGrad(), scaleTargets > 0); + } +} + +void NeuronLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + _neuron->activate(*_inputs[0], getActs()); +} + +string& NeuronLayer::getNeuronType() { + return _neuronType; +} + +/* + * ======================= + * WeightLayer + * ======================= + * + * The useGrad parameter here merely expresses a preference by the subclass. It may + * be overridden by the superclass (WeightLayer) and in that case the subclass must follow its wishes. + * So when computing gradient updates, the subclass must always first check weights.isUseGrad(). + * + * Note: biases always useGrad. 
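+ *
+ * A sketch of the pattern (cf. FCLayer::bpropWeights below): when isUseGrad() is
+ * true, the raw gradient is accumulated into getGrad(), scaled by 1/numCases, and
+ * applied later by updateWeights(); otherwise the update is written directly into
+ * getInc(), pre-scaled by eps/numCases, with the previous increment decayed by the
+ * momentum on the first mini-pass of a batch.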
+ */ +WeightLayer::WeightLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool trans, bool useGrad, bool initWeights) : + Layer(convNetGPU, paramsDict, trans) { + if (initWeights) { + MatrixV& hWeights = *pyDictGetMatrixV(paramsDict, "weights"); + MatrixV& hWeightsInc = *pyDictGetMatrixV(paramsDict, "weightsInc"); + Matrix& hBiases = *pyDictGetMatrix(paramsDict, "biases"); + Matrix& hBiasesInc = *pyDictGetMatrix(paramsDict, "biasesInc"); + PyObject* pySchedW = PyDict_GetItemString(paramsDict, "schedW"); + PyObject* pySchedB = PyDict_GetItemString(paramsDict, "schedB"); + floatv& momW = *pyDictGetFloatV(paramsDict, "momW"); + float momB = pyDictGetFloat(paramsDict, "momB"); + floatv& epsW = *pyDictGetFloatV(paramsDict, "epsW"); + float epsB = pyDictGetFloat(paramsDict, "epsB"); + floatv& wc = *pyDictGetFloatV(paramsDict, "wc"); + floatv& wball = *pyDictGetFloatV(paramsDict, "wballNormed"); + float superEps = pyDictGetFloat(paramsDict, "superEps"); + float superMom = pyDictGetFloat(paramsDict, "superMom"); + useGrad |= superEps > 0; // if using super weight updates, must use gradient matrix + + // Source layers for shared weights + stringv& weightSourceLayers = *pyDictGetStringV(paramsDict, "weightSourceLayers"); + // Weight matrix indices (inside the above source layers) for shared weights + intv& weightSourceMatrixIndices = *pyDictGetIntV(paramsDict, "weightSourceMatrixIndices"); + + for (int i = 0; i < weightSourceLayers.size(); i++) { + string& srcLayerName = weightSourceLayers[i]; + int matrixIdx = weightSourceMatrixIndices[i]; + LearningRateSchedule& lrs = LearningRateSchedule::make(pySchedW, epsW[i]); + if (srcLayerName == _name) { // Current layer + _weights.addWeights(*new Weights(_weights[matrixIdx], lrs)); + } else if (srcLayerName != "") { + WeightLayer& srcLayer = *static_cast(&convNetGPU->getLayer(srcLayerName)); + Weights* srcWeights = &srcLayer.getWeights(matrixIdx); + _weights.addWeights(*new Weights(*srcWeights, lrs)); + } else { + _weights.addWeights(*new Weights(*hWeights[i], *hWeightsInc[i], lrs, wc[i], wball[i], momW[i], superEps, useGrad)); + } + } + _biases = new Weights(hBiases, hBiasesInc, LearningRateSchedule::make(pySchedB, epsB), 0, 0, momB, superEps, true); + + delete &weightSourceLayers; + delete &weightSourceMatrixIndices; + delete &hWeights; + delete &hWeightsInc; + delete &momW; + delete &epsW; + delete &wc; + delete &wball; + } + _wStep = 0.02; + _bStep = 0.05; + _gradComputed = false; +} + +void WeightLayer::bpropCommon(NVMatrix& v, PASS_TYPE passType) { + if (_biases->getLearningRateSchedule().getBaseRate() > 0) { + bpropBiases(v, passType); + _biases->incNumUpdates(); + } + for (int i = 0; i < _weights.getSize(); i++) { + if (_weights[i].getLearningRateSchedule().getBaseRate() > 0) { + bpropWeights(v, i, passType); + // Increment its number of updates + _weights[i].incNumUpdates(); +// printf("layer %s[%d] computing weight grad\n", _name.c_str(), i); + } + } + _gradComputed = true; +} + +bool WeightLayer::updateWeights() { + if (_gradComputed && _convNetGPU->getConvNet().getNumBwdMiniPasses() == 0) { + _weights.update(_convNetGPU->getConvNet().getTrainingProgress()); + _biases->update(_convNetGPU->getConvNet().getTrainingProgress()); + constrainWeights(); + _gradComputed = false; + return true; + } + return false; +} + +void WeightLayer::copyToCPU() { + _weights.copyToCPU(); + _biases->copyToCPU(); +} + +void WeightLayer::copyToGPU() { + _weights.copyToGPU(); + _biases->copyToGPU(); +} + +void WeightLayer::checkGradients() { + for (int i = 0; i < 
_weights.getSize(); i++) { + _convNetGPU->getConvNet().checkGradient(_name + " weights[" + tostr(i) + "]", _wStep, _weights[i]); + } + _convNetGPU->getConvNet().checkGradient(_name + " biases", _bStep, *_biases); +} + +Weights& WeightLayer::getWeights(int idx) { + return _weights[idx]; +} + +/* + * ======================= + * FCLayer + * ======================= + */ +FCLayer::FCLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool useGrad, bool initWeights) + : WeightLayer(convNetGPU, paramsDict, true, useGrad, initWeights) { + _wStep = 0.01; + _bStep = 0.01; +} + +void FCLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + getActs().addProduct(*_inputs[inpIdx], *_weights[inpIdx], scaleTargets, 1); + if (scaleTargets == 0) { + getActs().addVector(_biases->getW()); + } +} + +void FCLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + NVMatrix& weights_T = _weights[inpIdx].getW().getTranspose(); + _prev[inpIdx]->getActsGrad().addProduct(v, weights_T, scaleTargets, 1); + delete &weights_T; +} + +void FCLayer::bpropBiases(NVMatrix& v, PASS_TYPE passType) { + float scaleBGrad = passType == PASS_GC ? 1.0f : 1.0f / getNumCases(v); + float scaleInc = _biases->getNumUpdates() > 0; + _biases->getGrad().addSum(v, 0, scaleInc, scaleBGrad); +} + +void FCLayer::bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType) { + int numCases = getNumCases(v); + NVMatrix& prevActs_T = _inputs[inpIdx]->getTranspose(); + float progress = _convNetGPU->getConvNet().getTrainingProgress(); + if (_weights[inpIdx].isUseGrad()) { + float scaleGrad = passType == PASS_GC ? 1 : 1.0f / numCases; + float scaleInc = (_weights[inpIdx].getNumUpdates() > 0); + _weights[inpIdx].getGrad().addProduct(prevActs_T, v, scaleInc, scaleGrad); + } else { + float scaleGrad = passType == PASS_GC ? 1 : _weights[inpIdx].getEps(progress) / numCases; + float scaleInc = (passType == PASS_GC ? _weights[inpIdx].getNumUpdates() > 0 + : (_weights[inpIdx].getNumUpdates() == 0 ? _weights[inpIdx].getMom() : 1.0f)); + _weights[inpIdx].getInc().addProduct(prevActs_T, v, scaleInc, scaleGrad); + } + + delete &prevActs_T; +} + +void FCLayer::constrainWeights() { + for (int i = 0; i < _weights.getSize(); i++) { + if (_weights[i].getWBall() > 0 && _weights[i].isOwner() && _weights[i].getLearningRateSchedule().getBaseRate() > 0) { + NVMatrix norm, sqw; // Unfortunate extra weight matrix... 
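+            // The lines below square the weights, sum each column to get that unit's
+            // squared L2 norm, turn the norms into per-column rescaling factors via
+            // WeightConstraintOperator(wball), and scale the columns accordingly; in
+            // effect a max-norm constraint on each unit's incoming weight vector.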
+ _weights[i].getW().apply(NVMatrixOps::Square(), sqw); + sqw.sum(0, norm); + norm.apply(WeightConstraintOperator(_weights[i].getWBall())); + _weights[i].getW().eltwiseMultByVector(norm); + } + } +} + +/* + * ======================= + * TreeFCLayer + * ======================= + */ +TreeFCLayer::TreeFCLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) + : FCLayer(convNetGPU, paramsDict, true, false) { + int rootLabel = pyDictGetInt(paramsDict, "rootLabel"); + + SoftmaxTree* tree = new SoftmaxTree(rootLabel); + + PyObject* pyTree = PyDict_GetItemString(paramsDict, "tree"); + makeTree(pyTree, tree->getRoot()); + + MatrixV& hWeights = *pyDictGetMatrixV(paramsDict, "weights"); + MatrixV& hWeightsInc = *pyDictGetMatrixV(paramsDict, "weightsInc"); + Matrix& hBiases = *pyDictGetMatrix(paramsDict, "biases"); + Matrix& hBiasesInc = *pyDictGetMatrix(paramsDict, "biasesInc"); + + floatv& momW = *pyDictGetFloatV(paramsDict, "momW"); + float momB = pyDictGetFloat(paramsDict, "momB"); + floatv& epsW = *pyDictGetFloatV(paramsDict, "epsW"); + float epsB = pyDictGetFloat(paramsDict, "epsB"); + floatv& wc = *pyDictGetFloatV(paramsDict, "wc"); + + // This class does not support learning rate schedules for now. + _treeWeights = new TreeWeights(*tree, *hWeights[0], *hWeightsInc[0], *new LearningRateSchedule(epsW[0]), wc[0], momW[0]); + _biases = new Weights(hBiases, hBiasesInc, *new LearningRateSchedule(epsB), 0, 0, momB, false, true); + _weights.addWeights(*_treeWeights); + + _wStep = 0.001; + + delete &hWeights; + delete &hWeightsInc; + delete &momW; + delete &epsW; + delete &wc; +} + +void TreeFCLayer::makeTree(PyObject* pyTree, SoftmaxNode& rootNode) { + PyObject* pyChildren = PyList_GetItem(pyTree, rootNode.getLabel()); + int numChildren = PyList_GET_SIZE(pyChildren); + for (int c = 0; c < numChildren; ++c) { + int childLabel = PyLong_AsLong(PyList_GetItem(pyChildren, c)); + SoftmaxNode& childNode = rootNode.addChild(childLabel); + makeTree(pyTree, childNode); + } +} + +void TreeFCLayer::fpropActs(int inpIdx, float scaleTargets, uint passType) { + if (passType == PASS_GC) { + _treeWeights->makeWeights(); + } + FCLayer::fpropActs(inpIdx, scaleTargets, passType); +} + +void TreeFCLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, uint passType) { + FCLayer::bpropActs(v, inpIdx, scaleTargets, passType); + if (passType == PASS_GC) { + _treeWeights->distributeGradients(); + } +} + +void TreeFCLayer::constrainWeights() { +} + +void TreeFCLayer::checkGradients() { + DummyWeights dum = DummyWeights(_treeWeights->getCPUW(), _treeWeights->getCPUWInc(), + _treeWeights->getAllW(), _treeWeights->getAllInc(), + _treeWeights->getAllGrad()); + _convNetGPU->getConvNet().checkGradient(_name + " weights", _wStep, dum); + _convNetGPU->getConvNet().checkGradient(_name + " biases", _bStep, *_biases); +} + +/* + * ======================= + * LocalLayer + * ======================= + */ +LocalLayer::LocalLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool useGrad) + : WeightLayer(convNetGPU, paramsDict, false, useGrad, true) { + _padding = pyDictGetIntV(paramsDict, "padding"); + _stride = pyDictGetIntV(paramsDict, "stride"); + _filterSize = pyDictGetIntV(paramsDict, "filterSize"); + _channels = pyDictGetIntV(paramsDict, "channels"); + _imgSize = pyDictGetIntV(paramsDict, "imgSize"); + _numFilters = pyDictGetInt(paramsDict, "filters"); + _groups = pyDictGetIntV(paramsDict, "groups"); + _filterChannels = pyDictGetIntV(paramsDict, "filterChannels"); + _randSparse = pyDictGetIntV(paramsDict, "randSparse"); + 
_overSample = pyDictGetIntV(paramsDict, "overSample"); + _filterPixels = pyDictGetIntV(paramsDict, "filterPixels"); + _imgPixels = pyDictGetIntV(paramsDict, "imgPixels"); + + _modulesX = pyDictGetInt(paramsDict, "modulesX"); + _modules = pyDictGetInt(paramsDict, "modules"); + + // It's a vector on the heap to be consistent with all the others... + _filterConns = new vector(); + PyObject* pyFilterConns = PyDict_GetItemString(paramsDict, "filterConns"); + for (int i = 0; i < _randSparse->size(); i++) { + FilterConns fc; + if (_randSparse->at(i)) { + fc.hFilterConns = getIntA(PyList_GET_ITEM(pyFilterConns, i)); + } + _filterConns->push_back(fc); + } +} + +void LocalLayer::copyToGPU() { + WeightLayer::copyToGPU(); + for (int i = 0; i < _prev.size(); i++) { + if (_randSparse->at(i)) { // Copy to GPU vector that describes sparse random connectivity + cudaMalloc(&_filterConns->at(i).dFilterConns, sizeof(int) * _groups->at(i) * _filterChannels->at(i)); + cudaMemcpy(_filterConns->at(i).dFilterConns, _filterConns->at(i).hFilterConns, + sizeof(int) * _groups->at(i) * _filterChannels->at(i), cudaMemcpyHostToDevice); + getLastCudaError("cudaMemcpy: failed"); + } + } +} + +/* + * ======================= + * ConvLayer + * ======================= + */ +ConvLayer::ConvLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) + : LocalLayer(convNetGPU, paramsDict, true) { + _partialSum = pyDictGetInt(paramsDict, "partialSum"); + _sharedBiases = pyDictGetInt(paramsDict, "sharedBiases"); + _weightContrastNormMin = pyDictGetFloatV(paramsDict, "wcNormMin"); + _weightContrastNormMax = pyDictGetFloatV(paramsDict, "wcNormMax"); +} + +void ConvLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + if (_randSparse->at(inpIdx)) { + convFilterActsSparse(*_inputs[inpIdx], *_weights[inpIdx], getActs(), _filterConns->at(inpIdx).dFilterConns, + _imgSize->at(inpIdx), _modulesX, _modulesX, _padding->at(inpIdx), _stride->at(inpIdx), _channels->at(inpIdx), + _filterChannels->at(inpIdx), _groups->at(inpIdx), scaleTargets, 1); + } else { + convFilterActs(*_inputs[inpIdx], *_weights[inpIdx], getActs(), _imgSize->at(inpIdx), _modulesX, _modulesX, _padding->at(inpIdx), + _stride->at(inpIdx), _channels->at(inpIdx), _groups->at(inpIdx), scaleTargets, 1); + } + + if (scaleTargets == 0) { + if (_sharedBiases) { + getActs().reshape(_numFilters, getActs().getNumElements() / _numFilters); + getActs().addVector(_biases->getW()); + getActs().reshape(_numFilters * _modules, getActs().getNumElements() / (_numFilters * _modules)); + } else { + getActs().addVector(_biases->getW()); + } + } +} + +void ConvLayer::bpropBiases(NVMatrix& v, PASS_TYPE passType) { + int numCases = getNumCases(v); + float scaleBGrad = passType == PASS_GC ? 1.0f : 1.0f / numCases; + float scaleInc = _biases->getNumUpdates() > 0; + if (_sharedBiases) { + v.reshape(_numFilters, v.getNumElements() / _numFilters); + _biases->getGrad().addSum(v, 1, scaleInc, scaleBGrad); + v.reshape(_numFilters * _modules, v.getNumElements() / (_numFilters * _modules)); + } else { + _biases->getGrad().addSum(v, 1, scaleInc, scaleBGrad); + } +} + +void ConvLayer::bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType) { + int numCases = getNumCases(v); + + NVMatrix& tgt = _partialSum > 0 ? _weightGradTmp : _weights[inpIdx].getGrad(); + float scaleWGrad = passType == PASS_GC ? 1.0f : 1.0f / numCases; + float scaleTargets = _weights[inpIdx].getNumUpdates() > 0 && _partialSum == 0; // ? 
1 : 0; + + if (_randSparse->at(inpIdx)) { + convWeightActsSparse(*_inputs[inpIdx], v, tgt, _filterConns->at(inpIdx).dFilterConns, _imgSize->at(inpIdx), _modulesX, _modulesX, + _filterSize->at(inpIdx), _padding->at(inpIdx), _stride->at(inpIdx), _channels->at(inpIdx), + _filterChannels->at(inpIdx), _groups->at(inpIdx), _partialSum, scaleTargets, scaleWGrad); + } else { + convWeightActs(*_inputs[inpIdx], v, tgt, _imgSize->at(inpIdx), _modulesX, _modulesX, _filterSize->at(inpIdx), _padding->at(inpIdx), + _stride->at(inpIdx), _channels->at(inpIdx), _groups->at(inpIdx), _partialSum, scaleTargets, scaleWGrad); + } + if (_partialSum > 0) { + scaleTargets = _weights[inpIdx].getNumUpdates() > 0; + //cout << _name << " scale inc: " << scaleTargets << " scale grad " << scaleWGrad << endl; + _weightGradTmp.reshape(_modules / _partialSum, _filterChannels->at(inpIdx) * _filterPixels->at(inpIdx) * _numFilters); + _weights[inpIdx].getGrad().addSum(_weightGradTmp, 0, scaleTargets, 1); + _weights[inpIdx].getGrad().reshape(_filterChannels->at(inpIdx) * _filterPixels->at(inpIdx), _numFilters); + } +} + +void ConvLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + if (_randSparse->at(inpIdx)) { + NVMatrix& tgt = _overSample->at(inpIdx) > 1 ? _actGradTmp : _prev[inpIdx]->getActsGrad(); + + convImgActsSparse(v, *_weights[inpIdx], tgt, _filterConns->at(inpIdx).dFilterConns, + _imgSize->at(inpIdx), _imgSize->at(inpIdx), _modulesX, _padding->at(inpIdx), _stride->at(inpIdx), + _channels->at(inpIdx), _filterChannels->at(inpIdx), _groups->at(inpIdx), scaleTargets, 1); + if (_overSample->at(inpIdx) > 1) { + _actGradTmp.reshape(_overSample->at(inpIdx), _actGradTmp.getNumElements() / _overSample->at(inpIdx)); + _actGradTmp.sum(0, _prev[inpIdx]->getActsGrad()); + _prev[inpIdx]->getActsGrad().reshape(_prev[inpIdx]->getActsGrad().getNumElements() / v.getNumCols(), v.getNumCols()); + } + } else { + convImgActs(v, *_weights[inpIdx], _prev[inpIdx]->getActsGrad(), _imgSize->at(inpIdx), _imgSize->at(inpIdx), _modulesX, + _padding->at(inpIdx), _stride->at(inpIdx), _channels->at(inpIdx), _groups->at(inpIdx), scaleTargets, 1); + } +} + +void ConvLayer::truncBwdActs() { + LocalLayer::truncBwdActs(); + if (_conserveMem) { + _weightGradTmp.truncate(); + _actGradTmp.truncate(); + } +} + +void ConvLayer::constrainWeights() { + for (int i = 0; i < _weights.getSize(); i++) { + if (_weightContrastNormMax->at(i) > 0 && _weights[i].isOwner() && _weights[i].getLearningRateSchedule().getBaseRate() > 0) { + float fz = _weights[i].getW().getNumRows(); + NVMatrix tmp; + _weights[i].getW().sum(0, tmp); + _weights[i].getW().addVector(tmp, -1.0f / fz, _weights[i].getGrad()); + // Now _weights[i].getGrad() contains zero-mean filters + _weights[i].getGrad().apply(NVMatrixOps::Square()); + _weights[i].getGrad().sum(0, tmp); +// tmp.apply(NVMatrixOps::Sqrt()); +// tmp.scale(1.0f / fz); + +// tmp.scale(1.0f / (fz * _weightContrastNorm->at(i))); + tmp.apply(WeightContrastNormOperator(_weightContrastNormMin->at(i), _weightContrastNormMax->at(i), 1.0f / fz)); + // Now tmp has the stdev + _weights[i].getW().eltwiseMultByVector(tmp); + } + // It's pretty silly to do both these things but whatever + if (_weights[i].getWBall() > 0 && _weights[i].isOwner() && _weights[i].getLearningRateSchedule().getBaseRate() > 0) { + NVMatrix norm; + _weights[i].getW().apply(NVMatrixOps::Square(), _weights[i].getGrad()); + _weights[i].getGrad().sum(0, norm); + + norm.apply(WeightConstraintOperator(_weights[i].getWBall())); + 
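+            // Presumably (judging from the operator's name and this usage), 'norm' now holds
+            // one multiplier per filter column: 1 for filters inside the norm ball and < 1
+            // where the squared L2 norm exceeds getWBall(), so the multiply below rescales
+            // only the violating filters.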
_weights[i].getW().eltwiseMultByVector(norm); + } + } +} + +/* + * ======================= + * LocalUnsharedLayer + * ======================= + */ +LocalUnsharedLayer::LocalUnsharedLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) + : LocalLayer(convNetGPU, paramsDict, false) { +} + +void LocalUnsharedLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + if (_randSparse->at(inpIdx)) { + localFilterActsSparse(*_inputs[inpIdx], *_weights[inpIdx], getActs(), _filterConns->at(inpIdx).dFilterConns, + _imgSize->at(inpIdx), _modulesX, _modulesX, _padding->at(inpIdx), _stride->at(inpIdx), _channels->at(inpIdx), + _filterChannels->at(inpIdx), _groups->at(inpIdx), scaleTargets, 1); + } else { + localFilterActs(*_inputs[inpIdx], *_weights[inpIdx], getActs(), _imgSize->at(inpIdx), _modulesX, _modulesX, _padding->at(inpIdx), + _stride->at(inpIdx), _channels->at(inpIdx), _groups->at(inpIdx), scaleTargets, 1); + + } + if (scaleTargets == 0) { + getActs().addVector(_biases->getW()); + } +} + +void LocalUnsharedLayer::bpropBiases(NVMatrix& v, PASS_TYPE passType) { + int numCases = getNumCases(v); + float scaleBGrad = passType == PASS_GC ? 1.0f : 1.0f / numCases; + float scaleInc = _biases->getNumUpdates() > 0; + _biases->getGrad().addSum(v, 1, scaleInc, scaleBGrad); +} + +void LocalUnsharedLayer::bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType) { + int numCases = getNumCases(v); + float progress = _convNetGPU->getConvNet().getTrainingProgress(); + NVMatrix& tgt = _weights[inpIdx].isUseGrad() ? _weights[inpIdx].getGrad() : _weights[inpIdx].getInc(); + float scaleInc, scaleWGrad; + if (_weights[inpIdx].isUseGrad()) { + scaleInc = _weights[inpIdx].getNumUpdates() > 0; + scaleWGrad = passType == PASS_GC ? 1.0f : 1.0f / numCases; // eps / numCases + } else { + scaleInc = (passType == PASS_GC ? _weights[inpIdx].getNumUpdates() > 0 + : (_weights[inpIdx].getNumUpdates() == 0 ? _weights[inpIdx].getMom() : 1.0f)); + scaleWGrad = passType == PASS_GC ? 
1.0f : _weights[inpIdx].getEps(progress) / numCases; // eps / numCases + } + + if (_randSparse->at(inpIdx)) { + localWeightActsSparse(*_inputs[inpIdx], v, tgt, _filterConns->at(inpIdx).dFilterConns, + _imgSize->at(inpIdx), _modulesX, _modulesX, _filterSize->at(inpIdx), _padding->at(inpIdx), _stride->at(inpIdx), + _channels->at(inpIdx), _filterChannels->at(inpIdx), _groups->at(inpIdx), scaleInc, scaleWGrad); + } else { + localWeightActs(*_inputs[inpIdx], v, tgt, _imgSize->at(inpIdx), _modulesX, _modulesX, _filterSize->at(inpIdx), + _padding->at(inpIdx), _stride->at(inpIdx), _channels->at(inpIdx), _groups->at(inpIdx), scaleInc, scaleWGrad); + } +} + +void LocalUnsharedLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + if (_randSparse->at(inpIdx)) { + localImgActsSparse(v, *_weights[inpIdx], _prev[inpIdx]->getActsGrad(), _filterConns->at(inpIdx).dFilterConns, + _imgSize->at(inpIdx), _imgSize->at(inpIdx), _modulesX, _padding->at(inpIdx), _stride->at(inpIdx), _channels->at(inpIdx), + _filterChannels->at(inpIdx), _groups->at(inpIdx), scaleTargets, 1); + } else { + localImgActs(v, *_weights[inpIdx], _prev[inpIdx]->getActsGrad(),_imgSize->at(inpIdx), _imgSize->at(inpIdx), _modulesX, + _padding->at(inpIdx), _stride->at(inpIdx), _channels->at(inpIdx), _groups->at(inpIdx), scaleTargets, 1); + } +} + +void LocalUnsharedLayer::constrainWeights() { + for (int i = 0; i < _weights.getSize(); i++) { + if (_weights[i].getWBall() > 0 && _weights[i].isOwner() && _weights[i].getLearningRateSchedule().getBaseRate() > 0) { + normalizeLocalWeights(*_weights[i], _modules, _weights[i].getWBall()); + } + } +} + +/* + * ======================= + * SoftmaxLayer + * ======================= + */ +SoftmaxLayer::SoftmaxLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) + : Layer(convNetGPU, paramsDict, true), _doLogregGrad(true) { +} + +void SoftmaxLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + NVMatrix& input = *_inputs[0]; + NVMatrix& max = input.max(1); + input.addVector(max, -1, getActs()); + getActs().apply(NVMatrixOps::Exp()); + NVMatrix& sum = getActs().sum(1); + getActs().eltwiseDivideByVector(sum); + + delete &max; + delete ∑ +} + +void SoftmaxLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + assert(inpIdx == 0); + bool doLogregGrad = _doLogregGrad && (_next.size() == 1 && _next[0]->getType() == "cost.logreg" && _next[0]->getDeviceID() == _deviceID); + if (doLogregGrad) { + NVMatrix& labels = _next[0]->getPrev()[0]->getActs(_deviceID); + float gradCoeff = dynamic_cast(_next[0])->getCoeff(); + computeLogregSoftmaxGrad(labels, getActs(), _prev[0]->getActsGrad(), scaleTargets == 1, gradCoeff); + } else { + computeSoftmaxGrad(getActs(), v, _prev[0]->getActsGrad(), scaleTargets == 1); + } +} + +void SoftmaxLayer::setDoLogregGrad(bool b) { + _doLogregGrad = b; +} + +/* + * ======================= + * ConcatenationLayer + * ======================= + */ +ConcatenationLayer::ConcatenationLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) + : Layer(convNetGPU, paramsDict, false) { + _copyOffsets = pyDictGetIntV(paramsDict, "copyOffsets"); + _copyOffsets->push_back(_numOutputs); +} + +void ConcatenationLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + getActs().resize(_numOutputs, _inputs[inpIdx]->getNumCols()); + _inputs[inpIdx]->copy(getActs(), 0, -1, 0, -1, _copyOffsets->at(inpIdx), 0); +} + +void ConcatenationLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + 
NVMatrix& copySrc = v.sliceRows(_copyOffsets->at(inpIdx), _copyOffsets->at(inpIdx + 1)); // view + _prev[inpIdx]->getActsGrad().add(copySrc, scaleTargets, 1); + delete ©Src; +} + +/* + * ======================= + * EltwiseSumLayer + * ======================= + */ +EltwiseSumLayer::EltwiseSumLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : Layer(convNetGPU, paramsDict, false) { + _coeffs = pyDictGetFloatV(paramsDict, "coeffs"); +} + +void EltwiseSumLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + if (scaleTargets == 0) { + _inputs[inpIdx]->scale(_coeffs->at(inpIdx), getActs()); + } else { + getActs().add(*_inputs[inpIdx], _coeffs->at(inpIdx)); + } +} + +void EltwiseSumLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + if (scaleTargets == 0 ) { + v.scale(_coeffs->at(inpIdx), _prev[inpIdx]->getActsGrad()); + } else { + assert(&_prev[inpIdx]->getActsGrad() != &v); + _prev[inpIdx]->getActsGrad().add(v, scaleTargets, _coeffs->at(inpIdx)); + } +} + +/* + * ======================= + * EltwiseMaxLayer + * ======================= + */ +EltwiseMaxLayer::EltwiseMaxLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : Layer(convNetGPU, paramsDict, false) { +} + +void EltwiseMaxLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + if (inpIdx == 1) { // First input, do nothing + _inputs[inpIdx]->applyBinary(NVMatrixAggs::Max(), *_inputs[0], getActs()); + } else if (inpIdx > 1) { + getActs().applyBinary(NVMatrixAggs::Max(), *_inputs[inpIdx]); + } +} + +void EltwiseMaxLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + computeEltwiseMaxGrad(v, *_inputs[inpIdx], getActs(), _prev[inpIdx]->getActsGrad(), scaleTargets != 0); +} + +/* + * ======================= + * HiddenSexLayer + * ======================= + */ +HiddenSexLayer::HiddenSexLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : Layer(convNetGPU, paramsDict, false) { + _enable = pyDictGetInt(paramsDict, "enable"); + _keep = pyDictGetFloat(paramsDict, "keep"); +} + +void HiddenSexLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + if (_enable && passType == PASS_TRAIN) { + _sexMask.resize(*_inputs[inpIdx]); + _sexMask.randomizeUniform(); + _sexMask.smallerThanScalar(_keep); + _inputs[inpIdx]->eltwiseMult(_sexMask, getActs()); + } else { + _inputs[inpIdx]->scale(_keep, getActs()); + } +} + +void HiddenSexLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + if (_enable && passType == PASS_TRAIN) { + if (scaleTargets != 0) { + v.applyTernary(AddGradientBinaryOperator(NVMatrixBinaryOps::Multiply()), + _sexMask, _prev[inpIdx]->getActsGrad(), _prev[inpIdx]->getActsGrad()); + } else { + v.eltwiseMult(_sexMask, _prev[inpIdx]->getActsGrad()); + } + } else { + if (scaleTargets != 0) { + v.applyBinary(AddGradientOperator(NVMatrixOps::MultByScalar(_keep)), + _prev[inpIdx]->getActsGrad(), _prev[inpIdx]->getActsGrad()); + } else { + v.scale(_keep, _prev[inpIdx]->getActsGrad()); + } + } +} + +void HiddenSexLayer::truncBwdActs() { + Layer::truncBwdActs(); + if (_conserveMem) { + _sexMask.truncate(); + } +} + +/* + * ======================= + * DataLayer + * ======================= + */ +DataLayer::DataLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : Layer(convNetGPU, paramsDict, false) { + _dataIdx = pyDictGetInt(paramsDict, "dataIdx"); + _useBuffer = false; + _bufferMinibatchIdx = -1; + _bufferData = NULL; +} + +void DataLayer::fprop(PASS_TYPE passType) { + throw string("No dava given!"); +} 
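The HiddenSexLayer above is a dropout-style layer: when enabled during training it draws a uniform mask, keeps each unit with probability _keep, and multiplies both the activations and (on the backward pass) the gradients by that mask; otherwise it simply scales by _keep. The following is a minimal CPU sketch of the same scheme, written against plain std::vector<float> rather than NVMatrix purely to illustrate the logic -- the function names and the explicit RNG argument are not part of this codebase:

    #include <cstddef>
    #include <random>
    #include <vector>

    // Forward pass of a dropout-style layer: in training mode, zero each unit with
    // probability (1 - keep) and remember the mask; otherwise scale by keep.
    std::vector<float> dropoutFprop(const std::vector<float>& in, float keep, bool train,
                                    std::vector<float>& mask, std::mt19937& rng) {
        std::vector<float> out(in.size());
        if (train) {
            std::uniform_real_distribution<float> uni(0.0f, 1.0f);
            mask.resize(in.size());
            for (std::size_t i = 0; i < in.size(); ++i) {
                mask[i] = uni(rng) < keep ? 1.0f : 0.0f;  // cf. randomizeUniform + smallerThanScalar(_keep)
                out[i] = in[i] * mask[i];                 // cf. eltwiseMult(_sexMask, getActs())
            }
        } else {
            for (std::size_t i = 0; i < in.size(); ++i) {
                out[i] = in[i] * keep;                    // cf. scale(_keep, getActs())
            }
        }
        return out;
    }

    // Backward pass: gradients flow only through the kept units, or are scaled by
    // keep when no mask was applied on the forward pass.
    std::vector<float> dropoutBprop(const std::vector<float>& gradOut, float keep, bool train,
                                    const std::vector<float>& mask) {
        std::vector<float> gradIn(gradOut.size());
        for (std::size_t i = 0; i < gradOut.size(); ++i) {
            gradIn[i] = gradOut[i] * (train ? mask[i] : keep);
        }
        return gradIn;
    }
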
+ +void DataLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { +} + +void DataLayer::fprop(NVMatrixV& data, PASS_TYPE passType) { + throw string("Data layer requires CPU data!"); +} + +void DataLayer::startFprop(CPUData& data, PASS_TYPE passType) { + copyData(data, false); + fpropNext(passType); +} + +void DataLayer::startFpropFromBuffer(PASS_TYPE passType) { + _useBuffer = !_useBuffer; + fpropNext(passType); +} + +void DataLayer::fpropNext(PASS_TYPE passType) { + for (int i = 0; i < _next.size(); i++) { + // Inform this guy that my output is ready for him + _next[i]->getConvNetGPU().enqueueMessage(new FpropMessage(_name, _next[i]->getName(), passType)); + } +} + +void DataLayer::setBuffer(CPUData& data, int minibatchIdx) { + _bufferData = &data; + copyData(data, true); + + _bufferMinibatchIdx = minibatchIdx; +} + +void DataLayer::copyData(CPUData& data, bool other) { + Matrix& dataMatrix = data.getData(_dataIdx); + int oldDeviceID = NVMatrix::getDeviceID(); + //StopWatchInterface *timer = NULL; + //sdkCreateTimer(&timer); + //sdkStartTimer(&timer); + if (dataMatrix.isTrans()) { + assert(dataMatrix.isView()); + _hostMemFwd.copyFromHost(dataMatrix, true); + } + for (intv::iterator it = _nextDeviceIDs.begin(); it != _nextDeviceIDs.end(); ++it) { + int deviceID = *it; + // Copy my output to this guy's GPU + NVMatrix::setDeviceID(deviceID); + if (dataMatrix.isTrans()) { + _hostMemFwd.flipTrans(getActs(deviceID, other)); + } else { + getActs(deviceID, other).copyFromHost(data.getData(_dataIdx), true); + } + } + for (intv::iterator it = _nextDeviceIDs.begin(); it != _nextDeviceIDs.end(); ++it) { + NVMatrix::setDeviceID(*it); + cudaDeviceSynchronize(); + } + NVMatrix::setDeviceID(oldDeviceID); + //sdkStopTimer(&timer); + //printf("data copy took %f\n", sdkGetTimerValue(&timer)); +} + +CPUData* DataLayer::getBufferData() { + return _bufferData; +} + +int DataLayer::getBufferMinibatchIdx() { + return _bufferMinibatchIdx; +} + +NVMatrix& DataLayer::getActs(int deviceID) { + return getActs(deviceID, false); +} + +NVMatrix& DataLayer::getActs(int deviceID, bool other) { + return *(_useBuffer != other ? 
_outputs2[deviceID] : _outputs[deviceID]); +} + +void DataLayer::postInit() { + Layer::postInit(); + for (int i = 0; i < _next.size(); ++i) { + int d = _next[i]->getDeviceID(); + if (_outputs2.count(d) == 0) { + _outputs2[d] = new NVMatrix(); + } + } +} + +bool DataLayer::isGradProducer() { + return false; +} + +/* + * ===================== + * PoolLayer + * ===================== + */ +PoolLayer::PoolLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool trans) + : Layer(convNetGPU, paramsDict, trans) { + _channels = pyDictGetInt(paramsDict, "channels"); + _sizeX = pyDictGetInt(paramsDict, "sizeX"); + _start = pyDictGetInt(paramsDict, "start"); + _stride = pyDictGetInt(paramsDict, "stride"); + _outputsX = pyDictGetInt(paramsDict, "outputsX"); + _imgSize = pyDictGetInt(paramsDict, "imgSize"); + _pool = pyDictGetString(paramsDict, "pool"); +} + +PoolLayer& PoolLayer::makePoolLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) { + string _pool = pyDictGetString(paramsDict, "pool"); + if (_pool == "max") { + return *new MaxPoolLayer(convNetGPU, paramsDict, false); + } else if(_pool == "maxabs") { + return *new MaxPoolLayer(convNetGPU, paramsDict, true); + } else if(_pool == "avg") { + return *new AvgPoolLayer(convNetGPU, paramsDict); + } else if(_pool == "rand") { + return *new RandomPoolLayer(convNetGPU, paramsDict); + } + throw string("Unknown pooling layer type ") + _pool; +} + +/* + * ===================== + * AvgPoolLayer + * ===================== + */ +AvgPoolLayer::AvgPoolLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : PoolLayer(convNetGPU, paramsDict, false) { +} + +void AvgPoolLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + convLocalPool(*_inputs[0], getActs(), _channels, _sizeX, _start, _stride, _outputsX, AvgPooler()); +} + +void AvgPoolLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + convLocalAvgUndo(v, _prev[0]->getActsGrad(), _sizeX, _start, _stride, _outputsX, _imgSize, scaleTargets, 1); +} + +/* + * ===================== + * MaxPoolLayer + * ===================== + */ +MaxPoolLayer::MaxPoolLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool abs) : PoolLayer(convNetGPU, paramsDict, false), _abs(abs) { +} + +void MaxPoolLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + if (_abs) { +// _inputs[0]->print(10,10);printf(" \n"); + convLocalPool(*_inputs[0], getActs(), _channels, _sizeX, _start, _stride, _outputsX, MaxAbsPooler()); + } else { + convLocalPool(*_inputs[0], getActs(), _channels, _sizeX, _start, _stride, _outputsX, MaxPooler()); + } + +} + +void MaxPoolLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + convLocalMaxUndo(*_inputs[0], v, getActs(), _prev[inpIdx]->getActsGrad(), _sizeX, _start, _stride, _outputsX, scaleTargets, 1); +} + +/* + * ===================== + * RandomPoolLayer + * ===================== + */ +RandomPoolLayer::RandomPoolLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : PoolLayer(convNetGPU, paramsDict, false){ + _doMax = pyDictGetInt(paramsDict, "doMax"); + printf("domax: %d\n", _doMax); +} + +void RandomPoolLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + if (_doMax) { + convLocalPool(*_inputs[0], getActs(), _channels, _sizeX, _start, _stride, _outputsX, MaxPooler()); + } else { + convLocalRandomPool(*_inputs[0], getActs(), _channels, _sizeX, _start, _stride, _outputsX); + } + +} + +void RandomPoolLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + 
convLocalMaxUndo(*_inputs[0], v, getActs(), _prev[inpIdx]->getActsGrad(), _sizeX, _start, _stride, _outputsX, scaleTargets, 1); +} + +/* + * ===================== + * RandomScaleLayer + * ===================== + */ +RandomScaleLayer::RandomScaleLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : Layer(convNetGPU, paramsDict, false) { + _channels = pyDictGetInt(paramsDict, "channels"); + _maxScale = pyDictGetFloat(paramsDict, "maxScale"); + _imgSize = pyDictGetInt(paramsDict, "imgSize"); + _tgtSize = pyDictGetInt(paramsDict, "tgtSize"); + // The smallest size the image could be after rescaling + _minScaledSize = _imgSize / _maxScale; + + // The number of discrete scales we're considering + int numScales = _imgSize - _minScaledSize + 1; + + // The total number of squares of size _tgtSize that we can extract + // from all these scales + double numCrops = numScales * (numScales + 1) * (2 * numScales + 1) / 6; + + // For each scale, record the fraction of the squares that it has. + // This will be the probability of sampling this scale. + _scaleProbs.push_back(1.0 / numCrops); + for (int s = 1; s < numScales; ++s) { + _scaleProbs.push_back(_scaleProbs[s-1] + (s + 1) * (s + 1) / numCrops); +// cout << _scaleProbs.back() << endl; + } +} + +void RandomScaleLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + if (passType == PASS_TRAIN) { + // _maxScale is in the range [1, 2) + float r = randf; + int rescaledSize = _tgtSize; + float scaleFactor = _maxScale; + // Find which scale we have sampled + for (int s = 0; s < _scaleProbs.size(); ++s) { + if (r <= _scaleProbs[s]) { + rescaledSize += s; + float scaleFactorEnd = _imgSize / float(rescaledSize); + float scaleFactorStart = max(1.0, _imgSize / (1.0 + rescaledSize)); +// printf("scaleFactorStart: %f, scaleFactorEnd: %f\n", scaleFactorStart, scaleFactorEnd); + scaleFactor = scaleFactorStart + randf * (scaleFactorEnd - scaleFactorStart); + break; + } + } +// printf("Rescaled size: %d (r = %f), scale factor: %f\n", rescaledSize, r, scaleFactor); + assert(rescaledSize >= _tgtSize); + int maxStart = rescaledSize - _tgtSize; + int startY = rand() % (1 + maxStart), startX = rand() % (1 + maxStart); +// int startY = 0, startX = 0; +// printf("starty: %d, startx: %d\n", startY, startX); + if (rescaledSize == _imgSize) { +// printf("not resizing\n"); + convCrop(*_inputs[0], getActs(), rescaledSize, _tgtSize, startY, startX); + } else { + convResizeBilinear(*_inputs[0], _rescaledActs, _imgSize, rescaledSize, scaleFactor); +// _rescaledActs.print(10,10);exit(0); + convCrop(_rescaledActs, getActs(), rescaledSize, _tgtSize, startY, startX); + } + _rescaledActs.truncate(); // this'll have a different size each time so may as well truncate it. + } else if (passType & PASS_MULTIVIEW_TEST) { // for now... 
+ // definitely redo this later so that multiview cropping is handled in c + _inputs[0]->copy(getActs()); + } else if (passType & PASS_TEST) { // Test on center patch + int cropStart = (_imgSize - _tgtSize) / 2; + convCrop(*_inputs[0], getActs(), _imgSize, _tgtSize, cropStart, cropStart); +// convResizeBilinear(*_inputs[0], getActs(), _imgSize, _tgtSize, _maxScale); + } +} + +void RandomScaleLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + assert(false); +} + +/* + * ===================== + * NailbedLayer + * ===================== + */ +NailbedLayer::NailbedLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : Layer(convNetGPU, paramsDict, false) { + _channels = pyDictGetInt(paramsDict, "channels"); + _start = pyDictGetInt(paramsDict, "start"); + _stride = pyDictGetInt(paramsDict, "stride"); + _outputsX = pyDictGetInt(paramsDict, "outputsX"); + _imgSize = pyDictGetInt(paramsDict, "imgSize"); +} + +void NailbedLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + convBedOfNails(*_inputs[0], getActs(), _channels, _imgSize, _start, _stride, 0, 1); +} + +void NailbedLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + convBedOfNailsUndo(v, _prev[0]->getActsGrad(), _channels, _imgSize, _start, _stride, scaleTargets, 1); +} + +/* + * ===================== + * GaussianBlurLayer + * ===================== + */ +GaussianBlurLayer::GaussianBlurLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : Layer(convNetGPU, paramsDict, false) { + _channels = pyDictGetInt(paramsDict, "channels"); + _hFilter = pyDictGetMatrix(paramsDict, "filter"); +} + +void GaussianBlurLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + convGaussianBlur(*_inputs[0], _filter, getActs(), true, _channels, 0, 1); + convGaussianBlur(getActs(), _filter, getActs(), false, _channels, 0, 1); +} + +void GaussianBlurLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + NVMatrix& tgt1 = _prev[0]->getRcvdBInputs(_deviceID) > 0 ? 
_actGradsTmp : _prev[0]->getActsGrad(); + convGaussianBlur(v, _filter, tgt1, true, _channels, 0, 1); + convGaussianBlur(tgt1, _filter, _prev[0]->getActsGrad(), false, _channels, scaleTargets, 1); +} + +void GaussianBlurLayer::copyToGPU() { + _filter.copyFromHost(*_hFilter, true); +} + + /* + * ===================== + * HorizontalReflectionLayer + * ===================== + */ +HorizontalReflectionLayer::HorizontalReflectionLayer(ConvNetGPU* convNet, PyObject* paramsDict) : Layer(convNet, paramsDict, false) { + _channels = pyDictGetInt(paramsDict, "channels"); + _imgSize = pyDictGetInt(paramsDict, "imgSize"); + assert(_channels >= 1 && _channels <= 3); +} + +void HorizontalReflectionLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + convReflectHorizontal(*_inputs[0], getActs(), _imgSize); +} + +void HorizontalReflectionLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + convReflectHorizontal(v, _prev[0]->getActsGrad(), _imgSize); +} + +/* + * ===================== + * ResizeLayer + * ===================== + */ +ResizeLayer::ResizeLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : Layer(convNetGPU, paramsDict, false) { + _channels = pyDictGetInt(paramsDict, "channels"); + _imgSize = pyDictGetInt(paramsDict, "imgSize"); + _tgtSize = pyDictGetInt(paramsDict, "tgtSize"); + _scale = pyDictGetFloat(paramsDict, "scale"); +} + +void ResizeLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + convResizeBilinear(*_inputs[0], getActs(), _imgSize, _tgtSize, _scale); +} + +// Can't do this +void ResizeLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + assert(false); +} + +/* + * ===================== + * RGBToYUVLayer + * ===================== + */ +RGBToYUVLayer::RGBToYUVLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : Layer(convNetGPU, paramsDict, false) { +} + +void RGBToYUVLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + convRGBToYUV(*_inputs[0], getActs()); +} + +// Can't do this +void RGBToYUVLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + assert(false); +} + +/* + * ===================== + * RGBToLABLayer + * ===================== + */ +RGBToLABLayer::RGBToLABLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : Layer(convNetGPU, paramsDict, false) { + _center = pyDictGetInt(paramsDict, "center"); +} + +void RGBToLABLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + convRGBToLAB(*_inputs[0], getActs(), _center); +} + +// Can't do this +void RGBToLABLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + assert(false); +} + +/* + * ===================== + * ResponseNormLayer + * ===================== + */ +ResponseNormLayer::ResponseNormLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : Layer(convNetGPU, paramsDict, false) { + _channels = pyDictGetInt(paramsDict, "channels"); + _size = pyDictGetInt(paramsDict, "size"); + + _scale = pyDictGetFloat(paramsDict, "scale"); + _pow = pyDictGetFloat(paramsDict, "pow"); +} + +void ResponseNormLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + convResponseNorm(*_inputs[0], _denoms, getActs(), _channels, _size, _scale, _pow); +} + +void ResponseNormLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + convResponseNormUndo(v, _denoms, *_inputs[0], getActs(), _prev[0]->getActsGrad(), _channels, _size, _scale, _pow, scaleTargets, 1); +} + +void ResponseNormLayer::truncBwdActs() 
{ + Layer::truncBwdActs(); + if (_conserveMem) { + _denoms.truncate(); + } +} + +CrossMapResponseNormLayer::CrossMapResponseNormLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : ResponseNormLayer(convNetGPU, paramsDict) { + _blocked = pyDictGetInt(paramsDict, "blocked"); + _minDiv = pyDictGetFloat(paramsDict, "minDiv"); +} + +void CrossMapResponseNormLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + convResponseNormCrossMap(*_inputs[0], _denoms, getActs(), _channels, _size, _scale, _pow, _minDiv, _blocked); +} + +void CrossMapResponseNormLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + convResponseNormCrossMapUndo(v, _denoms, *_inputs[0], getActs(), _prev[0]->getActsGrad(), _channels, _size, _scale, _pow, _blocked, scaleTargets, 1); +} + +/* + * ===================== + * ContrastNormLayer + * ===================== + */ +ContrastNormLayer::ContrastNormLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : ResponseNormLayer(convNetGPU, paramsDict) { + _imgSize = pyDictGetInt(paramsDict, "imgSize"); +} + +void ContrastNormLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + NVMatrix& images = *_inputs[0]; + convLocalPool(images, _meanDiffs, _channels, _size, -_size/2, 1, _imgSize, AvgPooler()); + //_meanDiffs.print(10,10);exit(0); + _meanDiffs.add(images, -1, 1); + convContrastNorm(images, _meanDiffs, _denoms, getActs(), _channels, _size, _scale, _pow); + //images.print(5,5); +} + +void ContrastNormLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + convContrastNormUndo(v, _denoms, _meanDiffs, getActs(), _prev[inpIdx]->getActsGrad(), _channels, _size, _scale, _pow, scaleTargets, 1); +} + +void ContrastNormLayer::truncBwdActs() { + ResponseNormLayer::truncBwdActs(); + if (_conserveMem) { + _meanDiffs.truncate(); + } +} + +/* + * ===================== + * CostLayer + * ===================== + */ +CostLayer::CostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict, bool trans) + : Layer(convNetGPU, paramsDict, trans) { + _coeff = pyDictGetFloat(paramsDict, "coeff"); +} + +float CostLayer::getCoeff() { + return _coeff; +} + +void CostLayer::bprop(NVMatrix& v, PASS_TYPE passType) { + if (_coeff != 0) { + Layer::bprop(v, passType); + } +} + +void CostLayer::fprop(PASS_TYPE passType) { + Layer::fprop(passType); + if (_rcvdFInputs == _prev.size()) { + cudaDeviceSynchronize(); + _convNetGPU->getConvNet().getMessageQueue().enqueue(new Message(FPROP_TERMINAL)); + } +} + +bool CostLayer::isGradProducer() { + return _coeff != 0; +} + +doublev& CostLayer::getCost() { + doublev& v = *new doublev(); + v.insert(v.begin(), _costv.begin(), _costv.end()); + return v; +} + +CostLayer& CostLayer::makeCostLayer(ConvNetGPU* convNetGPU, string& type, PyObject* paramsDict) { + if (type == "cost.crossent") { + return *new CrossEntCostLayer(convNetGPU, paramsDict); + } else if (type == "cost.crossent2") { + return *new CrossEnt2CostLayer(convNetGPU, paramsDict); + } else if (type == "cost.logreg") { + return *new LogregCostLayer(convNetGPU, paramsDict); + } else if (type == "cost.sum2") { + return *new SumOfSquaresCostLayer(convNetGPU, paramsDict); + } else if (type == "cost.gsum2") { + return *new GatedSumOfSquaresCostLayer(convNetGPU, paramsDict); + } else if (type == "cost.tica") { + return *new TICACostLayer(convNetGPU, paramsDict); + } else if (type == "cost.msm") { + return *new MultiSoftmaxCostLayer(convNetGPU, paramsDict); + } else if (type == "cost.rflickr") { + return *new 
RobustFlickrCost(convNetGPU, paramsDict); + } + throw string("Unknown cost layer type ") + type; +} + +/* + * ===================== + * CrossEntCostLayer + * ===================== + */ +CrossEntCostLayer::CrossEntCostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : CostLayer(convNetGPU, paramsDict, false) { +} + +void CrossEntCostLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + // This layer uses its two inputs together + if (inpIdx == 0) { + NVMatrix& labels = *_inputs[0]; + NVMatrix& probs = *_inputs[1]; + int numCases = labels.getLeadingDim(); + NVMatrix& trueLabelLogProbs = getActs(), correctProbs; + computeCrossEntCost(labels, probs, trueLabelLogProbs, correctProbs); + _costv.clear(); + _costv.push_back(-trueLabelLogProbs.sum()); + _costv.push_back(numCases - correctProbs.sum()); + } +} + +void CrossEntCostLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + assert(inpIdx == 1); + NVMatrix& labels = *_inputs[0]; + NVMatrix& probs = *_inputs[1]; + NVMatrix& target = _prev[1]->getActsGrad(); + // Numerical stability optimization: if the layer below me is a softmax layer, let it handle + // the entire gradient computation to avoid multiplying and dividing by a near-zero quantity. + bool doWork = _prev[1]->getNext().size() > 1 || _prev[1]->getType() != "softmax"; + if (doWork) { + computeCrossEntGrad(labels, probs, target, scaleTargets == 1, _coeff); + } +} + +/* + * ===================== + * CrossEnt2CostLayer + * ===================== + */ +CrossEnt2CostLayer::CrossEnt2CostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : CostLayer(convNetGPU, paramsDict, false) { +} + +void CrossEnt2CostLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + // This layer uses its two inputs together + if (inpIdx == 0) { + NVMatrix& labels = *_inputs[0]; + NVMatrix& probs = *_inputs[1]; + int numCases = labels.getLeadingDim(); + labels.applyBinary(CrossEntOperator(), probs, getActs()); + _costv.clear(); + _costv.push_back(-getActs().sum());// / labels.getFollowingDim()); +// printf("-getActs().sum(): %f\n", -getActs().sum()); + } +} + +void CrossEnt2CostLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + assert(inpIdx == 1); + NVMatrix& labels = *_inputs[0]; + NVMatrix& probs = *_inputs[1]; + NVMatrix& target = _prev[1]->getActsGrad(); + // Numerical stability optimization: if the layer below me is a softmax layer, let it handle + // the entire gradient computation to avoid multiplying and dividing by a near-zero quantity. 
+// printf("_prev[1]->getType():%s\n", _prev[1]->getType().c_str()); + bool doWork = _prev[1]->getNext().size() > 1 + || _prev[1]->getType() != "neuron" + || static_cast(_prev[1])->getNeuronType() != "logistic" + || _prev[1]->getDeviceID() != _deviceID; + if (doWork) { + printf("Computing cross-ent gradient the stupid way\n"); + if (scaleTargets == 0) { + labels.applyBinary(CrossEntGradientOperator(_coeff), probs, target); + } else { + labels.applyTernary(AddGradientBinaryOperator(CrossEntGradientOperator(_coeff)), probs, target, target); + } + } +} + +/* + * ===================== + * RobustFlickrCost + * ===================== + */ +RobustFlickrCost::RobustFlickrCost(ConvNetGPU* convNetGPU, PyObject* paramsDict) : CostLayer(convNetGPU, paramsDict, false) { +} + +void RobustFlickrCost::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + // This layer uses its two inputs together + if (inpIdx == 0) { + NVMatrix& labels = *_inputs[0]; + NVMatrix& probs = *_inputs[1]; + int numCases = labels.getLeadingDim(); + labels.applyBinary(RobustFlickrCostOperator(), probs, getActs()); + _costv.clear(); + _costv.push_back(getActs().sum());// / labels.getFollowingDim()); +// printf("-getActs().sum(): %f\n", -getActs().sum()); + } +} + +void RobustFlickrCost::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + assert(inpIdx == 1); + NVMatrix& labels = *_inputs[0]; + NVMatrix& probs = *_inputs[1]; + NVMatrix& target = _prev[1]->getActsGrad(); + if (scaleTargets == 0) { + labels.applyBinary(RobustFlickrCostGradientOperator(_coeff), probs, target); + } else { + labels.applyTernary(AddGradientBinaryOperator(RobustFlickrCostGradientOperator(_coeff)), probs, target, target); + } +} + +/* + * ===================== + * LogregCostLayer + * ===================== + */ +LogregCostLayer::LogregCostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : CostLayer(convNetGPU, paramsDict, false) { + _topk = pyDictGetInt(paramsDict, "topk"); +} + +void LogregCostLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + // This layer uses its two inputs together + if (inpIdx == 0) { + NVMatrix& labels = *_inputs[0]; + NVMatrix* probs = _inputs[1]; + bool doCompute = !IS_MULTIVIEW_TEST(passType); + if (!doCompute) { + if (IS_MULTIVIEW_TEST_START(passType)) { + probs->copy(_probsAccum); + _numAccumed = 1; + } else { + _probsAccum.add(*probs); + _numAccumed += 1; + } + if (IS_MULTIVIEW_TEST_END(passType)) { + probs = &_probsAccum; + probs->scale(1.0 / _numAccumed); + doCompute = true; + } + } + if (doCompute) { + int numCases = labels.getNumElements(); + NVMatrix& trueLabelLogProbs = getActs(); + if (_topk == 1) { + computeLogregCost(labels, *probs, trueLabelLogProbs, _correctProbs); + } else { + computeMultiSoftmaxCost(labels, *probs, *probs, trueLabelLogProbs, _correctProbs, _topkProbs, _topk, false); + } + _costv.clear(); + double top1 = _correctProbs.sum(); + _costv.push_back(-trueLabelLogProbs.sum()); + _costv.push_back(numCases - top1); + _costv.push_back(numCases - (_topk == 1 ? 
top1 : _topkProbs.sum())); + } + } +} + +NVMatrix& LogregCostLayer::getProbsAccum() { + return _probsAccum; +} + +void LogregCostLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { +// assert(inpIdx == 1); + if (inpIdx == 1) { + NVMatrix& labels = *_inputs[0]; + NVMatrix& probs = *_inputs[1]; + NVMatrix& target = _prev[1]->getActsGrad(); + // Numerical stability optimization: if the layer below me is a softmax layer, let it handle + // the entire gradient computation to avoid multiplying and dividing by a near-zero quantity. + bool doWork = _prev[1]->getNext().size() > 1 || _prev[1]->getType() != "softmax" || _prev[1]->getDeviceID() != _deviceID; + if (doWork) { + computeLogregGrad(labels, probs, target, scaleTargets == 1, _coeff); + } + } +} + +/* + * ===================== + * MultiSoftmaxCostLayer + * ===================== + */ +MultiSoftmaxCostLayer::MultiSoftmaxCostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : CostLayer(convNetGPU, paramsDict, false) { + _setSize = pyDictGetInt(paramsDict, "setSize"); + _numOut = pyDictGetInt(paramsDict, "numOut"); + _threads = pyDictGetInt(paramsDict, "threads"); + + for (int i = 0; i < _threads; i++) { + B.push_back(new Matrix(_numOut + 1, _setSize + 1)); + B[i]->apply(Matrix::ONE); + B[i]->scale(-INF); + } +} + +void MultiSoftmaxCostLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + // This layer uses its two inputs together + if (inpIdx == 0) { + NVMatrix& labels = *_inputs[0]; + NVMatrix& energies = *_inputs[1]; + labels.copyToHost(_cpuLabels, true); + Matrix& cpuLabels_T = _cpuLabels.transpose(); + + NVMatrix energies_T; + energies.transpose(energies_T); + NVMatrix& max = energies_T.max(1); + energies_T.addVector(max, -1); + energies_T.copyToHost(_energies_T_CPU, true); + + MultiSoftmaxCPU_T_parallel(_energies_T_CPU, B, _cpuProbs, cpuLabels_T, _setSize, true); + _probsT.copyFromHost(_cpuProbs, true); + _probsT.transpose(getActs()); + + computeCost(true); + + delete &max; + delete &cpuLabels_T; + } +} + +void MultiSoftmaxCostLayer::computeCost(bool useEnergies) { + NVMatrix& labels = *_inputs[0]; + NVMatrix& energies = *_inputs[1]; + int numCases = labels.getNumElements(); + NVMatrix trueLabelLogProbs, correctProbs, top5Probs; + computeMultiSoftmaxCost(labels, getActs(), energies, trueLabelLogProbs, correctProbs, top5Probs, _setSize, useEnergies); + _costv.clear(); + _costv.push_back(-trueLabelLogProbs.sum()); + _costv.push_back(numCases - correctProbs.sum()); + _costv.push_back(numCases - top5Probs.sum()); +} + +void MultiSoftmaxCostLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { +// assert(inpIdx == 1); + if (inpIdx == 1) { + NVMatrix& labels = *_inputs[0]; + + labels.copyToHost(_cpuLabels, true); + Matrix& cpuLabels_T = _cpuLabels.transpose(); + + MultiSoftmaxCPU_T_parallel(_energies_T_CPU, B, _cpuProbs, cpuLabels_T, _setSize, false); + + // _cpuProbs now contains gradient + _probsT.copyFromHost(_cpuProbs); + _probsT.scale(_coeff); + if (scaleTargets == 1) { + _prev[1]->getActsGrad().add(_probsT); + } else { + _probsT.transpose(_prev[1]->getActsGrad()); + + } + delete &cpuLabels_T; + } +} + +/* + * ===================== + * SumOfSquaresCostLayer + * ===================== + */ +SumOfSquaresCostLayer::SumOfSquaresCostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : CostLayer(convNetGPU, paramsDict, false) { +} + +void SumOfSquaresCostLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + _inputs[0]->apply(NVMatrixOps::Square(), 
getActs()); + _costv.clear(); + _costv.push_back(getActs().sum()); +} + +void SumOfSquaresCostLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + _prev[inpIdx]->getActsGrad().add(*_inputs[0], scaleTargets, -2 * _coeff); +} + +/* + * ===================== + * GatedSumOfSquaresCostLayer + * ===================== + */ +GatedSumOfSquaresCostLayer::GatedSumOfSquaresCostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : CostLayer(convNetGPU, paramsDict, false) { +} + +void GatedSumOfSquaresCostLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + if (inpIdx == 0) { + _inputs[1]->apply(NVMatrixOps::Square(), _ungated); + _ungated.eltwiseMultByVector(*_inputs[0], getActs()); + _costv.clear(); + _costv.push_back(getActs().sum()); + } +} + +void GatedSumOfSquaresCostLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + if (inpIdx == 0) { // derivative w.r.t. gates + _prev[inpIdx]->getActsGrad().addSum(_ungated, 0, scaleTargets, -_coeff); + } else { + _inputs[inpIdx]->eltwiseMultByVector(*_inputs[0], _ungated); + _prev[inpIdx]->getActsGrad().add(_ungated, scaleTargets, -2 * _coeff); + } +} + +/* + * ===================== + * TICACostLayer + * ===================== + */ +TICACostLayer::TICACostLayer(ConvNetGPU* convNetGPU, PyObject* paramsDict) : CostLayer(convNetGPU, paramsDict, false) { + _channels = pyDictGetInt(paramsDict, "channels"); + _sizeX = pyDictGetInt(paramsDict, "sizeX"); +} + +// This one doesn't report any error measure. +void TICACostLayer::fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) { + // TODO: make it report something when doing a grad check so it doesn't fail. + // Otherwise it's pretty useless and consumes extra memory to report error numbers. + convTICA(*_inputs[0], getActs(), _channels, _sizeX, scaleTargets, 1); + _costv.clear(); + _costv.push_back(getActs().sum()); // TODO: this is wrong, because it contains reciprocals +} + +void TICACostLayer::bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { + convTICAGrad(*_inputs[0], getActs(), _prev[inpIdx]->getActsGrad(), _channels, _sizeX, scaleTargets, _coeff); +} diff --git a/src/layer_kernels.cu b/src/layer_kernels.cu new file mode 100644 index 0000000..984a372 --- /dev/null +++ b/src/layer_kernels.cu @@ -0,0 +1,720 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + + +//#define LOG(x) ((x) > 0.0 ? log(x) : -1000.0) + +// Computes log(exp(x) + exp(y)) +//#define LOGADD(x, y) () + +using namespace std; + + +/* + * E = -log(y_t) + * probs: (numOut, numCases) + * energies: (numOut, numCases) + * labels: (1, numCases) + * maxEnergies: (1, numCases) + * labelLogProbs: (1, numCases) (*out) + * correctProbs: (1, numCases) (*out) + * top5Probs: (1, numCases) (*out) + * + * target: (1, numCases) + * + * This routine uses energeis to determine top-1 score because they're more accurate than top-n + * probabilities, which have numerical errors in them. + */ +__global__ void kMultiSoftmaxCost_engs(float* probs, float* energies, float* labels, float* maxEnergies, + float* labelLogProbs, float* correctProbs, float* top5Probs, + const int numCases, const int numOut, const int setSize) { + const int tx = blockIdx.x * LOGREG_ERR_THREADS_X + threadIdx.x; + + if (tx < numCases) { + const int label = int(labels[tx]); + const float maxe = maxEnergies[tx]; + const float labelp = probs[label * numCases + tx]; + const float labele = energies[label * numCases + tx]; + + labelLogProbs[tx] = __logf(labelp); + + int numBiggerEnergies = 0, numEqualsEnergies = 0; + for (int i = 0; i < numOut; ++i) { + numBiggerEnergies += energies[i * numCases + tx] > labele; + numEqualsEnergies += energies[i * numCases + tx] == labele; + } + + const int slotsLeft = setSize - numBiggerEnergies; + + top5Probs[tx] = slotsLeft <= 0 ? 0 : (numEqualsEnergies <= slotsLeft ? 1 : float(slotsLeft) / numEqualsEnergies); +// if (numEqualsEnergies != 1) { +// printf("numEqualsEnergies: %d, labelp: %e, maxp: %e\n", numEqualsEnergies, labelp, maxe); +// } + correctProbs[tx] = labele != maxe ? 0.0f : 1.0f / float(numEqualsEnergies); + } +} + +/* + * E = -log(y_t) + * probs: (numOut, numCases) + * labels: (1, numCases) + * maxEnergies: (1, numCases) + * labelLogProbs: (1, numCases) (*out) + * correctProbs: (1, numCases) (*out) + * top5Probs: (1, numCases) (*out) + * + * target: (1, numCases) + * + */ +__global__ void kMultiSoftmaxCost(float* probs, float* labels, float* maxProbs, + float* labelLogProbs, float* correctProbs, float* top5Probs, + const int numCases, const int numOut, const int setSize) { + const int tx = blockIdx.x * LOGREG_ERR_THREADS_X + threadIdx.x; + + if (tx < numCases) { + const int label = int(labels[tx]); + const float maxp = maxProbs[tx]; + const float labelp = probs[label * numCases + tx]; + + labelLogProbs[tx] = __logf(labelp); + + int numBiggerProbs = 0, numEqualsProbs = 0; + for (int i = 0; i < numOut; ++i) { + numBiggerProbs += probs[i * numCases + tx] > labelp; + numEqualsProbs += probs[i * numCases + tx] == labelp; + } + + const int slotsLeft = setSize - numBiggerProbs; + + top5Probs[tx] = slotsLeft <= 0.0f ? 0.0f : (numEqualsProbs <= slotsLeft ? 1.0f : float(slotsLeft) / numEqualsProbs); + correctProbs[tx] = labelp != maxp ? 
0.0f : 1.0f / float(numEqualsProbs); + } +} + +/* + * E = -log(y_t) + * probs: (numOut, numCases) + * labels: (1, numCases) + * maxProbs: (1, numCases) + * labelLogProbs: (1, numCases) (*out) + * correctProbs: (1, numCases) (*out) + * top5Probs: (1, numCases) (*out) + * + * target: (1, numCases) == log(y_l[labels,:] + */ +void computeMultiSoftmaxCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& energies, NVMatrix& labelLogProbs_out, + NVMatrix& correctProbs_out, NVMatrix& top5Probs_out, int setSize, bool useEnergies) { + int numCases = probs.getNumCols(); + int numOut = probs.getNumRows(); + + assert(labels.getNumElements() == numCases); + assert(!labels.isTrans()); + assert(!probs.isTrans()); + assert(labels.isContiguous()); + assert(probs.isContiguous()); + assert(energies.isContiguous()); + assert(energies.isSameDims(probs)); + +// NVMatrix& maxProbs = probs.max(0); + NVMatrix& maxPE = useEnergies ? energies.max(0) : probs.max(0); + + labelLogProbs_out.resize(1, numCases); + correctProbs_out.resize(1, numCases); + top5Probs_out.resize(1, numCases); + dim3 threads(LOGREG_ERR_THREADS_X, 1); + dim3 blocks(DIVUP(numCases, LOGREG_ERR_THREADS_X), 1); + + if (useEnergies) { + cudaFuncSetCacheConfig(kMultiSoftmaxCost_engs, cudaFuncCachePreferL1); + kMultiSoftmaxCost_engs<<>>(probs.getDevData(), energies.getDevData(), labels.getDevData(), maxPE.getDevData(), + labelLogProbs_out.getDevData(), correctProbs_out.getDevData(), top5Probs_out.getDevData(), + numCases, numOut, setSize); + } else { + cudaFuncSetCacheConfig(kMultiSoftmaxCost, cudaFuncCachePreferL1); + kMultiSoftmaxCost<<>>(probs.getDevData(), labels.getDevData(), maxPE.getDevData(), + labelLogProbs_out.getDevData(), correctProbs_out.getDevData(), top5Probs_out.getDevData(), + numCases, numOut, setSize); + } + + getLastCudaError("computeLogregCost: Kernel execution failed"); +// cudaThreadSynchronize(); + delete &maxPE; +} + +/* + * energies: (numCases, numOut) (yes this is weird) + * bLattice: (numOut + 1, setSize, numCases) (*out) + * + * This is intended to work for cases when setSize <= 32. + * Block size (y, x) = (1, B_X) + * + * NOTE: + * B_X must be a multiple of 32 + */ +template +__global__ void kMSMBackward(float* energies, float* bLattice, const int numCases, const int numOut, const int setSize) { + extern __shared__ float shmem[]; + const int tidx = blockIdx.x * B_X + threadIdx.x; + const int casesPerWarp = 32 / setSize; + const int casesPerBlock = casesPerWarp * B_X / 32; + const int numWorkersPerWarp = casesPerWarp * setSize; + const int tidxInWarp = tidx % 32; + const int warpIdx = tidx / 32; + const int blockCaseIdx = blockIdx.x * casesPerBlock; + const int caseIdxInBlock = threadIdx.x / setSize; + const int caseIdx = warpIdx * casesPerWarp + tidxInWarp / setSize; + const bool doWork = tidxInWarp < numWorkersPerWarp && caseIdx < numCases; + + const int bIdx = threadIdx.x + threadIdx.x/setSize + 1; + volatile float* B = shmem; + volatile float* shE = &shmem[B_X + casesPerBlock]; // Dimensions (casesPerBlock, 32 + 1) + + const int loadY = warpIdx; + const int loadX = tidxInWarp; + + energies += (blockCaseIdx + loadY) * numOut + loadX; + bLattice += tidx; + if (blockIdx.x != 0) { + return; + } + // The first row of the lattice has a 1 in the columns corresponding to + // zero set size, 0 elsewhere. 
+ for (int t = threadIdx.x; t < B_X + casesPerBlock; t += B_X) { + B[t] = t % setSize == 0; + } + + for (int l = 0; l < numOut / 32; ++l) { // Load 32 energies at a time for casesPerBlock cases + __syncthreads(); + // Load energies into shmem + for (int r = 0; r < casesPerBlock && blockCaseIdx + loadY + r < numCases; r += B_X / 32) { + shE[(r + loadY) * (32 + 1) + loadX] = __expf(energies[r * numOut]); + printf("%f\n", energies[r * numOut]); + } + __syncthreads(); + + // Compute 32 rows of the lattice + if (doWork) { + #pragma unroll + for (int i = 0; i < 32; ++i) { + B[bIdx] = B[bIdx - 1] * shE[caseIdxInBlock * (32 + 1) + i] + B[bIdx]; + bLattice[i * numCases * setSize] = B[bIdx]; +// printf("thread %d wrote %d to idx %d\n", tidx, B[bIdx], bIdx); + } + } + printf("thread %d made it\n", tidx); + bLattice += 32 * numCases * setSize; + } +// if (numOut % 32 != 0) { +// __syncthreads(); +// +// } +} + +/* + * energies: (numCases, numOut) (yes this is weird) + * bLattice: (numOut + 1, setSize, numCases) (*out) + */ +void MSMBackward(NVMatrix& energies, NVMatrix& bLattice, int setSize) { + int numCases = energies.getNumRows(); + int numOut = energies.getNumCols(); + + assert(!energies.isTrans()); + assert(!bLattice.isTrans()); + assert(energies.isContiguous()); + assert(energies.isContiguous()); + + bLattice.resize((numOut + 1) * setSize, numCases); + int B_X = 32; + int casesPerBlock = B_X / setSize; + int shmem = 4*(B_X + casesPerBlock + casesPerBlock * (32 + 1)); + dim3 threads(B_X, 1); + dim3 blocks(DIVUP(numCases*setSize, B_X), 1); + printf("allocating %d words of shmem\n", shmem/4); + cudaFuncSetCacheConfig(kMSMBackward<32>, cudaFuncCachePreferShared); + kMSMBackward<32><<>>(energies.getDevData(), bLattice.getDevData(), + numCases, numOut, setSize); + getLastCudaError("kMSMBackward: Kernel execution failed"); +} + +/* + * E = sum(p_l * log(y_l)) + * probs: (numOut, numCases) + * labels: (numOut, numCases) + * maxProbs: (1, numCases) + * labelLogProbs: (1, numCases) (*out) + * correctProbs: (1, numCases) (*out) + * + * target: (1, numCases) + */ +__global__ void kCrossEntCost(float* probs, float* labels, float* maxProbs, float* labelLogProbs, float* correctProbs, + const int numCases, const int numOut) { + const int tx = blockIdx.x * LOGREG_ERR_THREADS_X + threadIdx.x; + + if (tx < numCases) { + probs += tx; + labels += tx; + maxProbs += tx; + labelLogProbs += tx; + correctProbs += tx; + + const float maxp = maxProbs[0]; + + /* + * Compute the probability of guessing the correct case if you take the most-probable label. + * + * This is done like this: + * + * - If the most probable label is not equal to the true label, then the probability is zero. + * - Otherwise, the probability is 1 / (number of labels whose probability is equal to the maximum). + * + * This is certainly overkill -- in practice, it's just about impossible for two labels to get assigned + * maximum probability. But it's a safety measure to prevent over-estimating your accuracy. + * Though it could never happen in reality. Well it could. But it wouldn't. Cool? 
+ */ + float crossEnt = 0.0f; + int numMax = 0; + bool correctLabel = false; + for (int i = 0; i < numOut; i++) { + const float label_prob = labels[i * numCases]; + const float model_prob = probs[i * numCases]; + numMax += model_prob == maxp; + crossEnt += label_prob * safelog(model_prob); + correctLabel |= model_prob == maxp && label_prob > 0.0f; + } + labelLogProbs[0] = crossEnt; + if (!correctLabel) { + correctProbs[0] = 0.0f; + } else { + correctProbs[0] = 1.0f / float(numMax); + } + } +} + +/* + * E = sum(p_l * log(y_l)) + * y_l: (numOut, numCases) + * labels: (numOut, numCases) + * + * dE_dy_l: (numOut, numCases) + */ +template +__global__ void kCrossEntGrad(float* y_l, float* labels, float* dE_dy_l, const int numCases, + const int numOut, const float gradCoeff) { + const int tx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x; + const int ty = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y; + const int tidx = ty * numCases + tx; + + if (ty < numOut && tx < numCases) { + const float label_prob = labels[tidx]; + const float model_prob = y_l[tidx]; + const float v = gradCoeff * __fdividef(label_prob, model_prob); + if (add) { + dE_dy_l[tidx] += v; + } else { + dE_dy_l[tidx] = v; + } + } +} + +/* + * E = sum(p_l * log(y_l)) + * y_l: (numOut, numCases) + * labels: (numOut, numCases) + * + * dE_dx_l: (numOut, numCases) + */ +template +__global__ void kCrossEntSoftmaxGrad(float* y_l, float* labels, float* dE_dx_l, const int numCases, + const int numOut, const float gradCoeff) { + const int tx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x; + const int ty = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y; + const int tidx = ty * numCases + tx; + + if (ty < numOut && tx < numCases) { + float v = 0; + const float model_prob = y_l[tidx]; + for (int j = 0; j < numOut; j++) { + const float label_prob = labels[j * numCases + tx]; + v += label_prob * ((j == ty) - model_prob); + } + v *= gradCoeff; + if (add) { + dE_dx_l[tidx] += v; + } else { + dE_dx_l[tidx] = v; + } + } +} + + +/* + * E = -log(y_t) + * probs: (numOut, numCases) + * labels: (1, numCases) + * maxProbs: (1, numCases) + * labelLogProbs: (1, numCases) (*out) + * correctProbs: (1, numCases) (*out) + * + * target: (1, numCases) + */ +__global__ void kLogregCost(float* probs, float* labels, float* maxProbs, float* labelLogProbs, float* correctProbs, + const int numCases, const int numOut) { + const int tx = blockIdx.x * LOGREG_ERR_THREADS_X + threadIdx.x; + + if (tx < numCases) { + const int label = int(labels[tx]); + const float maxp = maxProbs[tx]; + const float labelp = probs[label * numCases + tx]; + + labelLogProbs[tx] = __logf(labelp); + + /* + * Compute the probability of guessing the correct case if you take the most-probable label. + * + * This is done like this: + * + * - If the most probable label is not equal to the true label, then the probability is zero. + * - Otherwise, the probability is 1 / (number of labels whose probability is equal to the maximum). + * + * This is certainly overkill -- in practice, it's just about impossible for two labels to get assigned + * maximum probability. But it's a safety measure to prevent over-estimating your accuracy. + * Though it could never happen in reality. Well it could. But it wouldn't. Cool? 
+ */ + if (labelp != maxp) { + correctProbs[tx] = 0; + } else { + int numMax = 0; + for (int i = 0; i < numOut; i++) { + numMax += probs[i * numCases + tx] == maxp; + } + correctProbs[tx] = 1.0f / float(numMax); + } + } +} + +/* + * E = -log(y_t) + * y_l: (numOut, numCases) + * labels: (1, numCases) + * + * dE_dy_l: (numOut, numCases) + */ +template +__global__ void kLogregCostGrad(float* y_l, float* labels, float* dE_dy_l, const int numCases, + const int numOut, const float gradCoeff) { + const int tx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x; + const int ty = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y; + const int tidx = ty * numCases + tx; + + if (ty < numOut && tx < numCases) { + const int label = int(labels[tx]); + float v = gradCoeff * (label == ty); + v = __fdividef(v, y_l[tidx]); + if (add) { + dE_dy_l[tidx] += v; + } else { + dE_dy_l[tidx] = v; + } + } +} + +/* + * E = -log(y_t) + * y_l: (numOut, numCases) + * labels: (1, numCases) + * + * dE_dx_l: (numOut, numCases) + */ +template +__global__ void kLogregSoftmaxGrad(float* y_l, float* labels, float* dE_dx_l, const int numCases, + const int numOut, const float gradCoeff) { + const int tx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x; + const int ty = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y; + const int tidx = ty * numCases + tx; + + if (ty < numOut && tx < numCases) { + const int label = int(labels[tx]); + float v = gradCoeff * ((label == ty) - y_l[tidx]); + if (add) { + dE_dx_l[tidx] += v; + } else { + dE_dx_l[tidx] = v; + } + } +} + +/* + * dE_dy_l: (numOut, numCases) + * y_l: (numOut, numCases) + * + * dE_dx_l: (numOut, numCases) + */ +template +__global__ void kSoftmaxGrad(float* dE_dy_l, float* y_l, float* dE_dx_l, const int numCases, const int numOut) { + const int tx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x; + const int ty = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y; + const int tidx = ty * numCases + tx; + + if (ty < numOut && tx < numCases) { + float v = 0; + for (int j = 0; j < numOut; j++) { + v += dE_dy_l[j * numCases + tx] * ((j == ty) - y_l[j * numCases + tx]); + } + v *= y_l[tidx]; + + if (add) { + dE_dx_l[tidx] += v; + } else { + dE_dx_l[tidx] = v; + } + } +} + +template +__global__ void kEltwiseMaxGrad(float* actGrad, float* input, float* output, float* target, + const int numElements) { + for (int i = B_X * blockIdx.x + threadIdx.x; i < numElements; i += B_X * gridDim.x) { + if (add) { + target[i] += actGrad[i] * (output[i] == input[i]); + } else { + target[i] = actGrad[i] * (output[i] == input[i]); + } + } +} + +void computeEltwiseMaxGrad(NVMatrix& actGrad, NVMatrix& input, NVMatrix& output, NVMatrix& target, bool add) { + assert(actGrad.isContiguous()); + assert(output.isContiguous()); + assert(input.isContiguous()); + assert(actGrad.isSameDims(input)); + assert(actGrad.isSameDims(output)); + + dim3 blocks(DIVUP(actGrad.getNumElements(), 128)); + dim3 threads(128); + if (add) { + assert(actGrad.isSameDims(target)); + cudaFuncSetCacheConfig(kEltwiseMaxGrad<128, true>, cudaFuncCachePreferL1); + kEltwiseMaxGrad<128, true><<>>(actGrad.getDevData(), input.getDevData(), output.getDevData(), target.getDevData(), actGrad.getNumElements()); + } else { + target.resize(actGrad); + cudaFuncSetCacheConfig(kEltwiseMaxGrad<128, false>, cudaFuncCachePreferL1); + kEltwiseMaxGrad<128, false><<>>(actGrad.getDevData(), input.getDevData(), output.getDevData(), target.getDevData(), actGrad.getNumElements()); + } + + getLastCudaError("computeEltwiseMaxGrad: Kernel execution failed"); +} + 
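The kLogregSoftmaxGrad and kSoftmaxGrad kernels above use the usual softmax gradient identities: when a softmax feeds a logistic-regression cost E = -log(y_label), the value written per output is coeff * ((j == label) - y_j), and in the generic case dE/dx_i = y_i * (dE/dy_i - sum_j dE/dy_j * y_j). The single-case CPU references below mirror those formulas; they are illustrative sanity-check helpers, not functions from this codebase:

    #include <cstddef>
    #include <vector>

    // CPU reference for the fused softmax + logreg gradient of kLogregSoftmaxGrad,
    // for one case: v_j = coeff * ((j == label) - y_j), i.e. coeff times the negative
    // gradient of E = -log(y_label) with respect to the softmax inputs.
    std::vector<float> logregSoftmaxGradRef(const std::vector<float>& y, int label, float coeff) {
        std::vector<float> v(y.size());
        for (std::size_t j = 0; j < y.size(); ++j) {
            v[j] = coeff * ((static_cast<int>(j) == label ? 1.0f : 0.0f) - y[j]);
        }
        return v;
    }

    // CPU reference for the generic softmax backward pass of kSoftmaxGrad:
    // dE/dx_i = y_i * sum_j dE/dy_j * ((j == i) - y_j), written in the same
    // inner-loop-over-j form as the kernel.
    std::vector<float> softmaxGradRef(const std::vector<float>& dEdy, const std::vector<float>& y) {
        std::vector<float> dEdx(y.size());
        for (std::size_t i = 0; i < y.size(); ++i) {
            float v = 0.0f;
            for (std::size_t j = 0; j < y.size(); ++j) {
                v += dEdy[j] * ((j == i ? 1.0f : 0.0f) - y[j]);
            }
            dEdx[i] = v * y[i];
        }
        return dEdx;
    }
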
+/* + * E = sum_i{-p_i*log(y_i)} + * probs: (numOut, numCases) + * labels: (numOut, numCases) + * maxProbs: (1, numCases) + * labelLogProbs: (1, numCases) (*out) + * correctProbs: (1, numCases) (*out) + * + * target: (1, numCases) + */ +void computeCrossEntCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& labelLogProbs_out, NVMatrix& correctProbs_out) { + int numCases = probs.getNumCols(); + int numOut = probs.getNumRows(); + + assert(labels.isSameDims(probs)); + assert(!labels.isTrans()); + assert(!probs.isTrans()); + assert(labels.isContiguous()); + assert(probs.isContiguous()); + + NVMatrix& maxProbs = probs.max(0); + + labelLogProbs_out.resize(1, numCases); + correctProbs_out.resize(1, numCases); + dim3 threads(LOGREG_ERR_THREADS_X, 1); + dim3 blocks(DIVUP(numCases, LOGREG_ERR_THREADS_X), 1); + cudaFuncSetCacheConfig(kCrossEntCost, cudaFuncCachePreferL1); + kCrossEntCost<<>>(probs.getDevData(), labels.getDevData(), maxProbs.getDevData(), + labelLogProbs_out.getDevData(), correctProbs_out.getDevData(), + numCases, numOut); + getLastCudaError("kCrossEntCost: Kernel execution failed"); + + delete &maxProbs; +} + +void computeCrossEntGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff) { + int numCases = probs.getLeadingDim(); + int numOut = probs.getFollowingDim(); + assert(labels.isSameDims(probs)); + assert(probs.isContiguous()); + assert(target.isContiguous()); + assert(labels.isContiguous()); + assert(!labels.isTrans()); + assert(!probs.isTrans()); + + dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y); + dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y)); + if (!add) { + target.resize(probs); + kCrossEntGrad<<>>(probs.getDevData(), labels.getDevData(), target.getDevData(), + numCases, numOut, coeff); + } else { + kCrossEntGrad<<>>(probs.getDevData(), labels.getDevData(), target.getDevData(), + numCases, numOut, coeff); + } + + getLastCudaError("kCrossEntGrad: Kernel execution failed"); +} + +void computeSoftmaxGrad(NVMatrix& acts, NVMatrix& actsGrad, NVMatrix& target, bool add) { + int numCases = acts.getLeadingDim(); + int numOut = acts.getFollowingDim(); + + assert(acts.isSameDims(actsGrad)); + assert(acts.isContiguous()); + assert(actsGrad.isContiguous()); + assert(target.isContiguous()); + assert(acts.isTrans()); + assert(actsGrad.isTrans()); + + dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y); + dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y)); + if (!add) { + target.resize(acts); + kSoftmaxGrad<<>>(actsGrad.getDevData(), acts.getDevData(), target.getDevData(), numCases, numOut); + } else { + kSoftmaxGrad<<>>(actsGrad.getDevData(), acts.getDevData(), target.getDevData(), numCases, numOut); + } + getLastCudaError("computeSoftmaxGrad: Kernel execution failed"); +} + +void computeCrossEntSoftmaxGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff) { + int numCases = probs.getLeadingDim(); + int numOut = probs.getFollowingDim(); + assert(labels.getLeadingDim() == probs.getLeadingDim() && labels.getFollowingDim() == probs.getFollowingDim()); + assert(probs.isContiguous()); + assert(target.isContiguous()); + assert(labels.isContiguous()); + assert(probs.isTrans()); + assert(!labels.isTrans()); + + dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y); + dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y)); + if (!add) { + target.resize(probs); + cudaFuncSetCacheConfig(kCrossEntSoftmaxGrad, 
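/*
 * Preferring L1 here trades shared memory for a larger L1 cache; like the other
 * logistic-regression kernels in this file, kCrossEntSoftmaxGrad presumably keeps nothing
 * in shared memory (each thread handles one (case, output) element independently), so the
 * extra L1 is the better use of the on-chip memory. The launch geometry above follows the
 * same one-thread-per-element pattern: x spans cases, y spans outputs, and DIVUP rounds
 * the grid up so partial blocks cover the remainder.
 */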
cudaFuncCachePreferL1); + kCrossEntSoftmaxGrad<<>>(probs.getDevData(), labels.getDevData(), target.getDevData(), + numCases, numOut, coeff); + } else { + cudaFuncSetCacheConfig(kCrossEntSoftmaxGrad, cudaFuncCachePreferL1); + kCrossEntSoftmaxGrad<<>>(probs.getDevData(), labels.getDevData(), target.getDevData(), + numCases, numOut, coeff); + } + getLastCudaError("kCrossEntSoftmaxGrad: Kernel execution failed"); +} + +/* + * E = -log(y_t) + * probs: (numOut, numCases) + * labels: (1, numCases) + * maxProbs: (1, numCases) + * labelLogProbs: (1, numCases) (*out) + * correctProbs: (1, numCases) (*out) + * + * target: (1, numCases) == log(y_l[labels,:] + */ +void computeLogregCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& labelLogProbs_out, NVMatrix& correctProbs_out) { + int numCases = probs.getNumCols(); + int numOut = probs.getNumRows(); + + assert(labels.getNumElements() == numCases); + assert(!labels.isTrans()); + assert(!probs.isTrans()); + assert(labels.isContiguous()); + assert(probs.isContiguous()); + + NVMatrix& maxProbs = probs.max(0); + + labelLogProbs_out.resize(1, numCases); + correctProbs_out.resize(1, numCases); + dim3 threads(LOGREG_ERR_THREADS_X, 1); + dim3 blocks(DIVUP(numCases, LOGREG_ERR_THREADS_X), 1); + cudaFuncSetCacheConfig(kLogregCost, cudaFuncCachePreferL1); + kLogregCost<<>>(probs.getDevData(), labels.getDevData(), maxProbs.getDevData(), + labelLogProbs_out.getDevData(), correctProbs_out.getDevData(), + numCases, numOut); + getLastCudaError("computeLogregCost: Kernel execution failed"); +// cudaThreadSynchronize(); + delete &maxProbs; +} + +void computeLogregGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff) { + int numCases = probs.getLeadingDim(); + int numOut = probs.getFollowingDim(); + assert(labels.getNumElements() == numCases); + assert(probs.isContiguous()); + assert(target.isContiguous()); + assert(labels.isContiguous()); + assert(!labels.isTrans()); + assert(!probs.isTrans()); + + dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y); + dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y)); + if (!add) { + target.resize(probs); + kLogregCostGrad<<>>(probs.getDevData(), labels.getDevData(), target.getDevData(), + numCases, numOut, coeff); + } else { + kLogregCostGrad<<>>(probs.getDevData(), labels.getDevData(), target.getDevData(), + numCases, numOut, coeff); + } + + getLastCudaError("computeLogregGrad: Kernel execution failed"); +} + +void computeLogregSoftmaxGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff) { + int numCases = probs.getLeadingDim(); + int numOut = probs.getFollowingDim(); + assert(labels.getNumElements() == numCases); + assert(probs.isContiguous()); + assert(target.isContiguous()); + assert(labels.isContiguous()); + assert(probs.isTrans()); + + dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y); + dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y)); + if (!add) { + target.resize(probs); + kLogregSoftmaxGrad<<>>(probs.getDevData(), labels.getDevData(), target.getDevData(), + numCases, numOut, coeff); + } else { + kLogregSoftmaxGrad<<>>(probs.getDevData(), labels.getDevData(), target.getDevData(), + numCases, numOut, coeff); + } + + getLastCudaError("computeLogregSoftmaxGrad: Kernel execution failed"); +} diff --git a/src/lr.cu b/src/lr.cu new file mode 100644 index 0000000..7d31fbe --- /dev/null +++ b/src/lr.cu @@ -0,0 +1,186 @@ +#include +#include +#include + +using namespace std; + +/* + * 
================================== + * LearningRateSchedule + * ================================== + */ +LearningRateSchedule& LearningRateSchedule::make(PyObject* lrsDict, double baseRate) { + string type = pyDictGetString(lrsDict, "type"); + if (type == "default") { + return *new LearningRateSchedule(baseRate, 0); + } else { + PyObject* paramsDict = PyDict_GetItemString(lrsDict, "params"); + double tgtFactor = pyDictGetFloat(paramsDict, "tgtFactor"); + double noiseStdev = pyDictGetFloat(paramsDict, "noiseStdev"); + if (type == "linear") { + return *new LinearLRS(baseRate, tgtFactor, noiseStdev); + } else if (type == "exp") { + return *new ExpLRS(baseRate, tgtFactor, noiseStdev); + } else if (type == "dexp") { + double numSteps = pyDictGetInt(paramsDict, "numSteps"); + return *new DiscreteExpLRS(baseRate, tgtFactor, noiseStdev, numSteps); + } else if (type == "jdexp") { + double numSteps = pyDictGetInt(paramsDict, "numSteps"); + return *new JumpyDiscreteExpLRS(baseRate, tgtFactor, noiseStdev, numSteps); + } + } + throw string("Unknown learning rate schedule type ") + type; +} + +LearningRateSchedule::LearningRateSchedule(double baseRate, double noiseStdev) + : _baseRate(baseRate), _noiseStdev(noiseStdev), _haveRandnSpare(false), _randnSpare(0) { +} + +LearningRateSchedule::LearningRateSchedule(double baseRate) + : _baseRate(baseRate), _noiseStdev(0), _haveRandnSpare(false), _randnSpare(0) { +} + +double LearningRateSchedule::getRate(double progress) { + return _noiseStdev > 0 ? _getRate(progress) * (1 + abs(randn()) * _noiseStdev) + : _getRate(progress); +} + +double LearningRateSchedule::_getRate(double progress) { + return _baseRate; +} + +inline double LearningRateSchedule::randn() { + if (!_haveRandnSpare) { + double T = 2 * 3.1415 * rand(); + double R = std::sqrt(-2 * std::log(rand())); + _randnSpare = R * std::sin(T); + _haveRandnSpare = true; + return R * std::cos(T); + } + _haveRandnSpare = false; + return _randnSpare; +} + +// This should never generate zero +inline double LearningRateSchedule::rand() const { + return double(1L + random()) / (1L + RAND_MAX); +} + +inline double LearningRateSchedule::abs(double x) const { + return x > 0 ? x : -x; +} + +double LearningRateSchedule::getBaseRate() const { + return _baseRate; +} + +LearningRateSchedule::~LearningRateSchedule() { +} + +/* + * ================================== + * LinearLRS + * ================================== + */ +LinearLRS::LinearLRS(double baseRate, double tgtFactor, double noiseStdev) +: LearningRateSchedule(baseRate, noiseStdev) { + _finalRate = baseRate / tgtFactor; +} + +double LinearLRS::_getRate(double progress) { + return _baseRate * (1 - progress) + _finalRate * progress; +} + +/* + * ================================== + * ExpLRS + * ================================== + */ +ExpLRS::ExpLRS(double baseRate, double tgtFactor, double noiseStdev) +: LearningRateSchedule(baseRate, noiseStdev) { + double finalRate = baseRate / tgtFactor; + _pow = baseRate == 0 ? 
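/*
 * Sketch of where this exponent comes from (assuming 0 < baseRate and baseRate != 1 so
 * the logs are well-defined): ExpLRS::_getRate returns baseRate^(1 + progress * _pow),
 * so we need
 *   progress = 0  ->  baseRate^1          = baseRate
 *   progress = 1  ->  baseRate^(1 + _pow) = finalRate = baseRate / tgtFactor
 * Taking logs of the second condition gives 1 + _pow = log(finalRate) / log(baseRate),
 * i.e. _pow = log(finalRate) / log(baseRate) - 1, which is the non-zero branch here.
 * The baseRate == 0 case just picks a harmless placeholder exponent of 1.
 */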
1 : (std::log(finalRate) / std::log(baseRate) - 1); +} + +double ExpLRS::_getRate(double progress) { + return std::pow(_baseRate, 1.0 + progress * _pow); +} + +/* + * ================================== + * TanhLRS + * ================================== + */ +TanhLRS::TanhLRS(double baseRate, double tgtFactor, double noiseStdev) +: LearningRateSchedule(baseRate, noiseStdev), _alpha(0), _beta(0) { + if (baseRate > 0) { + double finalRate = baseRate / tgtFactor; + _beta = 0.5 * (baseRate + finalRate); + _alpha = 2 * atanh((baseRate - finalRate) / (baseRate + finalRate)); + } +} + +double TanhLRS::_getRate(double progress) { + return _beta * (tanh(-_alpha * (progress - 0.5)) + 1.0); +} + +/* + * ================================== + * DiscreteExpLRS + * ================================== + */ +DiscreteExpLRS::DiscreteExpLRS(double baseRate, double tgtFactor, double noiseStdev, int numSteps) +: LearningRateSchedule(baseRate, noiseStdev) { + ExpLRS elrs(baseRate, tgtFactor, 0); + double finalRate = baseRate / tgtFactor; + for (int i = 0; i < numSteps - 1; i++) { + double progress = double(i) / (numSteps - 1); + _rates.push_back(elrs._getRate(progress)); + } + _rates.push_back(finalRate); + //printf("initialized base %e, final %e, stpes %d\n", baseRate, finalRate, numSteps); +} + +double DiscreteExpLRS::_getRate(double progress) { + for (int i = 0; i < _rates.size(); ++i) { + if (progress <= double(i + 1) / _rates.size()) { + return _rates[i]; + } + } + return _rates.back(); +} + +/* + * ================================== + * JumpyDiscreteExpLRS + * ================================== + */ +JumpyDiscreteExpLRS::JumpyDiscreteExpLRS(double baseRate, double tgtFactor, double noiseStdev, int numSteps) +: DiscreteExpLRS(baseRate, tgtFactor, noiseStdev, numSteps) { +} + +double JumpyDiscreteExpLRS::_getRate(double progress) { + int rateIdx = 0; + for (int i = 0; i < _rates.size(); ++i) { + if (progress <= double(i + 1) / _rates.size()) { + rateIdx = i; + break; + } + } + // The midpoint of the interval that progress falls into. + double intervalMid = double(rateIdx + 0.5) / _rates.size(); + // Jumpy learning rate works like this: + // If progress is before the midpoint of the current interval, + // it returns the same learning rate as would DiscreteExpLRS. + // Else, + // it returns the learning rate of the *previous* interval (provided there is one). +// rateIdx -= rateIdx > 0 && progress > 0.2 && progress < 0.9 && progress > intervalMid; + + // Uncomment this (and comment line above) to use variant 2: + // Instead of using the learning rate of the previous interval, this uses + // the geometric average of the learning rates of the current and previous + // intervals. + bool jump = rateIdx > 0 && progress > 0.2 && progress < 0.9 && progress > intervalMid; + return jump ? sqrt(_rates[rateIdx] * _rates[rateIdx - 1]) : _rates[rateIdx]; +// return _rates[rateIdx]; +} diff --git a/src/multisoftmax.cpp b/src/multisoftmax.cpp new file mode 100644 index 0000000..d3d82c4 --- /dev/null +++ b/src/multisoftmax.cpp @@ -0,0 +1,126 @@ + +#include +//#include +#include + +using namespace std; + +// Computes log(exp(x) + exp(y)) +inline double logadd(const double x, const double y) { + if (x <= -INF && y <= -INF) { + return -INF; + } + const double M = max(x,y); + const double m = min(x,y); + const double diff = M - m; +// return diff > 15 ? M : M + LOG(1.0 + EXP(-diff)); +// return m <= -INF ? M : M + LOG(1.0f + EXP(-diff)); + return diff > 15 ? M : (diff > 5 ? 
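/*
 * A worked view of the branches in this return (with M = max(x, y), diff = M - m >= 0):
 *   exact:          log(exp(x) + exp(y)) = M + log(1 + exp(-diff))
 *   diff > 15:      exp(-diff) < 4e-7, so the correction is dropped and M alone is returned
 *   5 < diff <= 15: log(1 + exp(-diff)) ~= exp(-diff) (first-order expansion), which
 *                   skips the LOG call at negligible cost in accuracy
 *   otherwise:      the exact expression is used
 * This is the usual log-sum-exp trick; pulling out M first keeps EXP from overflowing.
 */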
M + EXP(-diff) : M + LOG(1.0 + EXP(-diff))); +} + +/* + * elts: (numCases, numOut) + * B: (N + 1, size + 1) -- batckward lattice matrix, MUST BE initially -INF + * fixed: (numCases, 1) + * probs: (numCases, numOut) (*out) + * + * double precision is much faster than single. :/ + */ +void MultiSoftmaxCPU_T_logspace(Matrix& elts, Matrix& logB, Matrix& probs, Matrix& fixed, int size, bool nofix) { + int numCases = elts.getNumRows(); + assert(probs.isSameDims(elts)); + assert(!elts.isTrans()); + assert(!logB.isTrans()); + assert(!probs.isTrans()); + assert(fixed.getNumRows() == numCases); + assert(fixed.getNumCols() == 1); + int N = elts.getNumCols(); + Matrix& logF = *new Matrix(size + 1, 1); // Forward column + + // Prepare logB + logB(N, 0) = 0; + + for (int c = 0; c < numCases; ++c) { + int fx = nofix ? -1 : int(fixed(c, 0)); + // Backward pass + for (int i = N - 1; i >= 0; --i) { + double elt = elts(c, i); + logB(i, 0) = i <= fx ? -INF : 0.0f; + for (int s = max(1, size - i); s < size + 1; ++s) { + logB(i, s) = fx == i ? logB(i + 1, s - 1) + elt : logadd(logB(i + 1, s - 1) + elt, logB(i + 1, s)); + } + } + // Log partition function + double logZ = logB(0, size); + + // Forward pass + logF.apply(Matrix::ONE); + logF.scale(-INF); + logF(0, 0) = 0; + + for (int i = 1; i < N + 1; ++i) { + double logy = -INF; + double elt = elts(c, i - 1); + for (int s = size; s >= 0; --s) { + if (s < size) { + logy = logadd(logy, logF(s, 0) + logB(i, size - 1 - s)); + } + if (s > 0) { + logF(s, 0) = fx == i - 1 ? logF(s - 1, 0) + elt : logadd(logF(s - 1, 0) + elt, logF(s, 0)); + } else if (fx == i - 1) { + logF(0, 0) = -INF; + } + } + logy += elt - logZ; + probs(c, i - 1) = EXP(logy) - (fx >= 0 ? probs(c, i - 1) : 0); + } + } + + delete &logF; +} + +MultiSoftmaxWorker::MultiSoftmaxWorker(Matrix* elts, Matrix* B, Matrix* probs, Matrix* fixed, int size, bool nofix) + : Thread(true), _elts(elts), _B(B), _probs(probs), _fixed(fixed), _size(size), _nofix(nofix) { + +} + +MultiSoftmaxWorker::~MultiSoftmaxWorker() { + delete _elts; + delete _probs; + delete _fixed; +} + +void* MultiSoftmaxWorker::run() { + MultiSoftmaxCPU_T_logspace(*_elts, *_B, *_probs, *_fixed, _size, _nofix); + return NULL; +} + +/* + * elts: (numCases, numOut) + * B: vector of (N + 1, size + 1) -- batckward lattice matrix, should be initially zero + * fixed: (numCases, 1) + * probs: (numCases, numOut) (*out) + * + * NOTE: remember to write a version of this for transposed matrices. + * It may end up being significantly faster, which is important if + * I plan to use CPU for this. 
+ */ +void MultiSoftmaxCPU_T_parallel(Matrix& elts, vector& B, Matrix& probs, Matrix& fixed, int size, bool nofix) { + int numCases = elts.getNumRows(); + int numWorkers = min(numCases, (int)B.size()); + probs.resize(elts); + int casesPerWorker = DIVUP(numCases, B.size()); + numWorkers = min(numWorkers, DIVUP(numCases, casesPerWorker)); + vector workers; + for (int i = 0; i < numWorkers; ++i) { + Matrix* eltSlice = &elts.sliceRows(i * casesPerWorker, min(elts.getNumRows(), (long int)(i + 1) * casesPerWorker)); + Matrix* probSlice = &probs.sliceRows(i * casesPerWorker, min(elts.getNumRows(), (long int)(i + 1) * casesPerWorker)); + Matrix* fixedSlice = &fixed.sliceRows(i * casesPerWorker, min(elts.getNumRows(), (long int)(i + 1) * casesPerWorker)); + workers.push_back(new MultiSoftmaxWorker(eltSlice, B[i], probSlice, fixedSlice, size, nofix)); + workers[i]->start(); + } + for (int i = 0; i < numWorkers; ++i) { + workers[i]->join(); + delete workers[i]; + } +} \ No newline at end of file diff --git a/src/neuron.cu b/src/neuron.cu new file mode 100644 index 0000000..ada6c99 --- /dev/null +++ b/src/neuron.cu @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +using namespace std; + +Neuron& Neuron::makeNeuron(PyObject* neuronDict) { + string type = pyDictGetString(neuronDict, "type"); + PyObject* neuronParamsDict = PyDict_GetItemString(neuronDict, "params"); + + if (type == "relu") { + return *new ReluNeuron(); + } + + if (type == "nrelu") { + return *new NoisyReluNeuron(); + } + + if (type == "drelu") { + return *new DoubleReluNeuron(pyDictGetFloat(neuronParamsDict, "a")); + } + + if (type == "softrelu") { + return *new SoftReluNeuron(); + } + + if (type == "brelu") { + return *new BoundedReluNeuron(pyDictGetFloat(neuronParamsDict, "a")); + } + + if (type == "abs") { + return *new AbsNeuron(); + } + + if (type == "logistic") { + return *new LogisticNeuron(); + } + + if (type == "tanh") { + return *new TanhNeuron(pyDictGetFloat(neuronParamsDict, "a"), pyDictGetFloat(neuronParamsDict, "b")); + } + + if (type == "square") { + return *new SquareNeuron(); + } + + if (type == "sqrt") { + return *new SqrtNeuron(); + } + + if (type == "linear") { + return *new LinearNeuron(pyDictGetFloat(neuronParamsDict, "a"), pyDictGetFloat(neuronParamsDict, "b")); + } + + if (type == "ident") { + return *new Neuron(); + } + + throw string("Unknown neuron type: ") + type; +} diff --git a/src/pyconvnet.cu b/src/pyconvnet.cu new file mode 100644 index 0000000..1dd6d01 --- /dev/null +++ b/src/pyconvnet.cu @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +using namespace std; +static ConvNet* model = NULL; + +static PyMethodDef _ConvNetMethods[] = {{ "initModel", initModel, METH_VARARGS }, + { "startBatch", startBatch, METH_VARARGS }, + { "finishBatch", finishBatch, METH_VARARGS }, + { "checkGradients", checkGradients, METH_VARARGS }, + { "startMultiviewTest", startMultiviewTest, METH_VARARGS }, + { "startFeatureWriter", startFeatureWriter, METH_VARARGS }, + { "startDataGrad", startDataGrad, METH_VARARGS }, + { "syncWithHost", syncWithHost, METH_VARARGS }, + { NULL, NULL } +}; + +#if defined(_WIN64) || defined(_WIN32) +extern "C" __declspec(dllexport) void initpyconvnet() { + (void) Py_InitModule("pyconvnet", _ConvNetMethods); + import_array(); +} +#else +void INITNAME() { + (void) Py_InitModule(QUOTEME(MODELNAME), _ConvNetMethods); + import_array(); +} +#endif + +PyObject* initModel(PyObject *self, PyObject *args) { + assert(model == NULL); + + PyDictObject* pyLayerParams; + PyListObject* pyDeviceIDs, *pyDeviceCPUs; + int pyMinibatchSize; + int pyWeightUpdateFreq; + + if (!PyArg_ParseTuple(args, "O!O!O!ii", + &PyDict_Type, &pyLayerParams, + &PyList_Type, &pyDeviceIDs, + &PyList_Type, &pyDeviceCPUs, + &pyMinibatchSize, + &pyWeightUpdateFreq)) { + return NULL; + } + intv& deviceIDs = *getIntV((PyObject*)pyDeviceIDs); + vector& deviceCPUs = *new vector(); + for (int i = 0; i < PyList_GET_SIZE(pyDeviceCPUs); i++) { + intv* v = getIntV(PyList_GetItem((PyObject*)pyDeviceCPUs, i)); + deviceCPUs.push_back(v); + } + model = new ConvNet((PyObject*)pyLayerParams, + deviceIDs, + deviceCPUs, + pyMinibatchSize, + pyWeightUpdateFreq); + + model->start(); + return Py_BuildValue("i", 0); +} + +/* + * Starts training/testing on the given batch (asynchronous -- returns immediately). + */ +PyObject* startBatch(PyObject *self, PyObject *args) { + assert(model != NULL); + PyListObject* data; + double progress; + int test = 0; + if (!PyArg_ParseTuple(args, "O!d|i", + &PyList_Type, &data, + &progress, + &test)) { + return NULL; + } + CPUData* cpuData = new CPUData((PyObject*)data); + + TrainingWorker* wr = new TrainingWorker(*model, *cpuData, progress, test); + model->getWorkerQueue().enqueue(wr); + return Py_BuildValue("i", 0); +} + +/* + * Starts testing on the given batch (asynchronous -- returns immediately). + */ +PyObject* startMultiviewTest(PyObject *self, PyObject *args) { + assert(model != NULL); + PyListObject* data; + int numViews; + PyArrayObject* pyProbs = NULL; + char* logregName = NULL; + if (!PyArg_ParseTuple(args, "O!i|O!s", + &PyList_Type, &data, + &numViews, + &PyArray_Type, &pyProbs, + &logregName)) { + return NULL; + } + CPUData* cpuData = new CPUData((PyObject*)data); + MultiviewTestWorker* wr = pyProbs == NULL ? 
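/*
 * The "O!i|O!s" format string above makes the last two arguments optional: with only the
 * data list and numViews, the plain worker below just runs the multiview test; when a
 * numpy array and a cost-layer name are also passed, the worker is additionally handed a
 * host Matrix wrapping that array plus the layer name, presumably so the averaged
 * multiview probabilities of that layer can be written back for the Python side to read.
 */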
new MultiviewTestWorker(*model, *cpuData, numViews) + : new MultiviewTestWorker(*model, *cpuData, numViews, *new Matrix(pyProbs), logregName); + model->getWorkerQueue().enqueue(wr); + return Py_BuildValue("i", 0); +} + +PyObject* startFeatureWriter(PyObject *self, PyObject *args) { + assert(model != NULL); + PyListObject* data; + PyListObject* pyFtrs; + PyListObject* pyLayerNames; + if (!PyArg_ParseTuple(args, "O!O!O!", + &PyList_Type, &data, + &PyList_Type, &pyFtrs, + &PyList_Type, &pyLayerNames)) { + return NULL; + } + stringv* layerNames = getStringV((PyObject*)pyLayerNames); + CPUData* cpuData = new CPUData((PyObject*)data); + MatrixV* ftrs = getMatrixV((PyObject*)pyFtrs); + + FeatureWorker* wr = new FeatureWorker(*model, *cpuData, *ftrs, *layerNames); + model->getWorkerQueue().enqueue(wr); + return Py_BuildValue("i", 0); +} + +PyObject* startDataGrad(PyObject *self, PyObject *args) { +// assert(model != NULL); +// PyListObject* data; +// int dataLayerIdx, softmaxLayerIdx; +// if (!PyArg_ParseTuple(args, "O!ii", +// &PyList_Type, &data, +// &dataLayerIdx, &softmaxLayerIdx)) { +// return NULL; +// } +// CPUData* cpuData = new CPUData((PyObject*)data); +// Matrix& ftrs = *mvec.back(); +// mvec.pop_back(); +// +// DataGradWorker* wr = new DataGradWorker(*model, *cpuData, ftrs, dataLayerIdx, softmaxLayerIdx); +// model->getWorkerQueue().enqueue(wr); + return Py_BuildValue("i", 0); +} + +/* + * Waits for the trainer to finish training on the batch given to startBatch. + */ +PyObject* finishBatch(PyObject *self, PyObject *args) { + assert(model != NULL); + WorkResult* res = model->getResultQueue().dequeue(); + assert(res != NULL); + assert(res->getResultType() == WorkResult::BATCH_DONE); + + Cost& cost = res->getResults(); + PyObject* dict = PyDict_New(); + CostMap& costMap = cost.getCostMap(); + for (CostMap::const_iterator it = costMap.begin(); it != costMap.end(); ++it) { + PyObject* v = PyList_New(0); + for (vector::const_iterator iv = it->second->begin(); iv != it->second->end(); ++iv) { + PyObject* f = PyFloat_FromDouble(*iv); + PyList_Append(v, f); + } + PyDict_SetItemString(dict, it->first.c_str(), v); + } + + PyObject* retVal = Py_BuildValue("Ni", dict, cost.getNumCases()); + delete res; // Deletes cost too + return retVal; +} + +PyObject* checkGradients(PyObject *self, PyObject *args) { + assert(model != NULL); + PyListObject* data; + if (!PyArg_ParseTuple(args, "O!", + &PyList_Type, &data)) { + return NULL; + } + CPUData* cpuData = new CPUData((PyObject*)data); + + GradCheckWorker* wr = new GradCheckWorker(*model, *cpuData); + model->getWorkerQueue().enqueue(wr); + WorkResult* res = model->getResultQueue().dequeue(); + assert(res != NULL); + assert(res->getResultType() == WorkResult::BATCH_DONE); + delete res; + return Py_BuildValue("i", 0); +} + +/* + * Copies weight matrices from GPU to system memory. 
+ */ +PyObject* syncWithHost(PyObject *self, PyObject *args) { + assert(model != NULL); + SyncWorker* wr = new SyncWorker(*model); + model->getWorkerQueue().enqueue(wr); + WorkResult* res = model->getResultQueue().dequeue(); + assert(res != NULL); + assert(res->getResultType() == WorkResult::SYNC_DONE); + + delete res; + return Py_BuildValue("i", 0); +} + diff --git a/src/quantizer.cu b/src/quantizer.cu new file mode 100644 index 0000000..02d0574 --- /dev/null +++ b/src/quantizer.cu @@ -0,0 +1,65 @@ +#include + +using namespace std; + +/*================= + * Quantizer + * ================ + */ + +Quantizer& Quantizer::make(PyObject* lrsDict) { + string type = pyDictGetString(lrsDict, "type"); + if (type == "default") { + return *new Quantizer(); + } else if (type == "half") { + return *new HalfQuantizer(); + } + throw string("Unknown quantizer type ") + type; +} + +Quantizer::Quantizer() : _numRows(0), _numCols(0), _trans(false) { +} + +Quantizer::~Quantizer() { +} + +void Quantizer::quantize(NVMatrix& src, NVMatrix& tgt) { + _quantize(src, tgt); + _quantized = &tgt; + _numRows = src.getNumRows(); + _numCols = src.getNumCols(); + _trans = src.isTrans(); +} + +void Quantizer::dequantize(NVMatrix& tgt, float scaleTarget, float scaleOutput) { + _dequantize(tgt, scaleTarget, scaleOutput); + tgt.setTrans(_trans); + tgt.reshape(_numRows, _numCols); +} + +void Quantizer::dequantize(NVMatrix& tgt) { + dequantize(tgt, 0, 1); +} + +void Quantizer::_quantize(NVMatrix& src, NVMatrix& tgt) { + src.copy(tgt); +} + +void Quantizer::_dequantize(NVMatrix& tgt, float scaleTarget, float scaleOutput) { + tgt.add(*_quantized, scaleTarget, scaleOutput); +} + +/*================= + * HalfQuantizer + * ================ + */ +HalfQuantizer::HalfQuantizer() : Quantizer() { +} + +void HalfQuantizer::_quantize(NVMatrix& src, NVMatrix& tgt) { + convQuantizeHalf(src, tgt); +} + +void HalfQuantizer::_dequantize(NVMatrix& tgt, float scaleTarget, float scaleOutput) { + convDequantizeHalf(*_quantized, tgt, _numRows * _numCols, scaleTarget, scaleOutput); +} diff --git a/src/softmaxtree.cu b/src/softmaxtree.cu new file mode 100644 index 0000000..e686077 --- /dev/null +++ b/src/softmaxtree.cu @@ -0,0 +1,441 @@ +#include + +#include "layer.cuh" + +using namespace std; + +/* + * This launches a series of blocks for every node at a given depth. + * The "series" just spans the length of the weight vectors. 
+ * + * The operation performed is (loosely): + * targets[d] := weights[d] + targets[d-1] + * + * Block size: (y, x) = (1, B_X) + * Grid size: (y, x) = (numNodesAtDepth, ceil(numFeatures/B_X)) + * + * weights: (numNodes, numFeatures) + * nodes: numNodesAtDepth-length array of ushort2 + * where x coordinate gives node idx and y coordinate gives parent idx + * targets: (numNodes, numFeatures) + * + */ +template +__global__ void kSoftmaxTreeFwd(float* weights, ushort2* nodes, float* targets, const int numFeatures) { + __shared__ ushort2 node; // node.x == node idx, node.y == parent node idx + const int depthNodeIdx = blockIdx.y; + const int featureOffset = blockIdx.x * B_X + threadIdx.x; + if (threadIdx.x == 0) { + node = nodes[depthNodeIdx]; + } + __syncthreads(); + weights += featureOffset; + targets += featureOffset; + // No loops for now + if (featureOffset < numFeatures) { + if (root) { + targets[node.x * numFeatures] = weights[numFeatures * node.x]; + } else { + targets[node.x * numFeatures] = targets[node.y * numFeatures] + weights[numFeatures * node.x]; + } + } +} + +/* + * This launches a series of blocks for every node at a given height. + * The "series" just spans the length of the weight vectors. + * + * The operation performed is (loosely): + * grads[h] := sum_d{grads[h-1]} + * + * Block size: (y, x) = (1, B_X) + * Grid size: (y, x) = (numNodesAtHeight, ceil(numFeatures/B_X)) + * + * grads: (numNodes, numFeatures) + * nodes: numNodesAtHeight-length array of ushort2 + * where x coordinate gives node idx and y coordinate gives NUMBER OF CHILDREN + * ^ (note difference with kSoftmaxTreeFwd) + * childrenPtrs: numNodesAtHeight-length array of pointers to children indices + * + * The idea is to start one of these grids at each height, in sequence, starting + * from height = 1. + * + * The rows 0-numLabels-1 of grads must already have the correct softmax gradients (these + * are the nodes at height = 0). + * + */ +template +__global__ void kSoftmaxTreeBwd(float* grads, ushort2* nodes, ushort** childrenPtrs, const int numFeatures) { + __shared__ ushort2 node; // node.x == node idx, node.y == parent node idx + __shared__ ushort* childrenPtr; + __shared__ ushort children[B_X]; + const int heightNodeIdx = blockIdx.y; + const int featureOffset = blockIdx.x * B_X + threadIdx.x; + if (threadIdx.x == 0) { + node = nodes[heightNodeIdx]; + childrenPtr = childrenPtrs[heightNodeIdx]; + } + __syncthreads(); + + grads += featureOffset; + const int nodeIdx = node.x; + const int numChildren = node.y; + + float nodeGrad = 0; + for (int c = 0; c < numChildren; c += B_X) { + + if (c + threadIdx.x < numChildren) { + children[threadIdx.x] = childrenPtr[c + threadIdx.x]; + } + __syncthreads(); + if (featureOffset < numFeatures) { + const int numChildrenLeft = min(B_X, numChildren - c); + for (int cc = 0; cc < numChildrenLeft; ++cc) { + const int childIdx = children[cc]; + //const int childIdx = childrenPtr[c + cc]; + nodeGrad += grads[childIdx * numFeatures]; + } + } + __syncthreads(); + } + if (featureOffset < numFeatures) { + grads[nodeIdx * numFeatures] = nodeGrad; + } +} + +/* + * + * Block size: (y, x) = (1, B_X) + * Grid size: (y, x) = (1, numNodes) + * + * weights: (numNodes, numFeatures) + * weightsInc: (numNodes, numFeatures) + * weightsGrad: (numNodes, numFeatures) + * nodeSizes: numNodes-array whose ith element gives number of leaves under + * node with label i. + * + * TODO: why did I make nodeSizes ushort? int would prolly be fine. 
+ */ +template +__global__ void kSoftmaxTreeUpdateWeights(float* weights, float* weightsInc, float* weightsGrad, + ushort* nodeSizes, const int numFeatures, + float eps, const float mom, float wc) { + __shared__ int nodeSize; // node.x == node idx, node.y == parent node idx + const int nodeIdx = blockIdx.x; + if (threadIdx.x == 0) { + nodeSize = nodeSizes[nodeIdx]; + } + __syncthreads(); + weights += nodeIdx * numFeatures; + weightsInc += nodeIdx * numFeatures; + weightsGrad += nodeIdx * numFeatures; + + // TODO: make these shared? +// eps *= sqrtf(nodeSize); + wc /= nodeSize; + eps /= nodeSize; // larger epsw at the leaves + + for (int f = threadIdx.x; f < numFeatures; f += B_X) { + const float inc = mom * weightsInc[f] + eps * (weightsGrad[f] - wc * weights[f]); + weightsInc[f] = inc; + weights[f] += inc; + } +} + +/* + * ================== + * SoftmaxNode + * ================== + */ +int SoftmaxNode::setDistances(std::map& nodeHeights, + std::map& nodeDepths) { + _height = 0; + for (SoftmaxNodeV::iterator it = _children.begin(); it != _children.end(); ++it) { + _height = max(_height, (*it)->setDistances(nodeHeights, nodeDepths)); + } + _height += _children.size() > 0; + if (nodeHeights.count(_height) == 0) { + nodeHeights[_height] = new SoftmaxNodeV(); + } + if (nodeDepths.count(_depth) == 0) { + nodeDepths[_depth] = new SoftmaxNodeV(); + } + + nodeHeights[_height]->push_back(this); + nodeDepths[_depth]->push_back(this); + return _height; +} + +void SoftmaxNode::setNodeCounts(int &nodes, int& leaves) { + nodes++; + leaves += _children.size() == 0; + for (SoftmaxNodeV::iterator it = _children.begin(); it != _children.end(); ++it) { + (*it)->setNodeCounts(nodes, leaves); + } +} + +int SoftmaxNode::setSizes(ushort* nodeSizes) { + _size = _children.size() == 0; + for (SoftmaxNodeV::iterator it = _children.begin(); it != _children.end(); ++it) { + _size += (*it)->setSizes(nodeSizes); + } + nodeSizes[_label] = _size; + return _size; +} + +SoftmaxNode::SoftmaxNode(SoftmaxNode* parent, int label) + : _parent(parent), _label(label), _size(0), _height(0) { + _depth = parent == NULL ? 
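/*
 * A node's _depth (root = 0, assigned just below) determines which kSoftmaxTreeFwd launch
 * touches it in SoftmaxTree::makeWeights(), and its _height (leaves = 0) determines which
 * kSoftmaxTreeBwd launch touches it in distributeGradients(): setDistances() buckets the
 * nodes into _nodeDepths / _nodeHeights so that each level can be processed by one grid,
 * parents before children on the way down and children before parents on the way up.
 */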
0 : parent->getDepth() + 1; +} + +SoftmaxNode::~SoftmaxNode() { + for (SoftmaxNodeV::iterator it = _children.begin(); it != _children.end(); ++it) { + delete *it; + } +} + +int SoftmaxNode::getDepth() const { + return _depth; +} + +int SoftmaxNode::getHeight() const { + return _height; +} + +int SoftmaxNode::getSize() const { + return _size; +} + +int SoftmaxNode::getLabel() const { + return _label; +} + +SoftmaxNode* SoftmaxNode::getParent() { + return _parent; +} + +SoftmaxNodeV& SoftmaxNode::getChildren() { + return _children; +} + +SoftmaxNode& SoftmaxNode::addChild(int label) { + _children.push_back(new SoftmaxNode(this, label)); + return *_children.back(); +} + +/* + * ================== + * SoftmaxTree + * ================== + */ +SoftmaxTree::SoftmaxTree(int rootLabel) { + _root = new SoftmaxNode(NULL, rootLabel); + _nodeSizes = NULL; + _numNodes = 0; + _numLeaves = 0; +} + +SoftmaxTree::~SoftmaxTree() { + checkCudaErrors(cudaFreeHost(_nodeSizes)); + + for (map::iterator it = _nodeHeights.begin(); it != _nodeHeights.end(); ++it) { + int height = it->first; + SoftmaxNodeV& nodes = *it->second; + for (int n = 0; n < nodes.size(); n++) { + checkCudaErrors(cudaFreeHost(_nodeChildMeta[height][n])); + } + checkCudaErrors(cudaFreeHost(_nodeChildMeta[height])); + checkCudaErrors(cudaFreeHost(_nodeChildMeta[height])); + delete &nodes; + } + for (map::iterator it = _nodeDepths.begin(); it != _nodeDepths.end(); ++it) { + SoftmaxNodeV& nodes = *it->second; + int depth = it->first; + checkCudaErrors(cudaFreeHost(_nodeFwdMeta[depth])); + delete &nodes; + } + + delete _root; +} + +void SoftmaxTree::setFwdMeta() { + for (map::iterator it = _nodeDepths.begin(); it != _nodeDepths.end(); ++it) { + SoftmaxNodeV& nodes = *it->second; + ushort2* meta; + checkCudaErrors(cudaHostAlloc(&meta, sizeof(ushort2) * nodes.size(), cudaHostAllocPortable)); + int depth = it->first; + _nodeFwdMeta[depth] = meta; + for (int n = 0; n < nodes.size(); n++) { + meta[n].x = nodes[n]->getLabel(); + // Setting the root to have parent 0 is ok because the fwd kernel won't + // query this anyway when root == true. + meta[n].y = nodes[n]->getParent() == NULL ? 
0 : nodes[n]->getParent()->getLabel(); + } + } +} + +void SoftmaxTree::setBwdMeta() { + for (map::iterator it = _nodeHeights.begin(); it != _nodeHeights.end(); ++it) { + SoftmaxNodeV& nodes = *it->second; + ushort2* meta; + ushort** childMeta; + checkCudaErrors(cudaHostAlloc(&meta, sizeof(ushort2) * nodes.size(), cudaHostAllocPortable)); + checkCudaErrors(cudaHostAlloc(&childMeta, sizeof(ushort*) * nodes.size(), cudaHostAllocPortable)); + int height = it->first; + _nodeBwdMeta[height] = meta; + _nodeChildMeta[height] = childMeta; + for (int n = 0; n < nodes.size(); n++) { + checkCudaErrors(cudaHostAlloc(&childMeta[n], sizeof(ushort) * nodes[n]->getChildren().size(), cudaHostAllocPortable)); + for (int c = 0; c < nodes[n]->getChildren().size(); c++) { + childMeta[n][c] = nodes[n]->getChildren()[c]->getLabel(); + } + meta[n].x = nodes[n]->getLabel(); + meta[n].y = nodes[n]->getChildren().size(); + } + } +} + +void SoftmaxTree::setDistances() { + _nodeHeights.clear(); + _nodeDepths.clear(); + _root->setDistances(_nodeHeights, _nodeDepths); +} + +void SoftmaxTree::setNodeCounts() { + _numNodes = 0; + _numLeaves = 0; + _root->setNodeCounts(_numNodes, _numLeaves); +} + +void SoftmaxTree::setNodeSizes() { + assert(_numLeaves > 0); + checkCudaErrors(cudaHostAlloc(&_nodeSizes, sizeof(ushort) * _numNodes, cudaHostAllocPortable)); + _root->setSizes(_nodeSizes); +} + +void SoftmaxTree::finalize() { + setDistances(); + setNodeCounts(); + setNodeSizes(); + setFwdMeta(); + setBwdMeta(); +} + +SoftmaxNode& SoftmaxTree::getRoot() { + return *_root; +} + +SoftmaxNodeV& SoftmaxTree::getNodesAtHeight(int height) { + return *_nodeHeights[height]; +} + +SoftmaxNodeV& SoftmaxTree::getNodesAtDepth(int depth) { + return *_nodeDepths[depth]; +} + +int SoftmaxTree::getHeight() const { + return _root->getHeight(); +} + +/* + * A tree with only a root is taken to have depth 0. + */ +int SoftmaxTree::getDepth() const { + return _nodeDepths.size() - 1; +} + +int SoftmaxTree::getNumLeaves() const { + return _numLeaves; +} + +int SoftmaxTree::getNumNodes() const { + return _numNodes; +} + +/* +* offsets: (numNodes, numFeatures) +* targets: (numNodes, numFeatures) +*/ +void SoftmaxTree::makeWeights(NVMatrix& offsets, NVMatrix& targets) { + preprocess(offsets); + preprocess(targets); + assert(offsets.getNumRows() == _numNodes); + assert(targets.isSameDims(offsets)); + int numFeatures = offsets.getNumCols(); + dim3 threads = dim3(256); // 256 seems to work best on dummy binary tree + dim3 blocks = dim3(DIVUP(numFeatures, 256), 1); // Only the root is at depth 0 + cudaFuncSetCacheConfig(kSoftmaxTreeFwd<256, true>, cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(kSoftmaxTreeFwd<256, false>, cudaFuncCachePreferL1); + kSoftmaxTreeFwd<256, true><<>>(offsets.getDevData(), _nodeFwdMeta[0], targets.getDevData(), numFeatures); + getLastCudaError("kSoftmaxTreeFwd: kernel execution failed"); + for (int d = 1; d <= getDepth(); d++) { + blocks = dim3(DIVUP(numFeatures, 256), _nodeDepths[d]->size()); + kSoftmaxTreeFwd<256, false><<>>(offsets.getDevData(), _nodeFwdMeta[d], targets.getDevData(), numFeatures); + getLastCudaError("kSoftmaxTreeFwd: kernel execution failed"); + } + + postprocess(offsets); + postprocess(targets); +} + +/* +* grads: (numNodes, numFeatures) +* +* The idea is that grads contains gradients for the leaves +* (i.e. the first numLabels rows), so this routine will +* distribute them up the tree. 
+* +*/ +void SoftmaxTree::distributeGradients(NVMatrix& grads) { + preprocess(grads); + assert(grads.getNumRows() == _numNodes); + int numFeatures = grads.getNumCols(); + // The leaves (nodes at height = 0) already have gradients computed. + // So start at the nodes at height = 1. + dim3 threads = dim3(512); // this block size works best :/ + cudaFuncSetCacheConfig(kSoftmaxTreeBwd<512>, cudaFuncCachePreferL1); + for (int h = 1; h <= getHeight(); ++h) { + dim3 blocks = dim3(DIVUP(numFeatures, 512), _nodeHeights[h]->size()); + kSoftmaxTreeBwd<512><<>>(grads.getDevData(), _nodeBwdMeta[h], _nodeChildMeta[h], numFeatures); + getLastCudaError("kSoftmaxTreeBwd: kernel execution failed"); + } + postprocess(grads); +} + +/* + * inc := mom * inc - wc * epsW * weight + epsW * grad + * weight := weight + inc + * + * weights: (numNodes, numFeatures) + * incs: (numNodes, numFeatures) + * grads: (numNodes , numFeatures) + */ +void SoftmaxTree::updateWeights(NVMatrix& weights, NVMatrix& incs, NVMatrix& grads, float epsWBase, float mom, float wcBase) { + preprocess(weights); + preprocess(incs); + preprocess(grads); + + assert(grads.getNumRows() == _numNodes); + assert(grads.isSameDims(incs)); + assert(grads.isSameDims(weights)); + int numFeatures = grads.getNumCols(); + dim3 threads = dim3(512); + dim3 blocks = dim3(_numNodes); + cudaFuncSetCacheConfig(kSoftmaxTreeUpdateWeights<512>, cudaFuncCachePreferL1); + kSoftmaxTreeUpdateWeights<512><<>>(weights.getDevData(), incs.getDevData(), grads.getDevData(), + _nodeSizes, numFeatures, epsWBase, mom, wcBase); + getLastCudaError("kSoftmaxTreeUpdateWeights: kernel execution failed"); + weights.transpose(); + incs.transpose(); + grads.transpose(); +} + +void SoftmaxTree::preprocess(NVMatrix& inp) { + inp.transpose(); + assert(!inp.isTrans()); + assert(inp.isContiguous()); +} + +void SoftmaxTree::postprocess(NVMatrix& inp) { + inp.transpose(); +} diff --git a/src/test.cu b/src/test.cu new file mode 100644 index 0000000..dde2eb1 --- /dev/null +++ b/src/test.cu @@ -0,0 +1,378 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +static StopWatchInterface *timer = NULL; +using namespace std; +void init_tests(int boardNum) { + cudaSetDevice(boardNum > -1 ? 
boardNum : 0); +// cublasInit(); + NVMatrix::initCublas(); + NVMatrix::initRandom(7); + sdkCreateTimer(&timer); +} + +void compareResults(Matrix& cpu, NVMatrix& gpu, const char* matrixName) { + Matrix gpuOnCPU(cpu); + gpu.copyToHost(gpuOnCPU); + gpuOnCPU.subtract(cpu); + gpuOnCPU.apply(Matrix::ABS); + printf("Max diff between CPU/GPU matrices %s: %.6f\n", matrixName, gpuOnCPU.max()); +} + +void test_blattice() { + printf("===============================\n"); + printf("test_blattice\n"); + printf("===============================\n"); + + int numCases = 2; + int numOut = 32; + int setSize = 3; + + cout << "numCases: " << numCases << endl; + cout << "numOut: " << numOut << endl; + cout << "setSize: " << setSize << endl; + NVMatrix nvEnergies(numCases, numOut); + Matrix energies(numCases, numOut); + Matrix bLattice(numOut, numCases * setSize); + + nvEnergies.randomizeUniform(); + nvEnergies.copyToHost(energies); + //energies.randomizeUniform(); + bLattice.apply(Matrix::ZERO); // for now + + Matrix &enMax = energies.max(1); + energies.addVector(enMax, -1); + + nvEnergies.copyFromHost(energies); + NVMatrix nvBLattice(bLattice, true); + + sdkResetTimer(&timer); + sdkStartTimer(&timer); + + MSMBackward(nvEnergies, nvBLattice, setSize); + + cudaThreadSynchronize(); + sdkStopTimer(&timer); + + printf("Energies: \n"); + nvEnergies.print(10, 5); + + printf("GPU (partial) result:\n"); + nvBLattice.print(0, 5, 0, 5); + printf("GPU time: %.6f msec\n", sdkGetTimerValue(&timer)); +} + +//void test_multiSoftmaxCPU() { +// printf("===============================\n"); +// printf("test_multiSoftmaxCPU\n"); +// printf("===============================\n"); +// +// int numCases = 2; +// int numOut = 5; +// int setSize = 3; +// +//// int numCases = 128; +//// int numOut = 1000; +//// int setSize = 5; +// +// cout << "numCases: " << numCases << endl; +// cout << "numOut: " << numOut << endl; +// cout << "setSize: " << setSize << endl; +// +// Matrix energies(numCases, numOut); +// Matrix B(numOut + 1, setSize + 1); +// Matrix probs(energies); +// energies.randomizeUniform(); +// probs.apply(Matrix::ZERO); // for now +// +// Matrix &enMax = energies.max(1); +// energies.addVector(enMax, -1); +// B.apply(Matrix::ZERO); +// +// sdkResetTimer(&timer); +// sdkStartTimer(&timer); +// +// MultiSoftmaxCPU_T(energies, B, probs, setSize, -1); +// +// cudaThreadSynchronize(); +// sdkStopTimer(&timer); +// +// printf("Energies: \n"); +// energies.print(10, 5); +// +// printf("CPU (partial) result:\n"); +// probs.print(0, 5, 0, 5); +// printf("CPU time: %.6f msec\n", sdkGetTimerValue(&timer)); +//} + +void test_multiSoftmaxCPU_parallel() { + printf("===============================\n"); + printf("test_multiSoftmaxCPU_parallel\n"); + printf("===============================\n"); + + int workers = 8; + + int numCases = 2; + int numOut = 5; + int setSize = 2; + +// int numCases = 128; +// int numOut = 1000; +// int setSize = 5; + + cout << "workers: " << workers << endl; + cout << "numCases: " << numCases << endl; + cout << "numOut: " << numOut << endl; + cout << "setSize: " << setSize << endl; + + NVMatrix nvEnergies(numCases, numOut); + Matrix energies(numCases, numOut); + vector B; + Matrix probs(energies); + Matrix fixed(numCases, 1); + nvEnergies.randomizeUniform(); + nvEnergies.copyToHost(energies); + //energies.randomizeUniform(); + probs.apply(Matrix::ZERO); // for now + + Matrix &enMax = energies.max(1); + energies.addVector(enMax, -1); + + fixed.apply(Matrix::ONE); + fixed.scale(2); + + for (int i = 0; i < workers; i++) { 
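/*
 * Each worker gets its own (numOut + 1, setSize + 1) backward-lattice matrix, filled with
 * -INF: MultiSoftmaxCPU_T_logspace works in log space, so -INF plays the role of log(0)
 * for lattice states that have not been reached yet (the routine itself then sets
 * logB(N, 0) = 0, i.e. log(1), as the starting state). See the "MUST BE initially -INF"
 * note on MultiSoftmaxCPU_T_logspace above.
 */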
+ B.push_back(new Matrix(numOut + 1, setSize + 1)); + B[i]->apply(Matrix::ONE); + B[i]->scale(-INF); + } + + sdkResetTimer(&timer); + sdkStartTimer(&timer); + + MultiSoftmaxCPU_T_parallel(energies, B, probs, fixed, setSize, true); + + cudaThreadSynchronize(); + sdkStopTimer(&timer); + + printf("Energies: \n"); + energies.print(10, 10); + + printf("CPU (partial) result:\n"); + probs.print(0, 5, 0, 10); + printf("CPU time: %.6f msec\n", sdkGetTimerValue(&timer)); +} + +SoftmaxTree* makeDummyTree(int depth) { + int numNodes = (1 << (depth + 1)) - 1; + int numLeaves = (numNodes + 1) / 2; + + int idx = numNodes - 1; + SoftmaxTree* tree = new SoftmaxTree(idx--); + vector prevLevel; + + prevLevel.push_back(&tree->getRoot()); + while (idx >= 0) { + int sz = prevLevel.size(); + for (int i = 0; i < sz; i++) { + SoftmaxNode& node = *prevLevel[0]; + SoftmaxNode& child1 = node.addChild(idx--); + SoftmaxNode& child2 = node.addChild(idx--); + prevLevel.push_back(&child1); + prevLevel.push_back(&child2); + prevLevel.erase(prevLevel.begin()); + } + } + tree->finalize(); + assert(tree->getNumLeaves() == numLeaves); + assert(tree->getNumNodes() == numNodes); + return tree; +} + +void test_sftree_fwd() { + printf("===============================\n"); + printf("test_sftree_fwd\n"); + printf("===============================\n"); + + int numFeatures = 6*6*128; + int depth = 10; + SoftmaxTree* tree = makeDummyTree(depth); + cout << "numFeatures: " << numFeatures << endl; + cout << "depth: " << depth << endl; + cout << "numNodes: " << tree->getNumNodes() << endl; + cout << "numLabels: " << tree->getNumLeaves() << endl; + + Matrix weights(tree->getNumNodes(), numFeatures); + Matrix targets(tree->getNumNodes(), numFeatures); + NVMatrix nvWeights(tree->getNumNodes(), numFeatures); + NVMatrix nvTargets(tree->getNumNodes(), numFeatures); + + weights.randomizeUniform(); + + nvWeights.copyFromHost(weights); + + sdkResetTimer(&timer); + sdkStartTimer(&timer); + + cpuSoftmaxTreeFwd(weights.getData(), targets.getData(), numFeatures, *tree); + + sdkStopTimer(&timer); + printf("CPU (partial) result:\n"); + targets.print(0, 7, 0, 5); + printf("CPU time: %.6f msec\n", sdkGetTimerValue(&timer)); + + sdkResetTimer(&timer); + cudaDeviceSynchronize(); + + nvWeights.transpose(); + nvTargets.transpose(); + sdkStartTimer(&timer); + + tree->makeWeights(nvWeights, nvTargets); + cudaDeviceSynchronize(); + sdkStopTimer(&timer); + + nvWeights.transpose(); + nvTargets.transpose(); + printf("GPU (partial) result:\n"); + nvTargets.print(0, 7, 0, 5); + printf("GPU time: %.6f msec\n", sdkGetTimerValue(&timer)); + compareResults(targets, nvTargets, "targets"); +} + +void test_sftree_bwd() { + printf("===============================\n"); + printf("test_sftree_bwd\n"); + printf("===============================\n"); + + int numFeatures = 6*6*128; + int depth = 10; + SoftmaxTree* tree = makeDummyTree(depth); + cout << "numFeatures: " << numFeatures << endl; + cout << "depth: " << depth << endl; + cout << "numNodes: " << tree->getNumNodes() << endl; + cout << "numLabels: " << tree->getNumLeaves() << endl; + + Matrix grads(tree->getNumNodes(), numFeatures); + NVMatrix nvGrads(tree->getNumNodes(), numFeatures); + + grads.randomizeUniform(); + + nvGrads.copyFromHost(grads); + + sdkResetTimer(&timer); + sdkStartTimer(&timer); + + cpuSoftmaxTreeBwd(grads.getData(), numFeatures, *tree); + + sdkStopTimer(&timer); + printf("CPU (partial) result:\n"); + grads.print(0, 7, 0, 5); + printf("CPU time: %.6f msec\n", sdkGetTimerValue(&timer)); + + 
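/*
 * The device synchronize below makes sure no earlier asynchronous GPU work (e.g. the
 * host-to-device copies above) is still in flight, so the timer wrapped around
 * distributeGradients() measures only the tree backward kernels.
 */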
sdkResetTimer(&timer); + cudaDeviceSynchronize(); + + nvGrads.transpose(); + sdkStartTimer(&timer); + + tree->distributeGradients(nvGrads); + cudaDeviceSynchronize(); + sdkStopTimer(&timer); + + nvGrads.transpose(); + printf("GPU (partial) result:\n"); + nvGrads.print(0, 7, 0, 5); + printf("GPU time: %.6f msec\n", sdkGetTimerValue(&timer)); + compareResults(grads, nvGrads, "grads"); +} + +void test_sftree_update() { + printf("===============================\n"); + printf("test_sftree_update\n"); + printf("===============================\n"); + + float eps = 0.001, wc = 0.005, mom = 0.9; + int numFeatures = 6*6*128; + int depth = 10; + SoftmaxTree* tree = makeDummyTree(depth); + cout << "numFeatures: " << numFeatures << endl; + cout << "depth: " << depth << endl; + cout << "numNodes: " << tree->getNumNodes() << endl; + cout << "numLabels: " << tree->getNumLeaves() << endl; + + Matrix grads(tree->getNumNodes(), numFeatures); + Matrix weights(tree->getNumNodes(), numFeatures); + Matrix incs(tree->getNumNodes(), numFeatures); + NVMatrix nvGrads(tree->getNumNodes(), numFeatures); + NVMatrix nvWeights(tree->getNumNodes(), numFeatures); + NVMatrix nvIncs(tree->getNumNodes(), numFeatures); + + grads.randomizeUniform(); + weights.randomizeUniform(); + incs.randomizeUniform(); + + nvGrads.copyFromHost(grads); + nvWeights.copyFromHost(weights); + nvIncs.copyFromHost(incs); + + sdkResetTimer(&timer); + sdkStartTimer(&timer); + + cpuSoftmaxTreeUpdateWeights(weights.getData(), incs.getData(), grads.getData(), numFeatures, eps, mom, wc, *tree); + + sdkStopTimer(&timer); + printf("CPU (partial) result:\n"); + weights.print(0, 7, 0, 5); + printf("CPU time: %.6f msec\n", sdkGetTimerValue(&timer)); + + sdkResetTimer(&timer); + cudaDeviceSynchronize(); + + nvGrads.transpose(); + nvWeights.transpose(); + nvIncs.transpose(); + sdkStartTimer(&timer); + + tree->updateWeights(nvWeights, nvIncs, nvGrads, eps, mom, wc); + cudaDeviceSynchronize(); + sdkStopTimer(&timer); + + nvGrads.transpose(); + nvWeights.transpose(); + nvIncs.transpose(); + printf("GPU (partial) result:\n"); + nvWeights.print(0, 7, 0, 5); + printf("GPU time: %.6f msec\n", sdkGetTimerValue(&timer)); + compareResults(weights, nvWeights, "weights"); + compareResults(incs, nvIncs, "incs"); +} + +int main(int argc, char** argv) { + + int boardNum = get_board_lock(); + if (boardNum == GPU_LOCK_NO_BOARD) { + printf("No free GPU boards!\n"); + exit(EXIT_FAILURE); + } else if(boardNum == GPU_LOCK_NO_SCRIPT) { + printf("Running on default board.\n"); + } else { + printf("Running on board %d\n", boardNum); + } + init_tests(boardNum); + +// test_blattice(); +// test_multiSoftmaxCPU(); +// test_multiSoftmaxCPU_parallel(); +// test_sftree_fwd(); +// test_sftree_bwd(); +// test_mdiag(); +// test_mdiagGrad(); + return 0; +} diff --git a/src/util.cu b/src/util.cu new file mode 100644 index 0000000..bf4f712 --- /dev/null +++ b/src/util.cu @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +using namespace std; + +stringv* getStringV(PyObject* pyList) { + if (pyList == NULL) { + return NULL; + } + stringv* vec = new stringv(); + for (int i = 0; i < PyList_GET_SIZE(pyList); i++) { + vec->push_back(string(PyString_AS_STRING(PyList_GET_ITEM(pyList, i)))); + } + return vec; +} + +floatv* getFloatV(PyObject* pyList) { + if (pyList == NULL) { + return NULL; + } + floatv* vec = new floatv(); + for (int i = 0; i < PyList_GET_SIZE(pyList); i++) { + vec->push_back(PyFloat_AS_DOUBLE(PyList_GET_ITEM(pyList, i))); + } + return vec; +} + +intv* getIntV(PyObject* pyList) { + if (pyList == NULL) { + return NULL; + } + intv* vec = new intv(); + for (int i = 0; i < PyList_GET_SIZE(pyList); i++) { + vec->push_back(PyInt_AS_LONG(PyList_GET_ITEM(pyList, i))); + } + return vec; +} + +int* getIntA(PyObject* pyList) { + if (pyList == NULL) { + return NULL; + } + int* arr = new int[PyList_GET_SIZE(pyList)]; + for (int i = 0; i < PyList_GET_SIZE(pyList); i++) { + arr[i] = PyInt_AS_LONG(PyList_GET_ITEM(pyList, i)); + } + return arr; +} + +MatrixV* getMatrixV(PyObject* pyList) { + return getMatrixV(pyList, PyList_GET_SIZE(pyList)); +} + +MatrixV* getMatrixV(PyObject* pyList, int len) { + if (pyList == NULL) { + return NULL; + } + MatrixV* vec = new MatrixV(); + for (int i = 0; i < len; i++) { + vec->push_back(new Matrix((PyArrayObject*)PyList_GET_ITEM(pyList, i))); + } + return vec; +} + +int pyDictGetInt(PyObject* dict, const char* key) { + return PyInt_AS_LONG(PyDict_GetItemString(dict, key)); +} + +intv* pyDictGetIntV(PyObject* dict, const char* key) { + return getIntV(PyDict_GetItemString(dict, key)); +} + +int* pyDictGetIntA(PyObject* dict, const char* key) { + return getIntA(PyDict_GetItemString(dict, key)); +} + +string pyDictGetString(PyObject* dict, const char* key) { + return string(PyString_AS_STRING(PyDict_GetItemString(dict, key))); +} + +float pyDictGetFloat(PyObject* dict, const char* key) { + return PyFloat_AS_DOUBLE(PyDict_GetItemString(dict, key)); +} + +floatv* pyDictGetFloatV(PyObject* dict, const char* key) { + return getFloatV(PyDict_GetItemString(dict, key)); +} + +Matrix* pyDictGetMatrix(PyObject* dict, const char* key) { + return new Matrix((PyArrayObject*)PyDict_GetItemString(dict, key)); +} + +MatrixV* pyDictGetMatrixV(PyObject* dict, const char* key) { + return getMatrixV(PyDict_GetItemString(dict, key)); +} + +stringv* pyDictGetStringV(PyObject* dict, const char* key) { + return getStringV(PyDict_GetItemString(dict, key)); +} \ No newline at end of file diff --git a/src/weights.cu b/src/weights.cu new file mode 100644 index 0000000..7104536 --- /dev/null +++ b/src/weights.cu @@ -0,0 +1,378 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All 
rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include "worker.cuh" + +NVMatrix& Weights::operator*() const { + return getW(); +} + +Weights::Weights(Weights& srcWeights, LearningRateSchedule& lrs) + : _srcWeights(&srcWeights), _lrs(&lrs), _wc(0), _wball(0), _onGPU(false), _numUpdates(0), + _weights(NULL), _weightsInc(NULL), _weightsGrad(NULL), _cleanup(false) { + _hWeights = &srcWeights.getCPUW(); + _hWeightsInc = &srcWeights.getCPUWInc(); + _mom = srcWeights.getMom(); + _useGrad = srcWeights.isUseGrad(); + _superEps = srcWeights.getSuperEps(); +} + +Weights::Weights(Matrix& hWeights, Matrix& hWeightsInc, LearningRateSchedule& lrs, float wc, + float wball, float mom, float superEps, bool useGrad, bool cleanup) + : _srcWeights(NULL), _hWeights(&hWeights), _hWeightsInc(&hWeightsInc), _numUpdates(0), + _lrs(&lrs), _wc(wc), _wball(wball), _mom(mom), _useGrad(useGrad), _superEps(superEps), + _onGPU(false), _weights(NULL),_weightsInc(NULL), _weightsGrad(NULL), _cleanup(cleanup) { + assert(_superEps <= 0 || _useGrad); // superWeights ==> useGrad +} + +Weights::~Weights() { + delete _lrs; + if (_cleanup) { + delete _hWeights; + delete _hWeightsInc; + if (_srcWeights == NULL) { + delete _weights; + delete _weightsInc; + delete _weightsGrad; + } + } +} + +NVMatrix& Weights::getW() const { + assert(_onGPU); + return *_weights; +} + +NVMatrix& Weights::getInc() const { + assert(_onGPU); + return *_weightsInc; +} + +NVMatrix& Weights::getGrad() const { + assert(_onGPU); + return _useGrad ? *_weightsGrad : *_weightsInc; +} + +Matrix& Weights::getCPUW() const { + return *_hWeights; +} + +Matrix& Weights::getCPUWInc() const { + return *_hWeightsInc; +} + +int Weights::getNumRows() const { + return _hWeights->getNumRows(); +} + +int Weights::getNumCols() const { + return _hWeights->getNumCols(); +} + +void Weights::copyToCPU() { + if (_srcWeights == NULL) { + assert(_onGPU); + _weights->copyToHost(*_hWeights); + _weightsInc->copyToHost(*_hWeightsInc); + } +} + +// This function is assumed to be called in the order in which the layers +// were defined +void Weights::copyToGPU() { + assert(!_onGPU); + if (_srcWeights == NULL) { + _weights = _weights == NULL ? 
new NVMatrix() : _weights; + _weightsInc = _weightsInc == NULL ? new NVMatrix() : _weightsInc; + _weights->copyFromHost(*_hWeights, true); + _weightsInc->copyFromHost(*_hWeightsInc, true); + _weightsGrad = _useGrad ? (_weightsGrad == NULL ? new NVMatrix(*_weights) : _weightsGrad) : NULL; + _weightsGradAvg = _superEps > 0 ? new NVMatrix() : NULL; + _weightsGrad2Avg = _superEps > 0 ? new NVMatrix() : NULL; + } else { + _weights = _srcWeights->_weights; + _weightsInc = _srcWeights->_weightsInc; + _weightsGrad = _srcWeights->_weightsGrad; + } + _onGPU = true; +} + +#define SUPERMOM_THREADS 256 +#define SUPERMOM_BLOCKS_MAX 4096 + +/* + * V = eps * g / (G2 - G^2 + superEps)^.5 + mom * V + */ +__global__ void superMomUpdate(float* V, float* g, float* G, float* G2, + const float eps, const float mom, const float superEps, const int numElements) { + const int tidx = blockIdx.x * SUPERMOM_THREADS + threadIdx.x; + + for (int t = tidx; t < numElements; t += gridDim.x * SUPERMOM_THREADS) { + V[t] = /*mom*/0.9 * V[t] + eps * __fdividef(g[t], sqrtf(G2[t] - G[t] + superEps)); + } +} + +// When _useGrad is false, weightsInc is assumed to contain the +// entire, properly scaled weight increment. +// OTHERWISE, scale your gradient by 1 / numCases only. +// The scaling by epsW will be done in this routine. +void Weights::update(float progress) { + // Only true owner of weights updates + if (_srcWeights == NULL && _lrs->getBaseRate() > 0) { + assert(_onGPU); + if (_superEps <= 0) { + if (_useGrad) { + _weightsInc->add(*_weightsGrad, _mom, _lrs->getRate(progress)); + } + } else { + if (!_weightsGradAvg->isSameDims(*_weightsGrad)) { + _weightsGradAvg->resize(*_weightsGrad); + _weightsGrad2Avg->resize(*_weightsGrad); + _weightsGradAvg->apply(NVMatrixOps::Zero()); + _weightsGrad2Avg->apply(NVMatrixOps::Zero()); + } + _weightsGradAvg->add(*_weightsGrad, _mom, 1 - _mom); + _weightsGrad2Avg->applyBinary(Grad2AvgOperator(_mom), *_weightsGrad); + // Geoff version + + // Make sure all matrices are contiguous + assert(_weightsGrad->isContiguous()); + assert(_weightsGradAvg->isContiguous()); + assert(_weightsGrad2Avg->isContiguous()); + assert(_weightsInc->isContiguous()); + // Make sure they all have the same transposedness + assert(_weightsGrad->isTrans() == _weightsGradAvg->isTrans()); + assert(_weightsGradAvg->isTrans() == _weightsGrad2Avg->isTrans()); + assert(_weightsGrad2Avg->isTrans() == _weightsInc->isTrans()); + // Make sure they all have the same sizes + assert(_weightsGrad->isSameDims(*_weightsGradAvg)); + assert(_weightsGradAvg->isSameDims(*_weightsGrad2Avg)); + assert(_weightsGrad2Avg->isSameDims(*_weightsInc)); + + int numElements = _weights->getNumElements(); + dim3 blocks(std::min(DIVUP(numElements, SUPERMOM_THREADS), SUPERMOM_BLOCKS_MAX)); + dim3 threads(SUPERMOM_THREADS); + //float super = _superEps + 1000000*_weightsGrad2Avg->sum() / numElements; + //printf("super: %f\n", super); + superMomUpdate<<>>(_weightsInc->getDevData(), _weightsGrad->getDevData(), + _weightsGradAvg->getDevData(), _weightsGrad2Avg->getDevData(), + _lrs->getRate(progress), _mom, _superEps, numElements); + getLastCudaError("superMomUpdate: Kernel execution failed"); + //_weightsInc->print(4,4); + //_weightsGrad2Avg->print(5,5);exit(0); + // Ilya version + } + if (_wc > 0) { + _weightsInc->add(*_weights, -_wc * _lrs->getRate(progress)); + } + _weights->add(*_weightsInc); + _numUpdates = 0; + } +} + +int Weights::incNumUpdates() { + if (_srcWeights != NULL) { + return _srcWeights->incNumUpdates(); + } + return _numUpdates++; +} + +// 
Returns the number of times a gradient has been computed for this +// weight matrix during the current pass (interval between two calls of update()) +// through the net. This number will only be greater than 1 if this weight matrix +// is *shared* by multiple layers in the net. +int Weights::getNumUpdates() const { + if (_srcWeights != NULL) { + return _srcWeights->getNumUpdates(); + } + return _numUpdates; +} + +float Weights::getEps(float progress) const { + return _lrs->getRate(progress); +} + +float Weights::getMom() const { + return _mom; +} + +float Weights::getWC() const { + return _wc; +} + +float Weights::getWBall() const { + return _wball; +} + +bool Weights::isUseGrad() const { // is good grammar + return _useGrad; +} + +bool Weights::isOwner() const { + return _srcWeights == NULL; +} + +float Weights::getSuperEps() const { + return _superEps; +} + +LearningRateSchedule& Weights::getLearningRateSchedule() const { + return *_lrs; +} + +/* + * =============== + * TreeWeights + * =============== + */ +TreeWeights::TreeWeights(SoftmaxTree& tree, Matrix& hWeights, Matrix& hWeightsInc, LearningRateSchedule& lrs, float wcBase, float mom) + : _tree(&tree), Weights(hWeights, hWeightsInc, lrs, wcBase, 0, mom, 0, true) { + assert(hWeights.isTrans()); + assert(hWeightsInc.isTrans()); +} + +NVMatrix& TreeWeights::getW() const { + return *_leafWeights; +} + +NVMatrix& TreeWeights::getInc() const { + return *_leafInc; +} + +NVMatrix& TreeWeights::getGrad() const { + return *_leafGrad; +} + +NVMatrix& TreeWeights::getAllW() const { + return *_weights; +} + +NVMatrix& TreeWeights::getAllInc() const { + return *_weightsInc; +} + +NVMatrix& TreeWeights::getAllGrad() const { + return *_weightsGrad; +} + +void TreeWeights::copyToGPU() { + assert(!_onGPU); + Weights::copyToGPU(); + _tree->finalize(); + _effWeights.resize(*_weights); + _leafWeights = &_effWeights.sliceCols(0, _tree->getNumLeaves()); + _leafGrad = &_weightsGrad->sliceCols(0, _tree->getNumLeaves()); + _leafInc = &_weightsInc->sliceCols(0, _tree->getNumLeaves()); + assert(_leafWeights->isView()); + makeWeights(); +} + +int TreeWeights::getNumRows() const { + return _tree->getNumNodes(); +} + +void TreeWeights::update(float progress) { + // Only true owner of weights updates + if (_lrs->getBaseRate() > 0) { + assert(_onGPU); + distributeGradients(); + _tree->updateWeights(*_weights, *_weightsInc, *_weightsGrad, _lrs->getRate(progress), _mom, _wc); + makeWeights(); + _numUpdates = 0; + } +} + +void TreeWeights::makeWeights() { + _tree->makeWeights(*_weights, _effWeights); +} + +void TreeWeights::distributeGradients() { + _tree->distributeGradients(*_weightsGrad); +} + +/* + * =============== + * DummyWeights + * =============== + */ +DummyWeights::DummyWeights(Matrix& hWeights, Matrix& hWeightsInc, + NVMatrix& weights, NVMatrix& incs, NVMatrix& grads) + : Weights(hWeights, hWeightsInc, *new LearningRateSchedule(0), 0, 0, 0, 0, true, false) { + _onGPU = true; + _weights = &weights; + _weightsInc = &incs; + _weightsGrad = &grads; +} + +/* + * =============== + * WeightList + * =============== + */ +Weights& WeightList::operator[](const int idx) const { + return *_weightList[idx]; +} + +WeightList::~WeightList() { + for (int i = 0; i < _weightList.size(); i++) { + delete _weightList[i]; + } +} + +WeightList::WeightList() { +} + + +void WeightList::addWeights(Weights& w) { + _weightList.push_back(&w); +} + + +void WeightList::update(float progress) { + for (int i = 0; i < getSize(); i++) { + _weightList[i]->update(progress); + } +} + +void 
WeightList::copyToCPU() { + for (int i = 0; i < getSize(); i++) { + _weightList[i]->copyToCPU(); + } +} + +void WeightList::copyToGPU() { + for (int i = 0; i < getSize(); i++) { + _weightList[i]->copyToGPU(); + } +} + +int WeightList::getSize() const { + return _weightList.size(); +} diff --git a/src/worker.cu b/src/worker.cu new file mode 100644 index 0000000..76de9fd --- /dev/null +++ b/src/worker.cu @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +using namespace std; + +/* + * ==================== + * WorkResult + * ==================== + */ +WorkResult::WorkResult(WorkResult::RESULTS resultType, Cost& results) : _resultType(resultType), _results(&results) { +} + +WorkResult::WorkResult(WorkResult::RESULTS resultType) : _resultType(resultType), _results(NULL) { +} + +WorkResult::~WorkResult() { + delete _results; // delete NULL is ok +} + +Cost& WorkResult::getResults() const { + return *_results; +} + +WorkResult::RESULTS WorkResult::getResultType() const { + return _resultType; +} + +/* + * ==================== + * Worker + * ==================== + */ +Worker::Worker(ConvNet& convNet) : _convNet(&convNet) { +} + +/* + * ==================== + * DataWorker + * ==================== + */ +DataWorker::DataWorker(ConvNet& convNet, CPUData& data) : Worker(convNet), _data(&data) { + _dp = &convNet.getDataProvider(); + _dp->setData(*_data); +} + +DataWorker::~DataWorker() { + _dp->clearData(); +} + +/* + * ==================== + * TrainingWorker + * ==================== + */ +TrainingWorker::TrainingWorker(ConvNet& convNet, CPUData& data, double progress, bool test) + : DataWorker(convNet, data), _progress(progress), _test(test) { +} + +// Need to setData here (as opposed to the constructor) because the constructor executes in +// the original CPU thread, which is not the one with GPU access. +void TrainingWorker::run() { + _convNet->setTrainingProgress(_progress); + Cost& batchCost = *new Cost(0); + for (int i = 0; i < _dp->getNumMinibatches(); i++) { + _convNet->fprop(i, _test ? 
PASS_TEST : PASS_TRAIN); + _convNet->getCost(batchCost); + + if (!_test) { + _convNet->bprop(PASS_TRAIN); + _convNet->updateWeights(); + } + } + _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::BATCH_DONE, batchCost)); +} + +/* + * ==================== + * SyncWorker + * ==================== + */ +SyncWorker::SyncWorker(ConvNet& convNet) : Worker(convNet) { +} + +void SyncWorker::run() { + _convNet->copyToCPU(); + _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::SYNC_DONE)); +} + +/* + * ==================== + * GradCheckWorker + * ==================== + */ +GradCheckWorker::GradCheckWorker(ConvNet& convNet, CPUData& data) + : DataWorker(convNet, data) { +} + +void GradCheckWorker::run() { + _convNet->checkGradients(); + exit(0); +} + +/* + * ==================== + * MultiviewTestWorker + * ==================== + */ +MultiviewTestWorker::MultiviewTestWorker(ConvNet& convNet, CPUData& data, int numViews, Matrix& cpuProbs, const char* logregName) + : DataWorker(convNet, data), _numViews(numViews), _cpuProbs(&cpuProbs), _logregName(logregName) { + assert(_data->getNumCases() % _numViews == 0); +} + +MultiviewTestWorker::MultiviewTestWorker(ConvNet& convNet, CPUData& data, int numViews) + : DataWorker(convNet, data), _numViews(numViews), _cpuProbs(NULL), _logregName("") { + assert(_data->getNumCases() % _numViews == 0); +} + +MultiviewTestWorker::~MultiviewTestWorker() { + delete _cpuProbs; +} + +void MultiviewTestWorker::run() { + int numCasesReal = _dp->getNumCases() / _numViews; + int numMiniReal = DIVUP(numCasesReal, _dp->getMinibatchSize()); + + Cost& batchCost = *new Cost(0); + for (int i = 0; i < numMiniReal; i++) { + for (int v = 0; v < _numViews; v++) { + CPUData& mini = _dp->getDataSlice(v * numCasesReal + i * _dp->getMinibatchSize(), + min((v + 1) * numCasesReal, v * numCasesReal + (i + 1) * _dp->getMinibatchSize())); + _convNet->fprop(mini, v == 0 ? PASS_MULTIVIEW_TEST_START : v == _numViews - 1 ? 
PASS_MULTIVIEW_TEST_END : PASS_MULTIVIEW_TEST); + } + if (_cpuProbs != NULL) { + LogregCostLayer& logregLayer = *dynamic_cast(&_convNet->getLayer(_logregName)); + cudaSetDevice(logregLayer.getDeviceID()); + Matrix& miniProbs = _cpuProbs->sliceRows(i * _dp->getMinibatchSize(), + min(numCasesReal, (i + 1) * _dp->getMinibatchSize())); + NVMatrix& acts = logregLayer.getProbsAccum(); + NVMatrix acts_T; + acts.transpose(acts_T); + acts_T.copyToHost(miniProbs); + + delete &miniProbs; + } + _convNet->getCost(batchCost); + } + cudaDeviceSynchronize(); + _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::BATCH_DONE, batchCost)); +} + +/* + * ==================== + * FeatureWorker + * ==================== + */ +FeatureWorker::FeatureWorker(ConvNet& convNet, CPUData& data, MatrixV& ftrs, stringv& layerNames) + : DataWorker(convNet, data), _ftrs(&ftrs), _layerNames(&layerNames) { + assert(layerNames.size() == ftrs.size()); + for (int i = 0; i < layerNames.size(); i++) { + assert(ftrs[i]->getNumRows() == data.getNumCases()); + assert(!ftrs[i]->isTrans()); + } +} + +FeatureWorker::~FeatureWorker() { + for (int i = 0; i < _ftrs->size(); i++) { + delete _ftrs->at(i); + } + delete _ftrs; + delete _layerNames; +} + +void FeatureWorker::run() { + + Cost& batchCost = *new Cost(0); + + for (int i = 0; i < _dp->getNumMinibatches(); i++) { + _convNet->fprop(i, PASS_FEATURE_GEN); + _convNet->getCost(batchCost); + for (int f = 0; f < _layerNames->size(); f++) { + Layer& ftrLayer = _convNet->getLayer(_layerNames->at(f)); + int d = ftrLayer.getDeviceID(); + cudaSetDevice(d); + Matrix& miniFtrs = _ftrs->at(f)->sliceRows(i * _dp->getMinibatchSize(), + min(_dp->getNumCases(), (i + 1) * _dp->getMinibatchSize())); + NVMatrix& acts = ftrLayer.getActs(); + NVMatrix acts_T; + if (acts.isTrans()) { + NVMatrix& soft_T = acts.getTranspose(); + soft_T.transpose(acts_T); + delete &soft_T; + } else { + acts.transpose(acts_T); + } + acts_T.copyToHost(miniFtrs); + delete &miniFtrs; + } + } + cudaDeviceSynchronize(); + _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::BATCH_DONE, batchCost)); +} + +/* + * ==================== + * DataGradWorker + * ==================== + */ +DataGradWorker::DataGradWorker(ConvNet& convNet, CPUData& data, Matrix& dataGrads, int dataLayerIdx, int softmaxLayerIdx) + : DataWorker(convNet, data), _dataGrads(&dataGrads), _dataLayerIdx(dataLayerIdx), _softmaxLayerIdx(softmaxLayerIdx) { + assert(dataGrads.getNumRows() == data.getNumCases()); + assert(!dataGrads.isTrans()); +} + +DataGradWorker::~DataGradWorker() { + delete _dataGrads; +} + +void DataGradWorker::run() { +// DataLayer& dataLayer = *dynamic_cast(&_convNet->getLayer(_dataLayerIdx)); +// SoftmaxLayer& softmaxLayer = *dynamic_cast(&_convNet->getLayer(_softmaxLayerIdx)); +// softmaxLayer.setDoLogregGrad(false); +// Cost& batchCost = *new Cost(0); +// for (int i = 0; i < _dp->getNumMinibatches(); i++) { +// _convNet->fprop(i, PASS_TEST); +// _convNet->getCost(batchCost); +// softmaxLayer.getActs().apply(NVMatrixOps::Log(), softmaxLayer.getActsGrad()); +// +// softmaxLayer.getActsGrad().addScalar(1); +// softmaxLayer.getActsGrad().scale(-1); +// softmaxLayer.incRcvdBInputs(); +// softmaxLayer.bprop(PASS_TEST); +// +// Matrix& miniDataGrads = _dataGrads->sliceRows(i * _dp->getMinibatchSize(), +// min(_dp->getNumCases(), (i + 1) * _dp->getMinibatchSize())); +// NVMatrix& grads = dataLayer.getActsGrad(); +// NVMatrix grads_T; +// if (grads.isTrans()) { +// NVMatrix& soft_T = grads.getTranspose(); +// soft_T.transpose(grads_T); +// 
delete &soft_T; +// } else { +// grads.transpose(grads_T); +// } +// grads_T.copyToHost(miniDataGrads); +// delete &miniDataGrads; +// +// _convNet->reset(); +// } +// cudaThreadSynchronize(); +// _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::BATCH_DONE, batchCost)); +} diff --git a/test.py b/test.py new file mode 100755 index 0000000..5b823ed --- /dev/null +++ b/test.py @@ -0,0 +1,32 @@ +import sys +import re + +def binary_sum(arr): + if len(arr) == 1: + return arr[0] + mid = len(arr) / 2 + sum_left = binary_sum(arr[:mid]) + sum_right = binary_sum(arr[mid:]) + return [a + b for a,b in zip(sum_left, sum_right)] + +def test(path): + p = re.compile(r'^batch \d+:.*\[((?:[\d\.]+(?:, )?)+)\]}, (\d+)\)\s*$') + sums = [] + sums2 = [] + ncases = 0 + with open(path) as f: + for line in f: + m = p.match(line) + if m: + vals = m.group(1).split(',') + if len(sums) == 0: sums = [0] * len(vals) + sums = [s + float(v) for s,v in zip(sums, vals)] + sums2 += [[float(v) for v in vals]] + ncases += int(m.group(2)) + return [s/ncases for s in sums], [s/ncases for s in binary_sum(sums2)], ncases + +if __name__ == "__main__": + errs, errs2, ncases = test(sys.argv[1]) + print errs + print errs2 + print "--- %d cases" % ncases diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..f8afe96 --- /dev/null +++ b/test.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# 3000..3146 +for i in {3035..3146}; do +#for i in {2000..2047}; do + echo "Testing on batch $i" + python convnet.py -f /nobackup/kriz/tmp/ConvNet__2012-06-25_17.55.06 --test-only=1 --test-range="$i" --multiview-test=1 +done diff --git a/tm.sh b/tm.sh new file mode 100755 index 0000000..420bd75 --- /dev/null +++ b/tm.sh @@ -0,0 +1,43 @@ +#!/bin/sh + +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=pool1a_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-pool1a_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=pool1b_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-pool1b_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=pool1a_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-pool1a_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=pool1b_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-pool1b_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=conv2a_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-conv2a_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=conv2b_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-conv2b_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=pool2a --feature-path=/nobackup/kriz/tmp/mit-images/all/features-pool2a --data-path=/nobackup/kriz/tmp/mit-images/all/batches 
--test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=pool2b --feature-path=/nobackup/kriz/tmp/mit-images/all/features-pool2b --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=conv3a_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-conv3a_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=conv3b_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-conv3b_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=conv4a_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-conv4a_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=conv4b_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-conv4b_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=pool3a_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-pool3a_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=pool3b_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-pool3b_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=conv6a_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-conv6a_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=conv6b_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-conv6b_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=fc2048a_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-fc2048a_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=fc2048b_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-fc2048b_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=fc2048ba_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-fc2048ba_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 +python shownet.py -f /nobackup/kriz/tmp/ConvNet__2012-09-23_14.57.12 --write-features=fc2048bb_neuron --feature-path=/nobackup/kriz/tmp/mit-images/all/features-fc2048bb_neuron --data-path=/nobackup/kriz/tmp/mit-images/all/batches --test-range=1-5 --train-range=1 +sleep 1 + diff --git 
a/txt-preds.py b/txt-preds.py new file mode 100755 index 0000000..2c2ff2c --- /dev/null +++ b/txt-preds.py @@ -0,0 +1,15 @@ +from util import * +import os +import sys + +if __name__ == "__main__": + path = sys.argv[1] + for f in sorted(os.listdir(path)): + dic = unpickle(os.path.join(path, f)) + preds = dic['data'] + assert preds.shape[1] == 1000 + for c in xrange(preds.shape[0]): # loop over cases + # Notice the +1 here to convert from 0-based indices to 1-based + top5 = [x[0] + 1 for x in reversed(sorted(list(enumerate(preds[c,:])), key=lambda x:x[1])[-5:])] + assert min(top5) >= 1 and max(top5) <= 1000 + print " ".join(str(x) for x in top5) diff --git a/verify-test-preds.py b/verify-test-preds.py new file mode 100755 index 0000000..8f5cfc3 --- /dev/null +++ b/verify-test-preds.py @@ -0,0 +1,31 @@ +import sys +from util import * +import pylab as pl +import numpy as n +import numpy.random as nr +from PIL import Image +from StringIO import StringIO + +def print_top5(preds, lnames): + print preds + for i in xrange(len(preds)): + print "Label %d: %s" %(i, lnames[preds[i]]) + +if __name__ == "__main__": + pred_path = sys.argv[1] + data_path = sys.argv[2] + batch = nr.randint(98) + 3000 + data = unpickle(os.path.join(data_path, 'data_batch_%d' % batch))[0] + preds = [n.array([int(x) - 1 for x in l.split(' ')]) for l in open(pred_path).readlines()] + + img_idx = nr.randint(len(data)) + meta = unpickle(os.path.join(data_path, 'batches.meta')) + lnames = meta['label_names'] + print "Batch: %d, img idx: %d" % (batch, img_idx) + + img = n.asarray(Image.open(StringIO(data[img_idx])).convert('RGB')) + + print_top5(preds[(batch - 3000) * 1024 + img_idx], lnames) + + pl.imshow(img) + pl.show()
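
The sketches below are illustrative notes, not part of the patch. First, the update rule implemented by Weights::update in src/weights.cu: when _superEps <= 0 it is a plain momentum plus weight-decay step (with the caveat noted in the source that, when _useGrad is false, _weightsInc is assumed to already hold the entire scaled increment); when _superEps > 0 the superMomUpdate kernel divides the gradient by a running estimate of its standard deviation, per the comment V = eps * g / (G2 - G^2 + superEps)^0.5 + mom * V (the kernel itself hardcodes the momentum as 0.9). A minimal CPU-side sketch, with plain float arrays standing in for NVMatrix and assuming Grad2AvgOperator keeps a matching running mean of g^2:

#include <cmath>
#include <cstddef>

// Sketch only: plain float arrays stand in for the NVMatrix objects in weights.cu.
// Standard branch (_superEps <= 0, _useGrad == true): momentum + gradient step,
// followed by the weight-decay term and the weight update itself.
void updateStandard(float* W, float* inc, const float* grad, size_t n,
                    float eps, float mom, float wc) {
    for (size_t i = 0; i < n; ++i) {
        inc[i] = mom * inc[i] + eps * grad[i];   // _weightsInc->add(*_weightsGrad, mom, eps)
        inc[i] += -wc * eps * W[i];              // _weightsInc->add(*_weights, -wc * eps)
        W[i]   += inc[i];                        // _weights->add(*_weightsInc)
    }
}

// "Super momentum" branch (_superEps > 0): the gradient is normalized by an estimate
// of its standard deviation built from running means of g and g^2 (per the kernel comment).
void updateSuperMom(float* W, float* inc, const float* grad,
                    float* gAvg, float* g2Avg, size_t n,
                    float eps, float mom, float wc, float superEps) {
    for (size_t i = 0; i < n; ++i) {
        gAvg[i]  = mom * gAvg[i]  + (1.f - mom) * grad[i];            // running mean of g
        g2Avg[i] = mom * g2Avg[i] + (1.f - mom) * grad[i] * grad[i];  // running mean of g^2
        float denom = sqrtf(g2Avg[i] - gAvg[i] * gAvg[i] + superEps);
        inc[i] = mom * inc[i] + eps * grad[i] / denom;
        inc[i] += -wc * eps * W[i];
        W[i]   += inc[i];
    }
}

Only the owning Weights object (_srcWeights == NULL) applies the update; shared copies reuse the owner's device matrices and forward incNumUpdates() to it.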
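
MultiviewTestWorker::run in src/worker.cu presents each test case under _numViews views (the batch is laid out as _numViews contiguous blocks of numCasesReal cases) and lets the logreg layer accumulate probabilities across the PASS_MULTIVIEW_TEST_* passes. A sketch of an equivalent view-averaging step on the host (summing gives the same ranking), assuming per-view probability buffers in row-major case-by-class layout; all names here are hypothetical:

#include <vector>

// probsPerView[v][c * numClasses + k] = P(class k | case c, view v), already on the host.
// Returns the view-averaged probabilities in the same row-major layout.
// Assumes at least one view.
std::vector<float> averageViews(const std::vector<std::vector<float> >& probsPerView,
                                int numCases, int numClasses) {
    std::vector<float> avg((size_t)numCases * numClasses, 0.f);
    for (size_t v = 0; v < probsPerView.size(); ++v) {
        for (size_t i = 0; i < avg.size(); ++i) {
            avg[i] += probsPerView[v][i];
        }
    }
    for (size_t i = 0; i < avg.size(); ++i) {
        avg[i] /= (float)probsPerView.size();
    }
    return avg;
}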
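
test.py computes the averaged per-case error vectors twice: once with a straight running sum and once with binary_sum, a pairwise (tree) reduction that accumulates the per-batch vectors in a balanced tree and therefore loses less precision when many floating-point values are summed. The same reduction, sketched in C++:

#include <vector>
using std::vector;

// Pairwise (binary-tree) sum of equal-length rows, mirroring binary_sum() in test.py:
// split the list in half, sum each half recursively, then add element-wise.
static vector<double> pairwiseSum(const vector<vector<double> >& rows, size_t lo, size_t hi) {
    if (hi - lo == 1) {
        return rows[lo];
    }
    size_t mid = lo + (hi - lo) / 2;
    vector<double> left = pairwiseSum(rows, lo, mid);
    vector<double> right = pairwiseSum(rows, mid, hi);
    for (size_t i = 0; i < left.size(); ++i) {
        left[i] += right[i];
    }
    return left;
}

vector<double> pairwiseSum(const vector<vector<double> >& rows) {
    return rows.empty() ? vector<double>() : pairwiseSum(rows, 0, rows.size());
}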
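
Finally, txt-preds.py turns each row of 1000 class probabilities into the five most probable labels, converting from 0-based to 1-based indices for the submission format. A sketch of that per-row step (the function name is hypothetical; assumes at least five classes):

#include <algorithm>
#include <vector>

// For one row of class probabilities, return the 1-based indices of the five most
// probable classes in descending order (what txt-preds.py writes per test case).
std::vector<int> top5Labels(const std::vector<float>& probs) {
    std::vector<int> idx(probs.size());
    for (size_t i = 0; i < idx.size(); ++i) {
        idx[i] = (int)i;
    }
    std::partial_sort(idx.begin(), idx.begin() + 5, idx.end(),
                      [&probs](int a, int b) { return probs[a] > probs[b]; });
    std::vector<int> top5(idx.begin(), idx.begin() + 5);
    for (size_t i = 0; i < top5.size(); ++i) {
        top5[i] += 1;  // submission labels are 1-based
    }
    return top5;
}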