# Layer parameter file: one [section] per layer, one key=value pair per line.
# Run notes from the original author follow the last section as # comments.

[conv1a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4]
schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2]

[conv1b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4]
schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2]

[conv2a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4]
schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2]

[conv2b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4]
schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2]

[conv3a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4]
schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2]

[conv3b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4]
schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2]

[conv4a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4]
schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2]

[conv4b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4]
schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2]

[conv5a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4]
schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2]

[conv5b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4]
schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2]

[fc2048a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4]
schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2]

[fc2048b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4]
schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2]

[fc2048ba]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4]
schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2]

[fc2048bb]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4]
schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2]

[fc1000]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
schedW=dexp[tgtFactor=250,noiseStdev=0,numSteps=4]
schedB=dexp[tgtFactor=10,noiseStdev=0,numSteps=2]

[logprob]
coeff=1
topk=5

[hs1a]
enable=true

[hs2a]
enable=true

[hs1b]
enable=true

[hs2b]
enable=true

[rnorm1a]
scale=0.0001
pow=0.75
minDiv=2

[rnorm1b]
scale=0.0001
pow=0.75
minDiv=2

[rnorm2a]
scale=0.0001
pow=0.75
minDiv=2

[rnorm2b]
scale=0.0001
pow=0.75
minDiv=2

[cnorm2a]
scale=0.001
pow=0.75

[cnorm2b]
scale=0.001
pow=0.75

# this is like #159 (so uses same def file: 153) but with learning rate decaying to 0.00004
# not starting this because its first ~24 epochs are gonna be completely identical to 169, so might as well wait for that one
# 169 backup at 23 epochs: /nobackup/kriz/tmp/ConvNet__2013-01-07_04.03.15.bak (guppy9)
# on guppy8
# /nobackup/kriz/tmp/ConvNet__2013-01-07_04.03.15.layer-170
# logs/layers-170.log
# epoch 68: set color noise to 0 from 0.1
#NOTE: free gpus 2,3 after done
# epoch 74: set conv1 epsw to 0 from 0.01
# logprob: 1.838933, 0.423407, 0.195827
# multiview logprob: 1.730076, 0.405280, 0.180480

# resuming for 55 more epochs, this time adding in the fade-in of images with prob 5%:
# /nobackup/kriz/tmp/ConvNet__2013-01-07_04.03.15.layer-170.fade
# guppy8
# logs/layers-170-contfade.log
# epoch 107: set epochs to 300 to increase learning rate, also turned on 0.1
# epoch 127: set epsw to 0.0025148669 from 0.01, set schedw to default from dexp[250,0,4]
# epoch 127: set epsw to 0.00063245555 from 0.0025148669
# epoch 127: set epsw to 0.0015874011 -- which is the correct one for this level
# epoch 127: set epsw to 0.00025198421
# epoch 137: set color noise to 0 from 0.1, also turned off fade-in

# restart from scratch with fade:
# logs/layers-170-fade.log
# /nobackup/kriz/tmp/ConvNet__2013-01-31_18.52.37
# guppy8
# epoch 61: set color noise to 0 from 0.1
# epoch 76: set conv1 epsw to 0 from 0.01
# logprob: 1.831731, 0.422944, 0.195334
# multiview: logprob: 1.726111, 0.402500, 0.181160

# now with fade probability 0.15 instead of 0.05:
# krunch
# logs/layers-170-fade-0.15.log
# /nobackup/kriz/tmp/ConvNet__2013-02-06_08.27.55
# moving to guppy9
# epoch 68: set color noise to 0 from 0.1
# epoch 74: set conv1 epsw to 0 from 0.1, set fade prob to 0.05 from 0.15
# logprob: 1.836181, 0.425008, 0.196351

# restart normal 170 but on 2 gpus which can't talk to each other -- just to make sure it'll work fine, and to measure the effect of quantization (#170-quant)
# guppy9
# logs/layers-170-gpu-1-2.log
# /nobackup/kriz/tmp/ConvNet__2013-02-23_12.16.26
# epoch 60: set color noise to 0 from 0.1
# epoch 63: moved to guppy5
# /nobackup_a/kriz/tmp/ConvNet__2013-02-23_12.16.26/