# Per-layer parameter settings for run layers-174.
# One [section] per layer name; one key=value option per line.
# Comma-separated values are kept exactly as given (presumably one entry per
# layer input -- confirm against the matching layer definition file).
# Run history and results are recorded in the notes at the end of this file.

# epsW is 0 for conv1a/conv1b: see the "epoch 74" note at the end of the file.
[conv1a]
epsW=0.0
epsB=0.0
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
schedW=jdexp[500,0,4]
schedB=dexp[10,0,2]

[conv1b]
epsW=0.0
epsB=0.0
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
schedW=jdexp[500,0,4]
schedB=dexp[10,0,2]

[conv2a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
schedW=jdexp[500,0,4]
schedB=dexp[10,0,2]

[conv2b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0.00
schedW=jdexp[500,0,4]
schedB=dexp[10,0,2]

[conv3a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
schedW=jdexp[500,0,4]
schedB=dexp[10,0,2]

[conv3b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
schedW=jdexp[500,0,4]
schedB=dexp[10,0,2]

[conv4a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
schedW=jdexp[500,0,4]
schedB=dexp[10,0,2]

[conv4b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
schedW=jdexp[500,0,4]
schedB=dexp[10,0,2]

[conv5a]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
schedW=jdexp[500,0,4]
schedB=dexp[10,0,2]

[conv5b]
epsW=0.01
epsB=0.02
momW=0.9
momB=0.9
wc=0.0005
wball=0
schedW=jdexp[500,0,4]
schedB=dexp[10,0,2]

[fc2048a]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
schedW=jdexp[500,0,4]
schedB=dexp[10,0,2]

[fc2048b]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
schedW=jdexp[500,0,4]
schedB=dexp[10,0,2]

[fc2048ba]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
schedW=jdexp[500,0,4]
schedB=dexp[10,0,2]

[fc2048bb]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
schedW=jdexp[500,0,4]
schedB=dexp[10,0,2]

[fc1000]
epsW=0.01,0.01
epsB=0.02
momW=0.9,0.9
momB=0.9
wc=0.0005,0.0005
wball=0,0
schedW=jdexp[500,0,4]
schedB=dexp[10,0,2]

[logprob]
coeff=1
topk=5

[hs1a]
enable=true

[hs2a]
enable=true

[hs1b]
enable=true

[hs2b]
enable=true

[rnorm1a]
scale=0.0001
pow=0.75
minDiv=2

[rnorm1b]
scale=0.0001
pow=0.75
minDiv=2

[rnorm2a]
scale=0.0001
pow=0.75
minDiv=2

[rnorm2b]
scale=0.0001
pow=0.75
minDiv=2

[cnorm2a]
scale=0.001
pow=0.75

[cnorm2b]
scale=0.001
pow=0.75

# this is like #159 (so uses same def file: 153) but with jdexp
# this is also like 172, which uses jdexp, but here i'm using the kind of jdexp that jumps to geometric mean instead of previous level (and also 172 decayed learning rate by factor of 1000 instead of 500)
# it's also like 169, in that it decays learning rate to 0.00002, but this one's jumpy. so it should be compared to 169.
# on gpu
# initialized from 24 epochs of #169
# logs/layers-174.log
# /storage/tmp/ConvNet__2013-01-07_04.03.15.layers-174
# epoch 58: made backup to /storage/tmp/ConvNet__2013-01-07_04.03.15.layers-174.bak
# epoch 62: set color noise to 0 from 0.1
# epoch 74: set conv1 epsw to 0 from 0.01
# epoch 86: killed for noisy epsw experiments
# moved to guppy9
# logprob: 1.825121, 0.425222, 0.193946
# multiview logprob: 1.729982, 0.404380, 0.180420