Initial commit
commit 9fdd561586
246 changed files with 58283 additions and 0 deletions

include/neuron.cuh (new file, 529 lines)
@@ -0,0 +1,529 @@
/*
 * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef NEURONS_CUH
#define NEURONS_CUH

#include <assert.h>
#include <string>
#include <nvmatrix.cuh>
#include <helper_cuda.h>
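/*
 * The two wrappers below adapt a per-unit gradient functor so that its result is
 * accumulated into an existing gradient buffer (the trailing "target" argument)
 * instead of overwriting it. They are used by the neurons' _addInputGrad() methods.
 */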
template <class GradientOp>
class AddGradientBinaryOperator {
    GradientOp _op;
public:
    AddGradientBinaryOperator(GradientOp op) : _op(op) {
    }
    __device__ inline float operator()(const float unitActGrad, const float unitAct, const float target) const {
        return _op(unitActGrad, unitAct) + target;
    }
};

template <class GradientOp>
class AddGradientOperator {
    GradientOp _op;
public:
    AddGradientOperator(GradientOp op) : _op(op) {
    }
    __device__ inline float operator()(const float unitActGrad, const float target) const {
        return target + _op(unitActGrad);
    }
};

/* =======================
 * Neuron
 * -----------------------
 *
 * f(x) = x
 * =======================
 */
class Neuron {
protected:
    bool _activated;
    // Inputs and outputs potentially point to the same matrix, depending on the neuron
    NVMatrix* _inputs, *_outputs;
    virtual void _activate() {
        if (_inputs != _outputs) {
            _inputs->copy(*_outputs);
        }
    }
    virtual void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        if (&target != &actsGrad) {
            actsGrad.copy(target);
        }
    }
    virtual void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        if (&target != &actsGrad) {
            target.add(actsGrad);
        }
    }
public:
    Neuron() : _activated(false), _inputs(NULL), _outputs(NULL) {
    }
    virtual void activate(NVMatrix& inputs, NVMatrix& outputs) {
        _activated = true;
        _inputs = &inputs;
        _outputs = &outputs;
        _activate();
    }

    virtual void computeInputGrad(NVMatrix& actsGrad, NVMatrix& target, bool add) {
        assert(_activated);
        if (!add) {
            target.resize(actsGrad);
            _computeInputGrad(actsGrad, target);
        } else {
            _addInputGrad(actsGrad, target);
        }
    }

    static Neuron& makeNeuron(PyObject* neuronDict);
};

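/*
 * Usage sketch (illustrative only, not taken from this file's callers): a layer would
 * typically call activate(inputs, acts) on the forward pass and then
 * computeInputGrad(actsGrad, inputGrad, add) on the backward pass, where "add" selects
 * whether the computed gradient overwrites inputGrad or is accumulated into it:
 *
 *     neuron->activate(netInput, acts);                      // acts = f(netInput)
 *     neuron->computeInputGrad(actsGrad, inputGrad, false);  // inputGrad = f'(...) * actsGrad
 *
 * Here netInput, acts, actsGrad and inputGrad are hypothetical NVMatrix instances.
 */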
/* =======================
 * LogisticNeuron
 * -----------------------
 *
 * f(x) = 1 / (1 + e^-x)
 * =======================
 */
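// Since y = 1 / (1 + e^-x) satisfies dy/dx = y * (1 - y), the gradient can be computed
// from the stored activations alone: inputGrad = actsGrad * y * (1 - y).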
class LogisticNeuron : public Neuron {
protected:
    void _activate() {
        _inputs->apply(NVMatrixOps::Logistic(), *_outputs);
    }

    void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyBinary(LogisticGradientOperator(), *_outputs, target);
    }

    void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyTernary(AddGradientBinaryOperator<LogisticGradientOperator>(LogisticGradientOperator()), *_outputs, target, target);
    }
public:
    class LogisticGradientOperator {
    public:
        __device__ inline float operator()(float unitActGrad, float unitAct) const {
            return unitActGrad * unitAct * (1.0f - unitAct);
        }
    };

    LogisticNeuron() : Neuron() {
    }
};

/* =======================
 * ReluNeuron
 * -----------------------
 *
 * f(x) = max(0, x)
 * =======================
 */
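// f'(x) = 1 for x > 0 and 0 otherwise. Because the activation y = max(0, x) is positive
// exactly when x is, the gradient is taken from the activations: inputGrad = actsGrad * (y > 0).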
class ReluNeuron : public Neuron {
protected:
    virtual void _activate() {
        _inputs->apply(ReluOperator(), *_outputs);
    }

    void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyBinary(ReluGradientOperator(), *_outputs, target);
    }

    void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyTernary(AddGradientBinaryOperator<ReluGradientOperator>(ReluGradientOperator()), *_outputs, target, target);
    }
public:
    class ReluOperator {
    public:
        __device__ inline float operator()(float x) const {
            return x < 0.0f ? 0.0f : x;
        }
    };

    class ReluGradientOperator {
    public:
        __device__ inline float operator()(float unitActGrad, float unitAct) const {
            return unitActGrad * (unitAct > 0.0f);
        }
    };

    ReluNeuron() : Neuron() {
    }
};

/* =======================
 * NoisyReluNeuron
 * -----------------------
 *
 * f(x) = max(0, max(0, x) + gaussian noise with variance equal to max(0, x))
 * =======================
 */
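// The backward pass is inherited unchanged from ReluNeuron, so the gradient is computed
// from the noisy, rectified activations produced by _activate() below.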
class NoisyReluNeuron : public ReluNeuron {
protected:
    void _activate() {
        ReluNeuron::_activate();
        _outputs->addGaussianNoise(*_outputs, false);
        _outputs->apply(ReluOperator());
    }
public:
    NoisyReluNeuron() : ReluNeuron() {
    }
};

/* =======================
 * BoundedReluNeuron
 * -----------------------
 *
 * f(x) = min(a, max(0, x))
 * =======================
 */
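// f'(x) = 1 on the open interval (0, a) and 0 elsewhere. Since the activation
// y = min(a, max(0, x)) lies strictly between 0 and a exactly when x does, the gradient
// is computed from the activations: inputGrad = actsGrad * (y > 0) * (y < a).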
class BoundedReluNeuron : public Neuron {
protected:
    float _a;

    void _activate() {
        _inputs->apply(BoundedReluOperator(_a), *_outputs);
    }

    void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyBinary(BoundedReluGradientOperator(_a), *_outputs, target);
    }

    void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyTernary(AddGradientBinaryOperator<BoundedReluGradientOperator>(BoundedReluGradientOperator(_a)), *_outputs, target, target);
    }
public:
    class BoundedReluOperator {
    private:
        float _a;
    public:
        BoundedReluOperator(float a) : _a(a) {
        }
        __device__ inline float operator()(float x) const {
            return x < 0.0f ? 0.0f : x > _a ? _a : x;
        }
    };

    class BoundedReluGradientOperator {
    private:
        float _a;
    public:
        BoundedReluGradientOperator(float a) : _a(a) {
        }
        __device__ inline float operator()(float unitActGrad, float unitAct) const {
            return unitActGrad * (unitAct > 0.0f) * (unitAct < _a);
        }
    };

    BoundedReluNeuron(float a) : Neuron(), _a(a) {
    }
};

/* =======================
 * AbsNeuron
 * -----------------------
 *
 * f(x) = abs(x)
 * =======================
 */
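// f'(x) = sign(x), which cannot be recovered from the activation |x| alone, so this
// neuron keeps inputs and outputs in separate matrices and computes the gradient from
// the original inputs: inputGrad = actsGrad * sign(x).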
class AbsNeuron : public Neuron {
protected:
    void _activate() {
        assert(_inputs != _outputs);
        _inputs->apply(NVMatrixOps::Abs(), *_outputs);
    }

    void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyBinary(AbsGradientOperator(), *_inputs, target);
    }

    void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyTernary(AddGradientBinaryOperator<AbsGradientOperator>(AbsGradientOperator()), *_inputs, target, target);
    }
public:
    class AbsGradientOperator {
    public:
        __device__ inline float operator()(float unitActGrad, float unitInput) const {
            return unitActGrad * (unitInput > 0.0f ? 1.0f : -1.0f);
        }
    };

    AbsNeuron() : Neuron() {
    }
};

/* =======================
 * TanhNeuron
 * -----------------------
 *
 * f(x) = a*tanh(b*x)
 * =======================
 */
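// Forward pass uses the identity a*tanh(b*x) = a * (2 / (1 + e^(-2*b*x)) - 1).
// For the backward pass, with y = a*tanh(b*x), dy/dx = a*b*(1 - tanh^2(b*x)) = b*(a - y*y/a),
// so the gradient is computed from the activations: inputGrad = actsGrad * b * (a - y*y/a).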
class TanhNeuron : public Neuron {
protected:
    float _a, _b;

    void _activate() {
        _inputs->apply(TanhOperator(_a, _b), *_outputs);
    }

    void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyBinary(TanhGradientOperator(_a, _b), *_outputs, target);
    }

    void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyTernary(AddGradientBinaryOperator<TanhGradientOperator>(TanhGradientOperator(_a, _b)), *_outputs, target, target);
    }
public:
    class TanhOperator {
    private:
        float _a, _n2b;
    public:
        TanhOperator(float a, float b) : _a(a), _n2b(-2*b) {
        }
        virtual __device__ inline float operator()(float x) const {
            return _a * (__fdividef(2.0f, 1.0f + __expf(x * _n2b)) - 1.0f);
        }
    };

    class TanhGradientOperator {
    private:
        float _b, _a;
    public:
        TanhGradientOperator(float a, float b) : _b(b), _a(a) {
        }
        __device__ inline float operator()(float unitActGrad, float unitAct) const {
            // const float t = (1.0f - __fdividef(unitAct, _a)) / 2.0f;
            // return unitActGrad * _n4ab * (t * (t - 1.0f));
            return unitActGrad * _b * (_a - __fdividef(unitAct * unitAct, _a));
        }
    };

    TanhNeuron(float a, float b) : Neuron(), _a(a), _b(b) {
    }
};

/* =======================
 * DoubleReluNeuron
 * -----------------------
 *
 * f(x) = x - a*tanh(x/a)
 * =======================
 */
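// f'(x) = 1 - (1 - tanh^2(x/a)) = tanh^2(x/a). Recovering tanh(x/a) from the activation
// would require inverting f, so the gradient is computed from the original inputs:
// inputGrad = actsGrad * tanh^2(x/a).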
class DoubleReluNeuron : public Neuron {
protected:
    float _a;

    void _activate() {
        assert(_inputs != _outputs);
        _inputs->apply(DoubleReluOperator(_a), *_outputs);
    }

    void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyBinary(DoubleReluGradientOperator(_a), *_inputs, target);
    }

    void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyTernary(AddGradientBinaryOperator<DoubleReluGradientOperator>(DoubleReluGradientOperator(_a)), *_inputs, target, target);
    }
public:
    class DoubleReluOperator {
    private:
        float _a, _n2a;
    public:
        DoubleReluOperator(float a) : _a(a), _n2a(-2.0f / a) {
        }
        virtual __device__ inline float operator()(float x) const {
            return x - _a * (__fdividef(2.0f, 1.0f + __expf(_n2a * x)) - 1.0f);
        }
    };

    class DoubleReluGradientOperator {
    private:
        float _n2a;
    public:
        DoubleReluGradientOperator(float a) : _n2a(-2.0f / a) {
        }
        __device__ inline float operator()(float unitActGrad, float unitInput) const {
            const float tanh = __fdividef(2.0f, 1.0f + __expf(_n2a * unitInput)) - 1.0f;
            return unitActGrad * (tanh * tanh);
        }
    };

    DoubleReluNeuron(float a) : Neuron(), _a(a) {
    }
};

/* =======================
 * SoftReluNeuron
 * -----------------------
 *
 * f(x) = log(1 + e^x)
 * =======================
 */
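// f'(x) = e^x / (1 + e^x), i.e. the logistic sigmoid of the input, so the gradient is
// computed from the original inputs. For x > 4 both passes fall back to the asymptotes
// f(x) ~= x and f'(x) ~= 1, which the operators below use for numerical stability.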
class SoftReluNeuron : public Neuron {
protected:
    void _activate() {
        assert(_inputs != _outputs);
        _inputs->apply(SoftReluOperator(), *_outputs);
    }

    void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyBinary(SoftReluGradientOperator(), *_inputs, target);
    }

    void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyTernary(AddGradientBinaryOperator<SoftReluGradientOperator>(SoftReluGradientOperator()), *_inputs, target, target);
    }
public:
    class SoftReluOperator {
    public:
        __device__ inline float operator()(float x) const {
            // This piece-wise implementation has better numerical stability than
            // simply computing log(1 + e^x).
            return x > 4.0f ? x : __logf(1.0f + __expf(x));
        }
    };

    class SoftReluGradientOperator {
    public:
        __device__ inline float operator()(float unitActGrad, float unitInput) const {
            if (unitInput > 4.0f) {
                return unitActGrad;
            }
            const float f = __expf(unitInput);
            return unitActGrad * __fdividef(f, 1.0f + f);
        }
    };

    SoftReluNeuron() : Neuron() {
    }
};

/* =======================
 * SquareNeuron
 * -----------------------
 *
 * f(x) = x^2
 * =======================
 */
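// f'(x) = 2x, which depends on the sign of the input and so cannot be recovered from
// the activation x^2; the gradient is therefore computed from the original inputs.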
class SquareNeuron : public Neuron {
protected:
    void _activate() {
        assert(_inputs != _outputs);
        _inputs->apply(NVMatrixOps::Square(), *_outputs);
    }

    void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyBinary(SquareGradientOperator(), *_inputs, target);
    }

    void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyTernary(AddGradientBinaryOperator<SquareGradientOperator>(SquareGradientOperator()), *_inputs, target, target);
    }
public:
    class SquareGradientOperator {
    public:
        __device__ inline float operator()(float unitActGrad, float unitInput) const {
            return unitActGrad * 2.0f * unitInput;
        }
    };

    SquareNeuron() : Neuron() {
    }
};

/* =======================
 * SqrtNeuron
 * -----------------------
 *
 * f(x) = sqrt(x)
 * =======================
 */
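// f'(x) = 1 / (2*sqrt(x)) = 1 / (2*y), so the gradient is computed from the activations:
// inputGrad = actsGrad / (2*y).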
class SqrtNeuron : public Neuron {
protected:
    void _activate() {
        _inputs->apply(NVMatrixOps::Sqrt(), *_outputs);
    }

    void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyBinary(SqrtGradientOperator(), *_outputs, target);
    }

    void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyTernary(AddGradientBinaryOperator<SqrtGradientOperator>(SqrtGradientOperator()), *_outputs, target, target);
    }
public:
    class SqrtGradientOperator {
    public:
        __device__ inline float operator()(float unitActGrad, float unitAct) const {
            return __fdividef(unitActGrad, 2.0f * unitAct);
        }
    };

    SqrtNeuron() : Neuron() {
    }
};

/* =======================
 * LinearNeuron
 * -----------------------
 *
 * f(x) = a*x + b
 * =======================
 */
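// f'(x) = a, a constant, so the backward pass is just a scale: inputGrad = a * actsGrad,
// accumulated into the existing gradient via AddGradientOperator when add is requested.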
class LinearNeuron : public Neuron {
protected:
    float _a, _b;
    void _activate() {
        _inputs->apply(LinearOperator(_a, _b), *_outputs);
    }

    void _computeInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.scale(_a, target);
    }

    void _addInputGrad(NVMatrix& actsGrad, NVMatrix& target) {
        actsGrad.applyBinary(AddGradientOperator<NVMatrixOps::MultByScalar>(NVMatrixOps::MultByScalar(_a)), target, target);
    }
public:
    class LinearOperator {
    protected:
        float _a, _b;
    public:
        __device__ inline float operator()(float x) const {
            return _a * x + _b;
        }
        LinearOperator(float a, float b) : _a(a), _b(b) {
        }
    };

    LinearNeuron(float a, float b) : Neuron(), _a(a), _b(b) {
    }
};

#endif /* NEURONS_CUH */