AlexNet/include/weights.cuh

/*
* Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef WEIGHTS_CUH
#define WEIGHTS_CUH
#include <string>
#include <vector>
#include <iostream>
#include <helper_cuda.h>
#include <assert.h>
#include <nvmatrix.cuh>
#include <matrix.h>
#include "util.cuh"
#include "softmaxtree.cuh"
#include <lr.cuh>
using namespace std;

class Weights {
protected:
    Matrix* _hWeights, *_hWeightsInc;
    NVMatrix* _weights, *_weightsInc, *_weightsGrad;
    NVMatrix* _weightsGradAvg, *_weightsGrad2Avg;
    LearningRateSchedule* _lrs;
    float _wc, _mom, _wball, _superEps;
    bool _onGPU, _useGrad, _cleanup;
    int _numUpdates;
    // Non-NULL if these weights are really shared from some other layer
    Weights* _srcWeights;
public:
    // Element-wise functor that maintains an exponential moving average
    // of squared gradients.
    class Grad2AvgOperator {
    private:
        float _mom;
    public:
        Grad2AvgOperator(float mom) : _mom(mom) {
        }
        __device__ inline float operator()(const float G2, const float g) const {
            return _mom * G2 + (1.0f - _mom) * g * g;
        }
    };
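
    // Grad2AvgOperator computes G2_new = mom * G2_old + (1 - mom) * g^2,
    // which is what _weightsGrad2Avg is meant to hold. As a hedged illustration
    // (plain CUDA, independent of the NVMatrix API; the kernel name and launch
    // shape are assumptions, not part of this header), an element-wise
    // application could look like:
    //
    //     __global__ void grad2AvgKernel(float* g2Avg, const float* grad,
    //                                    int n, float mom) {
    //         int i = blockIdx.x * blockDim.x + threadIdx.x;
    //         if (i < n) {
    //             // Same update rule as Grad2AvgOperator::operator()
    //             g2Avg[i] = mom * g2Avg[i] + (1.0f - mom) * grad[i] * grad[i];
    //         }
    //     }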
    NVMatrix& operator*() const;

    Weights(Weights& srcWeights, LearningRateSchedule& lrs);
    Weights(Matrix& hWeights, Matrix& hWeightsInc, LearningRateSchedule& lrs, float wc, float wball, float mom, float superEps, bool useGrad, bool cleanup=true);

    virtual ~Weights();

    virtual NVMatrix& getW() const;
    virtual NVMatrix& getInc() const;
    virtual NVMatrix& getGrad() const;
    virtual Matrix& getCPUW() const;
    virtual Matrix& getCPUWInc() const;
    virtual LearningRateSchedule& getLearningRateSchedule() const;
    virtual int getNumRows() const;
    virtual int getNumCols() const;
    virtual void copyToCPU();

    // This function is assumed to be called in the order in which the layers
    // were defined.
    virtual void copyToGPU();

    virtual void update(float progress);
    int incNumUpdates();

    // Returns the number of times a gradient has been computed for this weight
    // matrix during the current pass through the net (i.e. the interval between
    // two calls of update()). This number will only be greater than 1 if this
    // weight matrix is *shared* by multiple layers in the net. See the
    // illustrative sketch after this class.
    int getNumUpdates() const;

    float getEps(float progress) const;
    float getMom() const;
    float getWC() const;
    float getWBall() const;
    bool isUseGrad() const;
    bool isOwner() const;
    float getSuperEps() const;
};
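
// Hedged sketch of how the Weights interface above is typically driven during
// training. It uses only functions declared in this header; the update rule in
// the comments is only what the stored hyperparameters (momentum _mom, weight
// cost _wc, learning-rate schedule _lrs) suggest; the real rule lives in
// weights.cu and its sign/scale conventions may differ. exampleStep() is a
// hypothetical helper, not part of this API.
//
//     void exampleStep(Weights& w, float progress) {
//         // Each layer sharing this weight matrix accumulates its gradient
//         // into w.getGrad() and bumps the shared counter; getNumUpdates()
//         // exceeds 1 only for shared weights.
//         w.incNumUpdates();
//
//         // One plausible momentum + L2 weight-cost step, with
//         // eps = w.getEps(progress), mom = w.getMom(), wc = w.getWC():
//         //     inc     <- mom * inc + eps * grad - eps * wc * weights
//         //     weights <- weights + inc
//         w.update(progress);
//     }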

class TreeWeights : public Weights {
protected:
    NVMatrix _effWeights;
    NVMatrix* _leafWeights, *_leafGrad, *_leafInc;
    SoftmaxTree* _tree;
public:
    void copyToGPU();
    void update(float progress);
    NVMatrix& getW() const;
    NVMatrix& getInc() const;
    NVMatrix& getGrad() const;
    NVMatrix& getAllW() const;
    NVMatrix& getAllInc() const;
    NVMatrix& getAllGrad() const;
    int getNumRows() const;

    void makeWeights();
    void distributeGradients();
    TreeWeights(SoftmaxTree& tree, Matrix& hWeights, Matrix& hWeightsInc, LearningRateSchedule& lrs, float wcBase, float mom);
};

class DummyWeights : public Weights {
public:
    DummyWeights(Matrix& hWeights, Matrix& hWeightsInc, NVMatrix& weights, NVMatrix& incs, NVMatrix& grads);
};

class WeightList {
private:
    std::vector<Weights*> _weightList;
public:
    Weights& operator[](const int idx) const;
    ~WeightList();
    WeightList();
    void addWeights(Weights& w);
    void update(float progress);
    void copyToCPU();
    void copyToGPU();
    int getSize() const;
};
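
// Hedged usage sketch for WeightList, based only on the declarations above.
// exampleEpoch() and numBatches are hypothetical; construction of the Weights
// objects, Matrix buffers and LearningRateSchedule is elided because it is
// defined elsewhere.
//
//     void exampleEpoch(WeightList& wl, int numBatches) {
//         wl.copyToGPU();                  // push host weights to the device
//         for (int b = 0; b < numBatches; ++b) {
//             float progress = float(b) / numBatches;
//             // ... forward/backward pass fills each Weights' gradients ...
//             wl.update(progress);         // one update per weight matrix
//         }
//         wl.copyToCPU();                  // pull weights back, e.g. for checkpointing
//     }
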
#endif /* WEIGHTS_CUH */