if (Double.isNaN(bLayer.getLearningRate()))
    bLayer.setLearningRate(learningRate);
if (Double.isNaN(bLayer.getBiasLearningRate())) {
    // Two cases when the layer has no bias learning rate of its own:
    // (a) a global bias learning rate is set -> use it
    // (b) otherwise fall back to the layer's (by now resolved) learning rate
    if (!Double.isNaN(biasLearningRate)) {
        bLayer.setBiasLearningRate(biasLearningRate);
    } else {
        bLayer.setBiasLearningRate(bLayer.getLearningRate());
    }
}
if (bLayer.getLearningRateSchedule() == null)
    bLayer.setLearningRateSchedule(learningRateSchedule);
if (Double.isNaN(bLayer.getL1()))
    bLayer.setL1(l1);
if (Double.isNaN(bLayer.getL2()))
    bLayer.setL2(l2);
if (bLayer.getActivationFn() == null)
    bLayer.setActivationFn(activationFn);
if (bLayer.getWeightInit() == null)
    bLayer.setWeightInit(weightInit);
if (Double.isNaN(bLayer.getBiasInit()))
    bLayer.setBiasInit(biasInit);
if (bLayer.getUpdater() == null)
    bLayer.setUpdater(updater);
if (bLayer.getIUpdater() == null)
    bLayer.setIUpdater(iUpdater.clone());
if (bLayer.getGradientNormalization() == null)
    bLayer.setGradientNormalization(gradientNormalization);
if (Double.isNaN(bLayer.getGradientNormalizationThreshold()))
    bLayer.setGradientNormalizationThreshold(gradientNormalizationThreshold);
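// --- Illustrative sketch (not from the codebase above): the NaN-as-unset convention. ---
// Every double-valued hyperparameter above uses Double.NaN as an "unset" sentinel, so
// resolving a layer value against a global default is a one-liner. resolveDouble and
// NaNSentinelExample are hypothetical names written for this example only.
public class NaNSentinelExample {
    static double resolveDouble(double layerValue, double globalDefault) {
        // NaN means "the layer never set this" -> fall back to the global value
        return Double.isNaN(layerValue) ? globalDefault : layerValue;
    }

    public static void main(String[] args) {
        double globalLearningRate = 0.01;
        System.out.println(resolveDouble(Double.NaN, globalLearningRate)); // 0.01 (fallback)
        System.out.println(resolveDouble(0.1, globalLearningRate));        // 0.1 (layer wins)
    }
}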
private static void configureBaseLayer(String layerName, BaseLayer bLayer, boolean useRegularization,
                boolean useDropConnect, Double dropOut, Double l2, Double l2Bias, Double l1, Double l1Bias,
                Distribution dist) {
    // Warn when regularization is enabled but nothing (l1, l2, dropout) has actually been
    // configured, either globally or on the layer itself.
    if (useRegularization && Double.isNaN(l1) && Double.isNaN(bLayer.getL1())
                    && Double.isNaN(l2) && Double.isNaN(bLayer.getL2())
                    && Double.isNaN(l2Bias) && Double.isNaN(l1Bias)
                    && (Double.isNaN(dropOut) || dropOut == 0.0)
                    && (Double.isNaN(bLayer.getDropOut()) || bLayer.getDropOut() == 0.0)) {
        OneTimeLogger.warn(log, "Layer \"" + layerName
                        + "\" regularization is set to true but l1, l2 or dropout has not been added to configuration.");
    }

    // Copy global l1/l2 values to the layer wherever the layer has not set its own.
    if (!Double.isNaN(l1) && Double.isNaN(bLayer.getL1()))
        bLayer.setL1(l1);
    if (!Double.isNaN(l2) && Double.isNaN(bLayer.getL2()))
        bLayer.setL2(l2);
    if (!Double.isNaN(l1Bias) && Double.isNaN(bLayer.getL1Bias()))
        bLayer.setL1Bias(l1Bias);
    if (!Double.isNaN(l2Bias) && Double.isNaN(bLayer.getL2Bias()))
        bLayer.setL2Bias(l2Bias);

    // Converse warning: l1/l2 values are present but useRegularization is false.
    if (!useRegularization && ((!Double.isNaN(l1) && l1 > 0.0)
                    || (!Double.isNaN(bLayer.getL1()) && bLayer.getL1() > 0.0)
                    || (!Double.isNaN(l2) && l2 > 0.0)
                    || (!Double.isNaN(bLayer.getL2()) && bLayer.getL2() > 0.0)
                    || (!Double.isNaN(l1Bias) && l1Bias > 0.0)
                    || (!Double.isNaN(bLayer.getL1Bias()) && bLayer.getL1Bias() > 0.0)
                    || (!Double.isNaN(l2Bias) && l2Bias > 0.0)
                    || (!Double.isNaN(bLayer.getL2Bias()) && bLayer.getL2Bias() > 0.0))) {
        OneTimeLogger.warn(log, "Layer \"" + layerName
                        + "\" l1 or l2 has been added to configuration but useRegularization is set to false.");
    }
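// --- Illustrative sketch: the consistency check behind both warnings above. ---
// Reduced to its essence, configureBaseLayer warns whenever the useRegularization flag
// disagrees with the presence of any regularization value. regularizationMisconfigured
// is a hypothetical condensation of the long || chains; it is not a method in the codebase.
static boolean regularizationMisconfigured(boolean useRegularization, double l1, double l2, double dropOut) {
    boolean hasAnyRegularization = (!Double.isNaN(l1) && l1 > 0.0)
                    || (!Double.isNaN(l2) && l2 > 0.0)
                    || (!Double.isNaN(dropOut) && dropOut > 0.0);
    // Disagreement in either direction is worth a one-time warning
    return useRegularization != hasAnyRegularization;
}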
                Map<Integer, Double> momentumSchedule, double adamMeanDecay, double adamVarDecay,
                double rho, double rmsDecay, double epsilon) {
    if ((!Double.isNaN(momentum) || !Double.isNaN(layer.getMomentum())) && layer.getUpdater() != Updater.NESTEROVS)
        OneTimeLogger.warn(log, "Layer \"" + layerName
                        + "\" momentum has been set but will not be applied unless the updater is set to NESTEROVS.");
    if ((momentumSchedule != null || layer.getMomentumSchedule() != null) && layer.getUpdater() != Updater.NESTEROVS)
        OneTimeLogger.warn(log, "Layer \"" + layerName
                        + "\" momentum schedule has been set but will not be applied unless the updater is set to NESTEROVS.");
    if ((!Double.isNaN(adamVarDecay) || !Double.isNaN(layer.getAdamVarDecay())) && layer.getUpdater() != Updater.ADAM)
        OneTimeLogger.warn(log, "Layer \"" + layerName
                        + "\" adamVarDecay is set but will not be applied unless the updater is set to ADAM.");
    if ((!Double.isNaN(adamMeanDecay) || !Double.isNaN(layer.getAdamMeanDecay())) && layer.getUpdater() != Updater.ADAM)
        OneTimeLogger.warn(log, "Layer \"" + layerName
                        + "\" adamMeanDecay is set but will not be applied unless the updater is set to ADAM.");
    if ((!Double.isNaN(rho) || !Double.isNaN(layer.getRho())) && layer.getUpdater() != Updater.ADADELTA)
        OneTimeLogger.warn(log, "Layer \"" + layerName
                        + "\" rho is set but will not be applied unless the updater is set to ADADELTA.");
    if ((!Double.isNaN(rmsDecay) || !Double.isNaN(layer.getRmsDecay())) && layer.getUpdater() != Updater.RMSPROP)
        OneTimeLogger.warn(log, "Layer \"" + layerName
                        + "\" rmsDecay is set but will not be applied unless the updater is set to RMSPROP.");

    IUpdater u = layer.getIUpdater();
    if (!Double.isNaN(layer.getLearningRate())) {
        u.applySchedules(0, layer.getLearningRate());
    } else if (!Double.isNaN(learningRate)) {
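// --- Illustrative sketch: the kind of configuration these warnings catch. ---
// Momentum is a NESTEROVS-only hyperparameter, so pairing it with ADAM means it is
// silently ignored at training time. This assumes the legacy (pre-IUpdater) builder
// methods momentum(...) and updater(Updater...); the values are arbitrary.
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .updater(Updater.ADAM)
                .momentum(0.9) // would trigger: "momentum has been set but will not be applied..."
                .build();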
if (l == null || !(l instanceof BaseLayer) || ((BaseLayer) l).getIUpdater() != null) {
    return; // nothing to convert: not a BaseLayer, or already using the new IUpdater API
}
BaseLayer bl = (BaseLayer) l;
Updater u = bl.getUpdater();
double lr = bl.getLearningRate();
double eps = bl.getEpsilon();
double rho = bl.getRho();
switch (u) {
    case SGD:
        bl.setIUpdater(new Sgd(lr));
        break;
    case ADAM:
        double meanDecay = bl.getAdamMeanDecay();
        double varDecay = bl.getAdamVarDecay();
        bl.setIUpdater(Adam.builder().learningRate(lr).beta1(meanDecay).beta2(varDecay)
                        .epsilon(eps).build());
        break;
    case ADADELTA:
        bl.setIUpdater(new AdaDelta(rho, eps));
        break;
    case NESTEROVS:
        Map<Integer, Double> momentumSchedule = bl.getMomentumSchedule();
        double momentum = bl.getMomentum();
        bl.setIUpdater(new Nesterovs(lr, momentum, momentumSchedule));
        break;
    case ADAGRAD:
        bl.setIUpdater(new AdaGrad(lr, eps));
        break;
    case RMSPROP:
        double rmsDecay = bl.getRmsDecay();
        bl.setIUpdater(new RmsProp(lr, rmsDecay, eps));
        break;
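// --- Illustrative sketch: one legacy-enum-to-IUpdater conversion in isolation. ---
// The switch above maps each Updater enum constant, plus its loose hyperparameter
// fields on the layer, onto a single self-contained IUpdater instance. The constructor
// signature follows the fragment above; the values here are arbitrary.
double lr = 0.01;
double momentum = 0.9;
IUpdater legacyNesterovs = new Nesterovs(lr, momentum, null); // null = no momentum schedule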
originalUpdater = bl.getUpdater();
origWeightInit = bl.getWeightInit();
if (activationFn != null)
    bl.setActivationFn(activationFn);
if (weightInit != null)
    bl.setWeightInit(weightInit);
if (biasInit != null)
    bl.setBiasInit(biasInit);
if (dist != null)
    bl.setDist(dist);
if (learningRate != null) {
    // Overriding the learning rate also resets the bias learning rate...
    bl.setLearningRate(learningRate);
    bl.setBiasLearningRate(learningRate);
}
if (biasLearningRate != null)
    bl.setBiasLearningRate(biasLearningRate); // ...which an explicit bias LR then overrides
if (learningRateSchedule != null)
    bl.setLearningRateSchedule(learningRateSchedule);
if (l1 != null)
    bl.setL1(l1);
if (l2 != null)
    bl.setL2(l2);
if (l1Bias != null)
    bl.setL1Bias(l1Bias);
if (l2Bias != null)
    bl.setL2Bias(l2Bias);
if (updater != null)
    bl.setUpdater(updater);
if (iUpdater != null)
    bl.setIUpdater(iUpdater);
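// --- Illustrative sketch: the learning-rate override ordering used above. ---
// An overridden learningRate also resets the bias learning rate, and an explicit
// biasLearningRate then takes precedence. applyLrOverrides is a hypothetical helper
// written for this example only; it returns {lr, biasLr}.
static double[] applyLrOverrides(Double learningRate, Double biasLearningRate,
                double currentLr, double currentBiasLr) {
    if (learningRate != null) {
        currentLr = learningRate;
        currentBiasLr = learningRate;     // bias LR follows the main LR by default
    }
    if (biasLearningRate != null) {
        currentBiasLr = biasLearningRate; // explicit bias LR wins
    }
    return new double[] {currentLr, currentBiasLr};
}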
activationFn = bl.getActivationFn().toString();
int nParams = layer.initializer().numParams(nnc);
layerInfoRows.add(new String[] {i18N.getMessage("train.model.layerinfotable.layerNParams"),
                String.valueOf(nParams)});
if (nParams > 0) {
    WeightInit wi = bl.getWeightInit();
    String str = wi.toString();
    if (wi == WeightInit.DISTRIBUTION) {
        str += bl.getDist();
    }
    layerInfoRows.add(new String[] {i18N.getMessage("train.model.layerinfotable.layerWeightInit"), str});
    IUpdater u = bl.getIUpdater();
    String us = (u == null ? "" : u.getClass().getSimpleName());
    layerInfoRows.add(new String[] {i18N.getMessage("train.model.layerinfotable.layerUpdater"), us});
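// --- Illustrative sketch: the row format used by the table above. ---
// Each row is simply a two-element String[] of {label, value}; in the real code the
// label comes from i18N.getMessage(...). The literal labels and values below are
// stand-ins. (Assumes java.util.List / java.util.ArrayList.)
List<String[]> rows = new ArrayList<>();
rows.add(new String[] {"Parameters", String.valueOf(650)});
rows.add(new String[] {"Updater", "Adam"});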
if (n.getLayer() instanceof BaseLayer) {
    BaseLayer bl = (BaseLayer) n.getLayer();
    IUpdater u = bl.getIUpdater();
    if (u instanceof Sgd) {
        double lr = bl.getLearningRate();
        if (lr != 1.0) {
            throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer "
        }
    }
    IActivation activation = bl.getActivationFn();
    if (activation != null) {
        if (!VALID_ACTIVATION_FUNCTIONS.contains(activation.getClass())) {
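// --- Illustrative sketch: the lr == 1.0 invariant as a standalone check. ---
// The validator above rejects any Sgd updater whose learning rate is not exactly 1.0.
// requireUnitLearningRate is a hypothetical extraction of that check, written for
// this example only.
static void requireUnitLearningRate(double lr, String layerName) {
    if (lr != 1.0) {
        throw new IllegalStateException(
                        "When using SGD updater, must also use lr=1.0 for layer " + layerName);
    }
}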
@Override
public INDArray activate(boolean training) {
    INDArray z = preOutput(training);
    // Pre-IActivation approach, kept for reference:
    //INDArray ret = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(
    //                conf.getLayer().getActivationFunction(), z, conf.getExtraArgs()));
    INDArray ret = layerConf().getActivationFn().getActivation(z, training);
    if (maskArray != null) {
        applyMask(ret);
    }
    return ret;
}
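// --- Illustrative sketch: calling an IActivation directly, outside any layer. ---
// Assumes the ND4J activation API as used above (org.nd4j.linalg.activations);
// ActivationReLU.getActivation applies the function in place and returns the array.
IActivation relu = new ActivationReLU();
INDArray z = Nd4j.create(new double[] {-1.0, 2.0, -3.0});
INDArray a = relu.getActivation(z, true); // training=true; result: [0.0, 2.0, 0.0]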
Layer layer = lv.getLayerConf().getLayer();
if (layer instanceof BaseLayer && ((BaseLayer) layer).getActivationFn() == null) {
    // Fill in the activation function when the deserialized layer config lacks one
    String layerName = layer.getLayerName();
    ((BaseLayer) layer).setActivationFn(ia);
    break;
case Schedule:
    // Step schedule: use the explicitly scheduled rate for this iteration, if any
    if (baseLayer.getLearningRateSchedule().containsKey(iteration)) {
        newLr = baseLayer.getLearningRateSchedule().get(iteration);
    } else {
        newLr = lr;
    }
    break;

// Momentum schedules only apply to the Nesterovs updater
if (baseLayer.getIUpdater() instanceof Nesterovs) {
    if (baseLayer.getMomentumSchedule() != null && baseLayer.getMomentumSchedule().containsKey(iteration)) {
        newMomentum = baseLayer.getMomentumSchedule().get(iteration);
    } else {
        newMomentum = baseLayer.getMomentum();
    }
}

if (((BaseLayer) layer.conf().getLayer()).getIUpdater() instanceof Nesterovs) {
    ((BaseLayer) vs.getLayer().conf().getLayer()).setMomentum(newMomentum);
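// --- Illustrative sketch: the schedule lookup pattern above, factored out. ---
// Both the learning-rate and momentum schedules are Map<iteration, value> lookups with
// a fallback to the current value. scheduledValue is a hypothetical helper written for
// this example only. (Assumes java.util.Map.)
static double scheduledValue(Map<Integer, Double> schedule, int iteration, double current) {
    if (schedule != null && schedule.containsKey(iteration)) {
        return schedule.get(iteration);
    }
    return current;
}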
throw new IllegalStateException("No labels found");

if (outputLayer.conf().getLayer() instanceof BaseLayer
                && ((BaseLayer) outputLayer.conf().getLayer()).getWeightInit() == WeightInit.ZERO) {
    throw new IllegalStateException("Output layer weights cannot be initialized to zero when using backprop.");
}
for (Layer layer : layers) {
    if (layer.conf().getLayer() instanceof BaseLayer) {
        lrs.add(((BaseLayer) layer.conf().getLayer()).getLearningRate());
    } else {
        // Layers without a learning rate of their own report 0.0
        lrs.add(0.0);
    }
}
if (lrPolicy != LearningRatePolicy.None || baseLayer.getIUpdater() instanceof Nesterovs) {
    applyLrDecayPolicy(lrPolicy, iteration);
}
    afn = ((BaseLayer) layer.conf().getLayer()).getActivationFn().toString();
} else {
    afn = "n/a";
}
if ((l instanceof BaseLayer) && ((BaseLayer) l).getActivationFn() == null) {
    try {
        JsonNode jsonNode = mapper.readTree(json);
        ((BaseLayer) l).setActivationFn(ia);
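// --- Illustrative sketch: reading a legacy config field out of raw JSON. ---
// Plain Jackson usage as in the fragment above; the field name "activationFunction"
// is an assumption about the legacy format, and the JSON literal is fabricated.
// (Assumes com.fasterxml.jackson.databind.* and java.io.IOException.)
ObjectMapper mapper = new ObjectMapper();
try {
    JsonNode jsonNode = mapper.readTree("{\"activationFunction\":\"relu\"}");
    String legacyAfn = jsonNode.get("activationFunction").asText(); // "relu"
} catch (IOException e) {
    throw new RuntimeException(e);
}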