@SuppressWarnings("unchecked") protected double getExpectationValue() { expectations.zero(); for (int taskIndex = 0; taskIndex < numThreads; taskIndex++) { CRF.Factors exCopy = new CRF.Factors(expectations); tasks.add(new ExpectationTask(start,end,exCopy)); this.expectations.plusEquals(((ExpectationTask)task).getExpectationsCopy(), 1);
public void getBatchValueGradient(double[] buffer, int batchIndex, int[] batchAssignments) {
  assert(batchIndex < this.numBatches) : "Incorrect batch index: " + batchIndex + ", range(0, " + this.numBatches + ")";
  assert(batchAssignments.length == 2 && batchAssignments[0] <= batchAssignments[1])
      : "Invalid batch assignments: " + Arrays.toString(batchAssignments);

  CRF.Factors batchExpectations = expectations.get(batchIndex);
  if (batchIndex == numBatches - 1) {
    // crf parameters' check has to be done only once; infinite values are allowed
    crf.parameters.assertNotNaN();
    // factor the constraints and the prior into the expectations of the last batch
    // Gradient = (constraints - expectations + prior) = -(expectations - constraints - prior)
    // The minus sign is factored in the combineGradients method after all gradients are computed
    batchExpectations.plusEquals(constraints, -1.0);
    if (usingHyperbolicPrior)
      batchExpectations.plusEqualsHyperbolicPriorGradient(crf.parameters, -hyperbolicPriorSlope, hyperbolicPriorSharpness);
    else
      batchExpectations.plusEqualsGaussianPriorGradient(crf.parameters, -gaussianPriorVariance);
    batchExpectations.assertNotNaNOrInfinite();
  }

  double[] gradient = cachedGradient.get(batchIndex);
  // set the cached gradient
  batchExpectations.getParameters(gradient);
  System.arraycopy(gradient, 0, buffer, 0, gradient.length);
}
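// The minus sign mentioned above would be applied once all per-batch
// gradients are in. combineGradients itself is not part of this excerpt, so
// the body below is a minimal sketch, assuming cachedGradient is a
// List<double[]> of per-batch gradients (consistent with the
// cachedGradient.get(batchIndex) call above), not the actual implementation:
protected void combineGradients(double[] buffer) {
  java.util.Arrays.fill(buffer, 0.0);
  for (double[] batchGradient : cachedGradient)
    for (int i = 0; i < buffer.length; i++)
      buffer[i] += batchGradient[i];
  // Flip the sign once: gradient = -(expectations - constraints - prior)
  MatrixOps.timesEquals(buffer, -1.0);
}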
cachedGradient.zero();
// Accumulate the gradient computed by each GE lattice task ...
cachedGradient.plusEquals(((GELatticeTask) task).getGradient(), 1);
// ... then fold the Gaussian prior (gpv = Gaussian prior variance) into both
// the cached value and the cached gradient.
this.cachedValue += crf.getParameters().gaussianPrior(gpv);
cachedGradient.plusEqualsGaussianPriorGradient(crf.getParameters(), gpv);
expectations.plusEquals(constraints, -1.0);
if (usingHyperbolicPrior)
  expectations.plusEqualsHyperbolicPriorGradient(crf.parameters, -hyperbolicPriorSlope, hyperbolicPriorSharpness);
else
  expectations.plusEqualsGaussianPriorGradient(crf.parameters, -gaussianPriorVariance);
expectations.assertNotNaNOrInfinite();
expectations.getParameters(cachedGradient);
MatrixOps.timesEquals(cachedGradient, -1.0); // implements the leading minus in gradient = -(expectations - constraints - prior)
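// For reference, the quantity assembled above is the gradient of the
// penalized log-likelihood with respect to each weight theta_k:
//
//   dL/dtheta_k = constraints_k - expectations_k - theta_k / variance
//
// where constraints_k are the empirical feature counts along the true label
// paths, expectations_k are the model's expected feature counts, and the last
// term is the Gaussian-prior penalty. The code negates each term into
// 'expectations' and then flips the overall sign with timesEquals(-1.0).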
/**
 * Adjust the parameters by learning rate according to the gradient of this
 * single Instance, and return the true label sequence likelihood.
 */
public double trainIncrementalLikelihood(Instance trainingInstance, double rate) {
  double singleLoglik;
  constraints.zero();
  expectations.zero();
  FeatureVectorSequence fvs = (FeatureVectorSequence) trainingInstance.getData();
  Sequence labelSequence = (Sequence) trainingInstance.getTarget();
  singleLoglik = new SumLatticeDefault(crf, fvs, labelSequence, constraints.new Incrementor()).getTotalWeight();
  singleLoglik -= new SumLatticeDefault(crf, fvs, null, expectations.new Incrementor()).getTotalWeight();
  // Calculate parameter gradient given these instances: (constraints - expectations)
  constraints.plusEquals(expectations, -1);
  // Change the parameters a little by this difference, obeying weightsFrozen
  crf.parameters.plusEquals(constraints, rate, true);
  return singleLoglik;
}
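// A minimal usage sketch: one stochastic-gradient pass over a training set,
// calling the method above once per instance. This loop is an illustration,
// not code from the original class; 'trainingSet' and the fixed learning
// rate are assumptions.
public double trainIncrementalLikelihood(InstanceList trainingSet, double rate) {
  double likelihood = 0;
  for (Instance instance : trainingSet)
    likelihood += trainIncrementalLikelihood(instance, rate);
  return likelihood; // total log-likelihood of the true label sequences
}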
/** Returns the log probability of the training sequence labels and the prior over parameters. */
public double getValue() {
  if (crf.weightsValueChangeStamp != cachedValueWeightsStamp) {
    // The cached value is not up to date; it was calculated for a different set of CRF weights.
    cachedValueWeightsStamp = crf.weightsValueChangeStamp; // cachedValue will soon no longer be stale
    long startingTime = System.currentTimeMillis();
    //crf.print();
    // Get the value of all the true labels, also filling in expectations at the same time.
    cachedValue = getExpectationValue();
    // Incorporate the prior on parameters
    if (usingHyperbolicPrior) // Hyperbolic prior
      cachedValue += crf.parameters.hyberbolicPrior(hyperbolicPriorSlope, hyperbolicPriorSharpness);
    else // Gaussian prior
      cachedValue += crf.parameters.gaussianPrior(gaussianPriorVariance);
    // gsc: make sure the prior gives a correct value
    assert(!(Double.isNaN(cachedValue) || Double.isInfinite(cachedValue))) : "Label likelihood is NaN/Infinite";
    logger.info("getValue() (loglikelihood, optimizable by label likelihood) = " + cachedValue);
    long endingTime = System.currentTimeMillis();
    logger.fine("Inference milliseconds = " + (endingTime - startingTime));
  }
  return cachedValue;
}
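// A minimal sketch of how getValue()/getValueGradient() are typically driven
// in MALLET: wrap the optimizable in a gradient-based optimizer and run it.
// The setup shown ('crf', 'trainingData') is an assumption about the
// surrounding code, not taken from this excerpt.
CRFOptimizableByLabelLikelihood optimizable =
    new CRFOptimizableByLabelLikelihood(crf, trainingData);
Optimizer optimizer = new LimitedMemoryBFGS(optimizable); // cc.mallet.optimize
boolean converged = optimizer.optimize(); // repeatedly queries value and gradient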
/**
 * Returns the log probability of a batch of training sequence labels and the prior over
 * parameters; if this is the last batch, the prior on parameters is incorporated as well.
 */
public double getBatchValue(int batchIndex, int[] batchAssignments) {
  assert(batchIndex < this.numBatches) : "Incorrect batch index: " + batchIndex + ", range(0, " + this.numBatches + ")";
  assert(batchAssignments.length == 2 && batchAssignments[0] <= batchAssignments[1])
      : "Invalid batch assignments: " + Arrays.toString(batchAssignments);

  // Get the value of all the true labels for the current batch, also filling in expectations
  double value = getExpectationValue(batchIndex, batchAssignments);
  if (batchIndex == numBatches - 1) {
    if (usingHyperbolicPrior) // Hyperbolic prior
      value += crf.parameters.hyberbolicPrior(hyperbolicPriorSlope, hyperbolicPriorSharpness);
    else // Gaussian prior
      value += crf.parameters.gaussianPrior(gaussianPriorVariance);
  }
  assert(!(Double.isNaN(value) || Double.isInfinite(value)))
      : "Label likelihood is NaN/Infinite, batchIndex: " + batchIndex + ", batchAssignments: " + Arrays.toString(batchAssignments);
  // update cache
  cachedValue[batchIndex] = value;
  return value;
}
/**
 * Set the constraints by running forward-backward with the <i>output label
 * sequence provided</i>, thus restricting it to only those paths that agree with
 * the label sequence.
 */
protected void gatherConstraints(InstanceList ilist) {
  logger.info("Gathering constraints...");
  assert (constraints.structureMatches(crf.parameters));
  constraints.zero();
  for (Instance instance : ilist) {
    FeatureVectorSequence input = (FeatureVectorSequence) instance.getData();
    FeatureSequence output = (FeatureSequence) instance.getTarget();
    double instanceWeight = ilist.getInstanceWeight(instance);
    Transducer.Incrementor incrementor = instanceWeight == 1.0
        ? constraints.new Incrementor()
        : constraints.new WeightedIncrementor(instanceWeight);
    new SumLatticeDefault(this.crf, input, output, incrementor);
  }
  constraints.assertNotNaNOrInfinite();
}
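// In other words, after gatherConstraints runs, constraints_k holds the
// (instance-weighted) empirical count of feature k along the true label
// paths:
//
//   constraints_k = sum over instances i of  w_i * f_k(x_i, y_i)
//
// Because the lattice is clamped to the provided output sequence, the
// forward-backward pass reduces to counting features along that single path.
// These counts are the 'constraints' term in the gradient
// (constraints - expectations - prior penalty) computed above.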