/** Automatically sets the learning rate to one that would be good */ public void setLearningRateByLikelihood(InstanceList trainingSample) { int numIterations = 5; // was 10 -akm 1/25/08 double bestLearningRate = Double.NEGATIVE_INFINITY; double bestLikelihoodChange = Double.NEGATIVE_INFINITY; double currLearningRate = 5e-11; while (currLearningRate < 1) { currLearningRate *= 2; crf.parameters.zero(); double beforeLikelihood = computeLikelihood(trainingSample); double likelihoodChange = trainSample(trainingSample, numIterations, currLearningRate) - beforeLikelihood; System.out.println("likelihood change = " + likelihoodChange + " for learningrate=" + currLearningRate); if (likelihoodChange > bestLikelihoodChange) { bestLikelihoodChange = likelihoodChange; bestLearningRate = currLearningRate; } } // reset the parameters crf.parameters.zero(); // conservative estimate for learning rate bestLearningRate /= 2; System.out.println("Setting learning rate to " + bestLearningRate); setLearningRate(bestLearningRate); }
/**
 * Gathers the PR (posterior-regularization) constraint statistics over the
 * whole training set.
 *
 * For each instance it runs a {@code SumLatticePR} forward-backward pass
 * under the auxiliary model, converts the resulting gammas/xis into
 * per-position probabilities, caches them in {@code initialProbList},
 * {@code finalProbList} and {@code transitionProbList}, and accumulates
 * constraint counts into {@code constraints} via a {@code SumLatticeKL}
 * built for its side effect only.
 *
 * @param auxModel   auxiliary PR model used to score the lattice
 * @param cachedDots precomputed dot products, indexed per instance
 *                   ({@code cachedDots[ii]})
 */
@SuppressWarnings("unchecked")
protected void gatherConstraints(
        PRAuxiliaryModel auxModel, double[][][][] cachedDots) {
    // Fresh per-instance probability caches, rebuilt on every call.
    initialProbList = new ArrayList<double[]>();
    finalProbList = new ArrayList<double[]>();
    transitionProbList = new ArrayList<double[][][]>();
    constraints = new CRF.Factors(crf.getParameters());
    expectations = new CRF.Factors(crf.getParameters());
    // NOTE(review): only constraints is zeroed; expectations is presumably
    // zeroed by whoever fills it later -- confirm against the caller.
    constraints.zero();
    for (int ii = 0; ii < trainingSet.size(); ii++) {
        Instance inst = trainingSet.get(ii);
        Sequence input = (Sequence) inst.getData();
        // NOTE(review): the boolean/null arguments follow SumLatticePR's
        // positional signature -- see that class for their meaning.
        SumLatticePR geLatt = new SumLatticePR(crf, ii, input, null, auxModel,
                cachedDots[ii], false, null, null, true);
        double gammas[][] = geLatt.getGammas();
        // First and last gamma rows become initial/final state probabilities.
        double initialProbs[] = toProbabilities(gammas[0]);
        initialProbList.add(initialProbs);
        double finalProbs[] = toProbabilities(gammas[gammas.length - 1]);
        finalProbList.add(finalProbs);
        double transitionProbs[][][] = geLatt.getXis();
        // Normalizes transitionProbs in place before caching it.
        toProbabilities(transitionProbs);
        transitionProbList.add(transitionProbs);
        // Constructed for its side effect: the incrementor folds this
        // instance's expected counts into the shared constraints.
        new SumLatticeKL(crf, input, initialProbs, finalProbs,
                transitionProbs, null, constraints.new Incrementor());
    }
}
/** Automatically sets the learning rate to one that would be good */ public void setLearningRateByLikelihood(InstanceList trainingSample) { int numIterations = 5; // was 10 -akm 1/25/08 double bestLearningRate = Double.NEGATIVE_INFINITY; double bestLikelihoodChange = Double.NEGATIVE_INFINITY; double currLearningRate = 5e-11; while (currLearningRate < 1) { currLearningRate *= 2; crf.parameters.zero(); double beforeLikelihood = computeLikelihood(trainingSample); double likelihoodChange = trainSample(trainingSample, numIterations, currLearningRate) - beforeLikelihood; System.out.println("likelihood change = " + likelihoodChange + " for learningrate=" + currLearningRate); if (likelihoodChange > bestLikelihoodChange) { bestLikelihoodChange = likelihoodChange; bestLearningRate = currLearningRate; } } // reset the parameters crf.parameters.zero(); // conservative estimate for learning rate bestLearningRate /= 2; System.out.println("Setting learning rate to " + bestLearningRate); setLearningRate(bestLearningRate); }
/** * Adjust the parameters by learning rate according to the gradient of this * single Instance, and return the true label sequence likelihood. */ public double trainIncrementalLikelihood(Instance trainingInstance, double rate) { double singleLoglik; constraints.zero(); expectations.zero(); FeatureVectorSequence fvs = (FeatureVectorSequence) trainingInstance .getData(); Sequence labelSequence = (Sequence) trainingInstance.getTarget(); singleLoglik = new SumLatticeDefault(crf, fvs, labelSequence, constraints.new Incrementor()).getTotalWeight(); singleLoglik -= new SumLatticeDefault(crf, fvs, null, expectations.new Incrementor()).getTotalWeight(); // Calculate parameter gradient given these instances: (constraints - // expectations) constraints.plusEquals(expectations, -1); // Change the parameters a little by this difference, obeying // weightsFrozen crf.parameters.plusEquals(constraints, rate, true); return singleLoglik; } }
/**
 * Gathers the PR (posterior-regularization) constraint statistics over the
 * whole training set.
 *
 * For each instance it runs a {@code SumLatticePR} forward-backward pass
 * under the auxiliary model, converts the resulting gammas/xis into
 * per-position probabilities, caches them in {@code initialProbList},
 * {@code finalProbList} and {@code transitionProbList}, and accumulates
 * constraint counts into {@code constraints} via a {@code SumLatticeKL}
 * built for its side effect only.
 *
 * @param auxModel   auxiliary PR model used to score the lattice
 * @param cachedDots precomputed dot products, indexed per instance
 *                   ({@code cachedDots[ii]})
 */
@SuppressWarnings("unchecked")
protected void gatherConstraints(
        PRAuxiliaryModel auxModel, double[][][][] cachedDots) {
    // Fresh per-instance probability caches, rebuilt on every call.
    initialProbList = new ArrayList<double[]>();
    finalProbList = new ArrayList<double[]>();
    transitionProbList = new ArrayList<double[][][]>();
    constraints = new CRF.Factors(crf.getParameters());
    expectations = new CRF.Factors(crf.getParameters());
    // NOTE(review): only constraints is zeroed; expectations is presumably
    // zeroed by whoever fills it later -- confirm against the caller.
    constraints.zero();
    for (int ii = 0; ii < trainingSet.size(); ii++) {
        Instance inst = trainingSet.get(ii);
        Sequence input = (Sequence) inst.getData();
        // NOTE(review): the boolean/null arguments follow SumLatticePR's
        // positional signature -- see that class for their meaning.
        SumLatticePR geLatt = new SumLatticePR(crf, ii, input, null, auxModel,
                cachedDots[ii], false, null, null, true);
        double gammas[][] = geLatt.getGammas();
        // First and last gamma rows become initial/final state probabilities.
        double initialProbs[] = toProbabilities(gammas[0]);
        initialProbList.add(initialProbs);
        double finalProbs[] = toProbabilities(gammas[gammas.length - 1]);
        finalProbList.add(finalProbs);
        double transitionProbs[][][] = geLatt.getXis();
        // Normalizes transitionProbs in place before caching it.
        toProbabilities(transitionProbs);
        transitionProbList.add(transitionProbs);
        // Constructed for its side effect: the incrementor folds this
        // instance's expected counts into the shared constraints.
        new SumLatticeKL(crf, input, initialProbs, finalProbs,
                transitionProbs, null, constraints.new Incrementor());
    }
}
/** * Adjust the parameters by learning rate according to the gradient of this * single Instance, and return the true label sequence likelihood. */ public double trainIncrementalLikelihood(Instance trainingInstance, double rate) { double singleLoglik; constraints.zero(); expectations.zero(); FeatureVectorSequence fvs = (FeatureVectorSequence) trainingInstance .getData(); Sequence labelSequence = (Sequence) trainingInstance.getTarget(); singleLoglik = new SumLatticeDefault(crf, fvs, labelSequence, constraints.new Incrementor()).getTotalWeight(); singleLoglik -= new SumLatticeDefault(crf, fvs, null, expectations.new Incrementor()).getTotalWeight(); // Calculate parameter gradient given these instances: (constraints - // expectations) constraints.plusEquals(expectations, -1); // Change the parameters a little by this difference, obeying // weightsFrozen crf.parameters.plusEquals(constraints, rate, true); return singleLoglik; } }
/**
 * Gathers the PR (posterior-regularization) constraint statistics over the
 * whole training set.
 *
 * For each instance it runs a {@code SumLatticePR} forward-backward pass
 * under the auxiliary model, converts the resulting gammas/xis into
 * per-position probabilities, caches them in {@code initialProbList},
 * {@code finalProbList} and {@code transitionProbList}, and accumulates
 * constraint counts into {@code constraints} via a {@code SumLatticeKL}
 * built for its side effect only.
 *
 * @param auxModel   auxiliary PR model used to score the lattice
 * @param cachedDots precomputed dot products, indexed per instance
 *                   ({@code cachedDots[ii]})
 */
@SuppressWarnings("unchecked")
protected void gatherConstraints(
        PRAuxiliaryModel auxModel, double[][][][] cachedDots) {
    // Fresh per-instance probability caches, rebuilt on every call.
    initialProbList = new ArrayList<double[]>();
    finalProbList = new ArrayList<double[]>();
    transitionProbList = new ArrayList<double[][][]>();
    constraints = new CRF.Factors(crf.getParameters());
    expectations = new CRF.Factors(crf.getParameters());
    // NOTE(review): only constraints is zeroed; expectations is presumably
    // zeroed by whoever fills it later -- confirm against the caller.
    constraints.zero();
    for (int ii = 0; ii < trainingSet.size(); ii++) {
        Instance inst = trainingSet.get(ii);
        Sequence input = (Sequence) inst.getData();
        // NOTE(review): the boolean/null arguments follow SumLatticePR's
        // positional signature -- see that class for their meaning.
        SumLatticePR geLatt = new SumLatticePR(crf, ii, input, null, auxModel,
                cachedDots[ii], false, null, null, true);
        double gammas[][] = geLatt.getGammas();
        // First and last gamma rows become initial/final state probabilities.
        double initialProbs[] = toProbabilities(gammas[0]);
        initialProbList.add(initialProbs);
        double finalProbs[] = toProbabilities(gammas[gammas.length - 1]);
        finalProbList.add(finalProbs);
        double transitionProbs[][][] = geLatt.getXis();
        // Normalizes transitionProbs in place before caching it.
        toProbabilities(transitionProbs);
        transitionProbList.add(transitionProbs);
        // Constructed for its side effect: the incrementor folds this
        // instance's expected counts into the shared constraints.
        new SumLatticeKL(crf, input, initialProbs, finalProbs,
                transitionProbs, null, constraints.new Incrementor());
    }
}
/** * Adjust the parameters by learning rate according to the gradient of this * single Instance, and return the true label sequence likelihood. */ public double trainIncrementalLikelihood(Instance trainingInstance, double rate) { double singleLoglik; constraints.zero(); expectations.zero(); FeatureVectorSequence fvs = (FeatureVectorSequence) trainingInstance .getData(); Sequence labelSequence = (Sequence) trainingInstance.getTarget(); singleLoglik = new SumLatticeDefault(crf, fvs, labelSequence, constraints.new Incrementor()).getTotalWeight(); singleLoglik -= new SumLatticeDefault(crf, fvs, null, expectations.new Incrementor()).getTotalWeight(); // Calculate parameter gradient given these instances: (constraints - // expectations) constraints.plusEquals(expectations, -1); // Change the parameters a little by this difference, obeying // weightsFrozen crf.parameters.plusEquals(constraints, rate, true); return singleLoglik; } }
/**
 * Set the constraints by running forward-backward with the <i>output label
 * sequence provided</i>, thus restricting it to only those paths that agree
 * with the label sequence.
 */
protected void gatherConstraints(InstanceList ilist) {
    logger.info("Gathering constraints...");
    assert (constraints.structureMatches(crf.parameters));
    constraints.zero();
    for (Instance example : ilist) {
        FeatureVectorSequence observed =
                (FeatureVectorSequence) example.getData();
        FeatureSequence labels = (FeatureSequence) example.getTarget();
        double weight = ilist.getInstanceWeight(example);
        // Unweighted instances use the plain incrementor; weighted ones
        // scale every accumulated count by the instance weight.
        Transducer.Incrementor acc;
        if (weight == 1.0) {
            acc = constraints.new Incrementor();
        } else {
            acc = constraints.new WeightedIncrementor(weight);
        }
        // Label-clamped lattice; its side effect fills `constraints`.
        new SumLatticeDefault(this.crf, observed, labels, acc);
    }
    constraints.assertNotNaNOrInfinite();
}
/**
 * Resets, computes and fills expectations from all instances, also updating
 * the entropy value. <p>
 *
 * Analogous to <tt>CRFOptimizableByLabelLikelihood.getExpectationValue</tt>.
 */
public void computeExpectations() {
    expectations.zero();
    // NOTE(review): cachedValue is accumulated below but not reset here --
    // presumably the caller zeroes it first; confirm before relying on this.
    // now, update the expectations due to each instance for entropy reg.
    for (int ii = 0; ii < data.size(); ii++) {
        FeatureVectorSequence input =
                (FeatureVectorSequence) data.get(ii).getData();
        // Full (unclamped) forward-backward pass over this instance.
        SumLattice lattice = new SumLatticeDefault(crf, input, true);
        // update the expectations via the entropy lattice's incrementor
        EntropyLattice entropyLattice = new EntropyLattice(
                input, lattice.getGammas(), lattice.getXis(), crf,
                incrementor, scalingFactor);
        cachedValue += entropyLattice.getEntropy();
    }
}
/**
 * Resets, computes and fills expectations from all instances, also updating
 * the entropy value. <p>
 *
 * Analogous to <tt>CRFOptimizableByLabelLikelihood.getExpectationValue</tt>.
 */
public void computeExpectations() {
    expectations.zero();
    // NOTE(review): cachedValue is accumulated below but not reset here --
    // presumably the caller zeroes it first; confirm before relying on this.
    // now, update the expectations due to each instance for entropy reg.
    for (int ii = 0; ii < data.size(); ii++) {
        FeatureVectorSequence input =
                (FeatureVectorSequence) data.get(ii).getData();
        // Full (unclamped) forward-backward pass over this instance.
        SumLattice lattice = new SumLatticeDefault(crf, input, true);
        // update the expectations via the entropy lattice's incrementor
        EntropyLattice entropyLattice = new EntropyLattice(
                input, lattice.getGammas(), lattice.getXis(), crf,
                incrementor, scalingFactor);
        cachedValue += entropyLattice.getEntropy();
    }
}
/**
 * Set the constraints by running forward-backward with the <i>output label
 * sequence provided</i>, thus restricting it to only those paths that agree
 * with the label sequence.
 */
protected void gatherConstraints(InstanceList ilist) {
    logger.info("Gathering constraints...");
    assert (constraints.structureMatches(crf.parameters));
    constraints.zero();
    for (Instance example : ilist) {
        FeatureVectorSequence observed =
                (FeatureVectorSequence) example.getData();
        FeatureSequence labels = (FeatureSequence) example.getTarget();
        double weight = ilist.getInstanceWeight(example);
        // Unweighted instances use the plain incrementor; weighted ones
        // scale every accumulated count by the instance weight.
        Transducer.Incrementor acc;
        if (weight == 1.0) {
            acc = constraints.new Incrementor();
        } else {
            acc = constraints.new WeightedIncrementor(weight);
        }
        // Label-clamped lattice; its side effect fills `constraints`.
        new SumLatticeDefault(this.crf, observed, labels, acc);
    }
    constraints.assertNotNaNOrInfinite();
}
protected void gatherConstraints (InstanceList ilist) { // Set the constraints by running forward-backward with the *output // label sequence provided*, thus restricting it to only those // paths that agree with the label sequence. // Zero the constraints[] // Reset constraints[] to zero before we fill them again assert (constraints.structureMatches(crf.parameters)); constraints.zero(); for (Instance instance : ilist) { FeatureVectorSequence input = (FeatureVectorSequence) instance.getData(); FeatureSequence output = (FeatureSequence) instance.getTarget(); double instanceWeight = ilist.getInstanceWeight(instance); //System.out.println ("Constraint-gathering on instance "+i+" of "+ilist.size()); Transducer.Incrementor incrementor = instanceWeight == 1.0 ? constraints.new Incrementor() : constraints.new WeightedIncrementor(instanceWeight); new SumLatticeDefault (this.crf, input, output, incrementor); } // System.out.println ("testing Value and Gradient"); // TestOptimizable.testValueAndGradientCurrentParameters (this); }
protected void gatherConstraints (InstanceList ilist) { // Set the constraints by running forward-backward with the *output // label sequence provided*, thus restricting it to only those // paths that agree with the label sequence. // Zero the constraints[] // Reset constraints[] to zero before we fill them again assert (constraints.structureMatches(crf.parameters)); constraints.zero(); for (Instance instance : ilist) { FeatureVectorSequence input = (FeatureVectorSequence) instance.getData(); FeatureSequence output = (FeatureSequence) instance.getTarget(); double instanceWeight = ilist.getInstanceWeight(instance); //System.out.println ("Constraint-gathering on instance "+i+" of "+ilist.size()); Transducer.Incrementor incrementor = instanceWeight == 1.0 ? constraints.new Incrementor() : constraints.new WeightedIncrementor(instanceWeight); new SumLatticeDefault (this.crf, input, output, incrementor); } // System.out.println ("testing Value and Gradient"); // TestOptimizable.testValueAndGradientCurrentParameters (this); }
protected void gatherConstraints (InstanceList ilist) { // Set the constraints by running forward-backward with the *output // label sequence provided*, thus restricting it to only those // paths that agree with the label sequence. // Zero the constraints[] // Reset constraints[] to zero before we fill them again assert (constraints.structureMatches(crf.parameters)); constraints.zero(); for (Instance instance : ilist) { FeatureVectorSequence input = (FeatureVectorSequence) instance.getData(); FeatureSequence output = (FeatureSequence) instance.getTarget(); double instanceWeight = ilist.getInstanceWeight(instance); //System.out.println ("Constraint-gathering on instance "+i+" of "+ilist.size()); Transducer.Incrementor incrementor = instanceWeight == 1.0 ? constraints.new Incrementor() : constraints.new WeightedIncrementor(instanceWeight); new SumLatticeDefault (this.crf, input, output, incrementor); } // System.out.println ("testing Value and Gradient"); // TestOptimizable.testValueAndGradientCurrentParameters (this); }
/**
 * Set the constraints by running forward-backward with the <i>output label
 * sequence provided</i>, thus restricting it to only those paths that agree
 * with the label sequence.
 */
protected void gatherConstraints(InstanceList ilist) {
    logger.info("Gathering constraints...");
    assert (constraints.structureMatches(crf.parameters));
    constraints.zero();
    for (Instance example : ilist) {
        FeatureVectorSequence observed =
                (FeatureVectorSequence) example.getData();
        FeatureSequence labels = (FeatureSequence) example.getTarget();
        double weight = ilist.getInstanceWeight(example);
        // Unweighted instances use the plain incrementor; weighted ones
        // scale every accumulated count by the instance weight.
        Transducer.Incrementor acc;
        if (weight == 1.0) {
            acc = constraints.new Incrementor();
        } else {
            acc = constraints.new WeightedIncrementor(weight);
        }
        // Label-clamped lattice; its side effect fills `constraints`.
        new SumLatticeDefault(this.crf, observed, labels, acc);
    }
    constraints.assertNotNaNOrInfinite();
}
/**
 * Resets, computes and fills expectations from all instances, also updating
 * the entropy value. <p>
 *
 * Analogous to <tt>CRFOptimizableByLabelLikelihood.getExpectationValue</tt>.
 */
public void computeExpectations() {
    expectations.zero();
    // NOTE(review): cachedValue is accumulated below but not reset here --
    // presumably the caller zeroes it first; confirm before relying on this.
    // now, update the expectations due to each instance for entropy reg.
    for (int ii = 0; ii < data.size(); ii++) {
        FeatureVectorSequence input =
                (FeatureVectorSequence) data.get(ii).getData();
        // Full (unclamped) forward-backward pass over this instance.
        SumLattice lattice = new SumLatticeDefault(crf, input, true);
        // update the expectations via the entropy lattice's incrementor
        EntropyLattice entropyLattice = new EntropyLattice(
                input, lattice.getGammas(), lattice.getXis(), crf,
                incrementor, scalingFactor);
        cachedValue += entropyLattice.getEntropy();
    }
}
/**
 * Returns the total label log-likelihood of the sample under the current
 * parameters: for each instance, the clamped lattice weight minus the free
 * (all-paths) lattice weight. As a side effect, zeroes the shared
 * constraints/expectations accumulators before returning.
 */
private double computeLikelihood(InstanceList trainingSample) {
    double total = 0.0;
    for (Instance example : trainingSample) {
        FeatureVectorSequence observed =
                (FeatureVectorSequence) example.getData();
        Sequence labels = (Sequence) example.getTarget();
        // Numerator: paths agreeing with the true labels.
        total += new SumLatticeDefault(crf, observed, labels, null)
                .getTotalWeight();
        // Denominator: all paths (partition function).
        total -= new SumLatticeDefault(crf, observed, null, null)
                .getTotalWeight();
    }
    // Leave the shared accumulators clean for the next caller.
    constraints.zero();
    expectations.zero();
    return total;
}
/**
 * Returns the total label log-likelihood of the sample under the current
 * parameters: for each instance, the clamped lattice weight minus the free
 * (all-paths) lattice weight. As a side effect, zeroes the shared
 * constraints/expectations accumulators before returning.
 */
private double computeLikelihood(InstanceList trainingSample) {
    double total = 0.0;
    for (Instance example : trainingSample) {
        FeatureVectorSequence observed =
                (FeatureVectorSequence) example.getData();
        Sequence labels = (Sequence) example.getTarget();
        // Numerator: paths agreeing with the true labels.
        total += new SumLatticeDefault(crf, observed, labels, null)
                .getTotalWeight();
        // Denominator: all paths (partition function).
        total -= new SumLatticeDefault(crf, observed, null, null)
                .getTotalWeight();
    }
    // Leave the shared accumulators clean for the next caller.
    constraints.zero();
    expectations.zero();
    return total;
}
/**
 * Returns the total label log-likelihood of the sample under the current
 * parameters: for each instance, the clamped lattice weight minus the free
 * (all-paths) lattice weight. As a side effect, zeroes the shared
 * constraints/expectations accumulators before returning.
 */
private double computeLikelihood(InstanceList trainingSample) {
    double total = 0.0;
    for (Instance example : trainingSample) {
        FeatureVectorSequence observed =
                (FeatureVectorSequence) example.getData();
        Sequence labels = (Sequence) example.getTarget();
        // Numerator: paths agreeing with the true labels.
        total += new SumLatticeDefault(crf, observed, labels, null)
                .getTotalWeight();
        // Denominator: all paths (partition function).
        total -= new SumLatticeDefault(crf, observed, null, null)
                .getTotalWeight();
    }
    // Leave the shared accumulators clean for the next caller.
    constraints.zero();
    expectations.zero();
    return total;
}