/**
 * Builds an extractor around the given CRF. The feature pipe is not passed
 * in; it is taken from the CRF's own input pipe.
 *
 * @param crf           CRF stored on this extractor; must be non-null because
 *                      its input pipe is read here
 * @param tokpipe       pipe stored as the tokenization pipe
 * @param filter        tokenization filter stored on this extractor
 * @param backgroundTag string stored as the background tag
 */
public CRFExtractor (CRF crf, Pipe tokpipe, TokenizationFilter filter, String backgroundTag)
{
  this.crf = crf;
  this.tokenizationPipe = tokpipe;
  this.filter = filter;
  this.backgroundTag = backgroundTag;
  // Derived last: the feature pipe is whatever the CRF was built with.
  this.featurePipe = (Pipe) crf.getInputPipe ();
}
/**
 * Creates an extractor from a CRF, a tokenization pipe, a tokenization
 * filter and a background tag.
 *
 * <p>The feature pipe is read from {@code crf.getInputPipe ()} rather than
 * supplied by the caller.
 *
 * @param crf           the CRF to keep; dereferenced here, so it must not be null
 * @param tokpipe       the pipe to keep as tokenization pipe
 * @param filter        the tokenization filter to keep
 * @param backgroundTag the background tag to keep
 */
public CRFExtractor (CRF crf, Pipe tokpipe, TokenizationFilter filter, String backgroundTag)
{
  // Read the CRF's pipe up front, then store every collaborator.
  Pipe crfInputPipe = (Pipe) crf.getInputPipe ();

  this.crf = crf;
  this.tokenizationPipe = tokpipe;
  this.featurePipe = crfInputPipe;
  this.filter = filter;
  this.backgroundTag = backgroundTag;
}
/**
 * Four-argument constructor: stores the CRF, tokenization pipe, filter and
 * background tag, and uses the CRF's input pipe as the feature pipe.
 *
 * @param crf           CRF to wrap (non-null; {@code getInputPipe ()} is called on it)
 * @param tokpipe       tokenization pipe to store
 * @param filter        tokenization filter to store
 * @param backgroundTag background tag to store
 */
public CRFExtractor (
    CRF crf,
    Pipe tokpipe,
    TokenizationFilter filter,
    String backgroundTag)
{
  // Caller-supplied collaborators.
  this.backgroundTag = backgroundTag;
  this.filter = filter;
  this.tokenizationPipe = tokpipe;

  // The CRF and the pipe derived from it.
  this.crf = crf;
  this.featurePipe = (Pipe) this.crf.getInputPipe ();
}
/**
 * Serialization hook: restores the default fields, then reads an integer
 * version written after them and patches up fields for older versions.
 *
 * @param in stream to read this object's state from
 * @throws IOException            if reading from the stream fails
 * @throws ClassNotFoundException if a serialized class cannot be resolved
 */
private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException
{
  in.defaultReadObject ();
  final int serialVersion = in.readInt ();

  // Version 0, or any stream that left featurePipe unset: fall back to the CRF's input pipe.
  final boolean featurePipeMissing = (serialVersion == 0) || (featurePipe == null);
  if (featurePipeMissing) {
    featurePipe = (Pipe) crf.getInputPipe ();
  }

  // Versions before 2 get a fresh BIO tokenization filter.
  if (serialVersion < 2) {
    filter = new BIOTokenizationFilter ();
  }
}
/**
 * Custom deserialization. After the default field restore, a version number
 * is read from the stream and used to fill in fields for older versions.
 *
 * @param in object stream positioned at this object's data
 * @throws IOException            on stream read errors
 * @throws ClassNotFoundException if the stream references an unknown class
 */
private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException
{
  in.defaultReadObject ();
  int streamVersion = in.readInt ();

  // Keep the restored feature pipe only when the stream is newer than
  // version 0 AND actually carried one; otherwise reuse the CRF's input pipe.
  boolean keepRestoredPipe = (streamVersion != 0) && (featurePipe != null);
  if (!keepRestoredPipe) {
    featurePipe = (Pipe) crf.getInputPipe ();
  }

  // Anything older than version 2 is given a new BIOTokenizationFilter.
  boolean olderThanTwo = streamVersion < 2;
  if (olderThanTwo) {
    filter = new BIOTokenizationFilter ();
  }
}
/**
 * Restores this object from {@code in}: default fields first, then a
 * trailing version integer that decides which fields need repairing.
 *
 * @param in source stream
 * @throws IOException            if the stream cannot be read
 * @throws ClassNotFoundException if a class named in the stream is missing
 */
private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException
{
  in.defaultReadObject ();
  final int ver = in.readInt ();

  if (ver == 0) {
    // Version-0 streams always take the feature pipe from the CRF.
    featurePipe = (Pipe) crf.getInputPipe ();
  } else if (featurePipe == null) {
    // Later versions only do so when no feature pipe was restored.
    featurePipe = (Pipe) crf.getInputPipe ();
  }

  if (ver < 2) {
    // Versions 0 and 1 are assigned a new BIO filter here.
    filter = new BIOTokenizationFilter ();
  }
}
/**
 * Reads a gzip-compressed, Java-serialized {@code CRF} from {@code is},
 * calls {@code stopGrowth()} on the data alphabet of its input pipe, and
 * then installs it as this object's model and sets {@code trained}.
 *
 * <p>The stream (and therefore {@code is}) is closed before this method
 * returns, whether it succeeds or throws. {@code model} and {@code trained}
 * are only updated once the model has been read and prepared, so a failed
 * load leaves the previous state untouched.
 *
 * <p>NOTE(review): this uses Java native deserialization; only feed it
 * model files from a trusted source.
 *
 * @param is stream containing the gzip-compressed serialized model
 * @throws IOException            if the stream is not valid gzip data or cannot be read
 * @throws ClassNotFoundException if a class referenced by the serialized model is missing
 */
public void readModel(InputStream is) throws IOException, ClassNotFoundException {
    final GZIPInputStream gin = new GZIPInputStream(is);
    try {
        final ObjectInputStream ois = new ObjectInputStream(gin);
        final CRF loaded = (CRF) ois.readObject();
        loaded.getInputPipe().getDataAlphabet().stopGrowth();

        // Publish only after the model is fully prepared.
        model = loaded;
        trained = true;
    } finally {
        // Closing the gzip stream releases its inflater and closes the
        // underlying stream, even if the ObjectInputStream never got built.
        gin.close();
    }
}
/**
 * Copy constructor: builds a CRF whose states and weights are copied from
 * {@code other}.
 *
 * @param other the CRF to copy from; its input and output pipes are assumed
 *              to be non-null
 */
public CRF (CRF other)
{
  // Delegates to the two-pipe constructor with other's pipes. A CRF lacking
  // either pipe is not supported here and would need its own constructor.
  this (other.getInputPipe (), other.getOutputPipe ());

  copyStatesAndWeightsFrom (other);
  assertWeightsLength ();
}
/**
 * Loads a serialized {@code CRF} from the resource named by
 * {@code trainingdata} and prepares the fields used for tagging:
 * {@code stopGrowth()} is called on the CRF's input and output alphabets,
 * target processing is switched off on its input pipe, and a
 * {@code ViterbiConfidenceEstimator} is created for it.
 *
 * <p>Load failures are raised immediately with their cause attached. They
 * were previously printed and then followed by a {@code NullPointerException}
 * on the unset {@code crf} field. The object stream is closed even when
 * deserialization fails.
 *
 * @param trainingdata resource name passed to {@code ResourceUtils.loadResource}
 * @throws IllegalStateException if the resource cannot be read, references an
 *                               unknown class, or deserializes to {@code null}
 */
private void setupClassifier(String trainingdata) {
    try {
        crf_input = new ObjectInputStream(ResourceUtils.loadResource(trainingdata, this.getClass()));
        try {
            crf = (CRF) crf_input.readObject();
        } finally {
            crf_input.close();
        }
    } catch (IOException e) {
        // FileNotFoundException is an IOException, so a missing resource lands here too.
        throw new IllegalStateException("Unable to read CRF model from '" + trainingdata + "'", e);
    } catch (ClassNotFoundException e) {
        throw new IllegalStateException("CRF model '" + trainingdata + "' references an unknown class", e);
    }
    if (crf == null) {
        throw new IllegalStateException("CRF model '" + trainingdata + "' deserialized to null");
    }

    crf.getInputAlphabet().stopGrowth();
    crf.getOutputAlphabet().stopGrowth();
    crf_pipe = crf.getInputPipe();
    crf_pipe.setTargetProcessing(false);
    crf_estimator = new ViterbiConfidenceEstimator(crf);
}
/**
 * Creates a new CRF as a copy of another: the other CRF's pipes are handed
 * to the two-pipe constructor, then its states and weights are copied over.
 *
 * @param other source CRF; expected to have non-null input and output pipes
 */
public CRF (CRF other)
{
  // NOTE: relies on other.getInputPipe () and other.getOutputPipe () being
  // non-null. Supporting a pipe-less CRF would take an additional constructor.
  this (
      other.getInputPipe (),
      other.getOutputPipe ());
  this.copyStatesAndWeightsFrom (other);
  this.assertWeightsLength ();
}
/**
 * Constructs a CRF that duplicates the states and weights of {@code other}.
 *
 * <p>Precondition: {@code other} has both an input pipe and an output pipe.
 * Handling a CRF without them would require a different constructor.
 *
 * @param other CRF whose pipes are reused and whose states and weights are copied
 */
public CRF (CRF other)
{
  this (other.getInputPipe (), other.getOutputPipe ());

  // Step 1: copy the model contents.
  copyStatesAndWeightsFrom (other);

  // Step 2: sanity-check the weights after the copy.
  assertWeightsLength ();
}
/**
 * Reads the serialized CRF stored at {@code oldCrfFile}, pipes
 * {@code testString} through its input pipe, transduces the result and
 * compares the printed output sequence with the expected labels.
 *
 * <p>NOTE(review): the {@code skiptest} prefix presumably keeps the test
 * runner from picking this method up; confirm before renaming.
 */
public void skiptestOldCrf() {
    File serializedCrf = new File(oldCrfFile);
    CRF crf = (CRF) FileUtils.readObject(serializedCrf);

    Instance piped = crf.getInputPipe().instanceFrom(
        new Instance(testString, null, null, null));
    Sequence input = (Sequence) piped.getData();

    Sequence labels = crf.transduce(input);
    assertEquals(" B-PER I-PER O O", labels.toString());
}
static CRFExtractor hackCrfExtor (CRF crf) { Pipe[] newPipes = new Pipe [3]; SerialPipes pipes = (SerialPipes) crf.getInputPipe (); for (int i = 0; i < 3; i++) { Pipe p0 = pipes.getPipe (0); //pipes.removePipe (0); TODO Fix me //p0.setParent (null); newPipes[i] = p0; } Pipe tokPipe = new SerialPipes (newPipes); CRFExtractor extor = new CRFExtractor (crf, (Pipe)tokPipe); return extor; }
static CRFExtractor hackCrfExtor (CRF crf) { Pipe[] newPipes = new Pipe [3]; SerialPipes pipes = (SerialPipes) crf.getInputPipe (); for (int i = 0; i < 3; i++) { Pipe p0 = pipes.getPipe (0); //pipes.removePipe (0); TODO Fix me //p0.setParent (null); newPipes[i] = p0; } Pipe tokPipe = new SerialPipes (newPipes); CRFExtractor extor = new CRFExtractor (crf, (Pipe)tokPipe); return extor; }
/**
 * Checks that a previously serialized CRF (read from {@code oldCrfFile})
 * labels {@code testString} with the expected tag sequence.
 */
public void skiptestOldCrf() {
    // Deserialize the stored model.
    final CRF oldCrf = (CRF) FileUtils.readObject(new File(oldCrfFile));

    // Run the raw test string through the model's own input pipe.
    final Instance processed = oldCrf.getInputPipe().instanceFrom(
        new Instance(testString, null, null, null));

    // Transduce and compare the textual form of the output.
    final Sequence predicted = oldCrf.transduce((Sequence) processed.getData());
    final String actual = predicted.toString();
    assertEquals(" B-PER I-PER O O", actual);
}
private Sequence<String> getCoding(Word word, int expectedSylls) { Instance instance = new Instance(word, null, null, null); instance = crf.getInputPipe().instanceFrom(instance); Sequence inSeq = (Sequence) instance.getData(); List<Sequence<Object>> outSeqs = crf.getMaxLatticeFactory().newMaxLattice(crf, inSeq).bestOutputSequences(10); for (Sequence outSeq : outSeqs) { // see if the outSeq is legal and if so then return it if (isLegal(word.getValue(), outSeq, expectedSylls)) { return outSeq; } } // none legal? just return highest probability return (Sequence) outSeqs.get(0); }
/**
 * Runs the unigrams through the CRF's input pipe, takes the single best
 * output sequence, and lets
 * {@code SyllTagTrainer.startsFromGraphemeSyllEnding} turn it into a set of
 * integers.
 *
 * @param wordUnigrams unigrams used as the instance data
 * @return the set computed from the best output sequence
 */
public Set<Integer> tagSyllStarts(List<String> wordUnigrams) {
    Instance raw = new Instance(wordUnigrams, null, null, null);
    Instance piped = crf.getInputPipe().instanceFrom(raw);
    Sequence input = (Sequence) piped.getData();

    Sequence<Object> bestTags =
        crf.getMaxLatticeFactory().newMaxLattice(crf, input).bestOutputSequence();
    return SyllTagTrainer.startsFromGraphemeSyllEnding(bestTags);
}
} // closes the enclosing scope whose opening lies outside this excerpt
/**
 * Tags a word given as unigrams: the unigram list is wrapped in an
 * {@code Instance}, piped, decoded to the best output sequence by the CRF,
 * and converted by {@code SyllTagTrainer.startsFromGraphemeSyllEnding}.
 *
 * @param wordUnigrams the word's unigrams
 * @return whatever {@code startsFromGraphemeSyllEnding} derives from the best sequence
 */
public Set<Integer> tagSyllStarts(List<String> wordUnigrams) {
    // Step 1: wrap and pipe the input.
    Instance unpiped = new Instance(wordUnigrams, null, null, null);
    final Sequence features = (Sequence) crf.getInputPipe().instanceFrom(unpiped).getData();

    // Step 2: decode the single best labelling.
    final Sequence<Object> decoded = crf.getMaxLatticeFactory()
        .newMaxLattice(crf, features)
        .bestOutputSequence();

    // Step 3: convert the labels.
    final Set<Integer> starts = SyllTagTrainer.startsFromGraphemeSyllEnding(decoded);
    return starts;
}
} // closes the enclosing scope whose opening lies outside this excerpt
/**
 * Produces up to {@code nBest} alignments for {@code x}. Each of the CRF's
 * best output sequences is scored as its constrained sum-lattice weight
 * minus the unconstrained one, and turned into an {@code Alignment}.
 * Alignments whose graphones are already in the result list are skipped.
 *
 * @param x     the word to align
 * @param nBest number of output sequences requested from the max lattice
 * @return the alignments, in the order the sequences were returned
 */
@Override
public List<Alignment> inferAlignments(Word x, int nBest) {
    Instance raw = new Instance(x.getValue(), null, null, null);
    Instance piped = crf.getInputPipe().instanceFrom(raw);
    Sequence input = (Sequence) piped.getData();

    List<Sequence<Object>> candidates =
        crf.getMaxLatticeFactory().newMaxLattice(crf, input).bestOutputSequences(nBest);
    ArrayList<Alignment> results = Lists.newArrayListWithCapacity(candidates.size());

    // Total weight over all labellings; subtracted from each candidate's weight below.
    double totalWeight = crf.getSumLatticeFactory().newSumLattice(crf, input).getTotalWeight();

    for (Sequence<Object> candidate : candidates) {
        double candidateWeight =
            crf.getSumLatticeFactory().newSumLattice(crf, input, candidate).getTotalWeight();
        Alignment alignment = makeAlignment(x, candidate, candidateWeight - totalWeight);
        if (resultsContain(alignment.getGraphones(), results)) {
            continue; // duplicate graphones
        }
        results.add(alignment);
    }
    return results;
}
/**
 * Infers alignments for {@code x} from the CRF's {@code nBest} best output
 * sequences. For each sequence the graphemes are built with
 * {@code makeGraphemes}; a new {@code Alignment} is added unless
 * {@code resultsContain} reports those graphemes as already present. The
 * score is the sequence's sum-lattice weight minus the unconstrained weight.
 *
 * @param x     the word to align
 * @param nBest how many output sequences to request
 * @return the de-duplicated alignments in candidate order
 */
@Override
public List<Alignment> inferAlignments(Word x, int nBest) {
    final Instance unpiped = new Instance(x.getValue(), null, null, null);
    final Instance piped = crf.getInputPipe().instanceFrom(unpiped);
    final Sequence features = (Sequence) piped.getData();

    final List<Sequence<Object>> nBestSeqs =
        crf.getMaxLatticeFactory().newMaxLattice(crf, features).bestOutputSequences(nBest);
    final ArrayList<Alignment> alignments = Lists.newArrayListWithCapacity(nBestSeqs.size());

    // Weight of the unconstrained lattice, shared by every candidate's score.
    final double z = crf.getSumLatticeFactory().newSumLattice(crf, features).getTotalWeight();

    for (Sequence<Object> seq : nBestSeqs) {
        final double seqWeight =
            crf.getSumLatticeFactory().newSumLattice(crf, features, seq).getTotalWeight();
        final List<Pair<String, String>> graphemes = makeGraphemes(x, seq);

        final boolean alreadyPresent = resultsContain(graphemes, alignments);
        if (!alreadyPresent) {
            alignments.add(new Alignment(x, graphemes, seqWeight - z));
        }
    }
    return alignments;
}