/**
 * Convenience constructor that delegates to the full constructor of this class.
 *
 * <p>The delegation fills in {@code NameFinderME.DEFAULT_BEAM_SIZE} as the beam size,
 * a freshly created {@code BioCodec} as the sequence codec and a freshly created
 * default {@code TokenNameFinderFactory} as the factory; all other arguments are
 * passed through unchanged.
 *
 * @param languageCode passed through to the delegate constructor
 * @param nameFinderModel passed through to the delegate constructor
 * @param generatorDescriptor passed through to the delegate constructor
 * @param resources passed through to the delegate constructor
 * @param manifestInfoEntries passed through to the delegate constructor
 */
public TokenNameFinderModel(String languageCode, MaxentModel nameFinderModel, byte[] generatorDescriptor, Map<String, Object> resources, Map<String, String> manifestInfoEntries) { this(languageCode, nameFinderModel, NameFinderME.DEFAULT_BEAM_SIZE, generatorDescriptor, resources, manifestInfoEntries, new BioCodec(), new TokenNameFinderFactory()); }
TokenNameFinderFactory.instantiateSequenceCodec(sequenceCodecImplName); nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(), featureGeneratorBytes, resources, sequenceCodec); } catch (InvalidFormatException e) {
factory.createContextGenerator(), factory.createSequenceCodec()); NameSampleSequenceStream ss = new NameSampleSequenceStream(samples, factory.createContextGenerator()); new NameSampleSequenceStream(samples, factory.createContextGenerator(), false); seqModel = trainer.train(ss); } else { return new TokenNameFinderModel(languageCode, seqModel, factory.getFeatureGenerator(), factory.getResources(), manifestInfoEntries, factory.getSequenceCodec(), factory); } else { return new TokenNameFinderModel(languageCode, nameFinderModel, beamSize, factory.getFeatureGenerator(), factory.getResources(), manifestInfoEntries, factory.getSequenceCodec(), factory);
public static TokenNameFinderFactory create(String subclassName, byte[] featureGeneratorBytes, final Map<String, Object> resources, SequenceCodec<String> seqCodec) throws InvalidFormatException { TokenNameFinderFactory theFactory; if (subclassName == null) { // will create the default factory theFactory = new TokenNameFinderFactory(); } else { try { theFactory = ExtensionLoader.instantiateExtension( TokenNameFinderFactory.class, subclassName); } catch (Exception e) { String msg = "Could not instantiate the " + subclassName + ". The initialization throw an exception."; System.err.println(msg); e.printStackTrace(); throw new InvalidFormatException(msg, e); } } theFactory.init(featureGeneratorBytes, resources, seqCodec); return theFactory; }
public NameFinderME(TokenNameFinderModel model) { TokenNameFinderFactory factory = model.getFactory(); seqCodec = factory.createSequenceCodec(); sequenceValidator = seqCodec.createSequenceValidator(); this.model = model.getNameFinderSequenceModel(); contextGenerator = factory.createContextGenerator(); // TODO: We should deprecate this. And come up with a better solution! contextGenerator.addFeatureGenerator( new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8)); }
params, TokenNameFinderFactory.create(null, featureGeneratorString.getBytes(), resources, new BioCodec()));
/**
 * Returns the sequence codec for this factory.
 *
 * <p>Without an artifact provider the codec held by this factory is returned as is.
 * With one, the codec class name is read from the manifest property
 * {@code TokenNameFinderModel.SEQUENCE_CODEC_CLASS_NAME_PARAMETER} and passed to
 * {@code instantiateSequenceCodec}.
 *
 * @return the sequence codec
 */
public SequenceCodec<String> createSequenceCodec() {
  if (artifactProvider == null) {
    return seqCodec;
  }

  String codecClassName = artifactProvider.getManifestProperty(
      TokenNameFinderModel.SEQUENCE_CODEC_CLASS_NAME_PARAMETER);
  return instantiateSequenceCodec(codecClassName);
}
/**
 * Checks whether the outcomes of the given model are compatible with the sequence
 * codec created by this model's factory.
 *
 * @param model the model whose outcomes are collected and checked
 * @return the result of the codec's {@code areOutcomesCompatible} check
 */
private boolean isModelValid(MaxentModel model) {
  final String[] labels = new String[model.getNumOutcomes()];

  // Copy every outcome label out of the model, in index order.
  int index = 0;
  while (index < model.getNumOutcomes()) {
    labels[index] = model.getOutcome(index);
    index++;
  }

  return getFactory().createSequenceCodec().areOutcomesCompatible(labels);
}
/**
 * Creates the context generator used by the name finder.
 *
 * <p>The feature generator returned by {@code createFeatureGenerators()} is used when
 * it is not {@code null}; otherwise a cached default set of feature generators is
 * assembled here.
 *
 * @return a {@code DefaultNameContextGenerator} wrapping the chosen feature generator
 */
public NameContextGenerator createContextGenerator() {
  AdaptiveFeatureGenerator configured = createFeatureGenerators();
  if (configured != null) {
    return new DefaultNameContextGenerator(configured);
  }

  // Nothing configured: fall back to the built-in default feature set.
  AdaptiveFeatureGenerator fallback = new CachedFeatureGenerator(
      new WindowFeatureGenerator(new TokenFeatureGenerator(), 2, 2),
      new WindowFeatureGenerator(new TokenClassFeatureGenerator(true), 2, 2),
      new OutcomePriorFeatureGenerator(),
      new PreviousMapFeatureGenerator(),
      new BigramNameFeatureGenerator(),
      new SentenceFeatureGenerator(true, false));
  return new DefaultNameContextGenerator(fallback);
}
/**
 * Returns the sequence codec reported by this model's factory.
 *
 * @return the result of {@code getFactory().getSequenceCodec()}
 */
public SequenceCodec<String> getSequenceCodec() {
  return getFactory().getSequenceCodec();
}
public static TokenNameFinderFactory create(String subclassName, byte[] featureGeneratorBytes, final Map<String, Object> resources, SequenceCodec<String> seqCodec) throws InvalidFormatException { TokenNameFinderFactory theFactory; if (subclassName == null) { // will create the default factory theFactory = new TokenNameFinderFactory(); } else { try { theFactory = ExtensionLoader.instantiateExtension( TokenNameFinderFactory.class, subclassName); } catch (Exception e) { String msg = "Could not instantiate the " + subclassName + ". The initialization throw an exception."; System.err.println(msg); e.printStackTrace(); throw new InvalidFormatException(msg, e); } } theFactory.init(featureGeneratorBytes, resources, seqCodec); return theFactory; }
@Test public void testOnlyWithNamesTypeOverride() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNames.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", TYPE_OVERRIDE, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, TYPE_OVERRIDE), names1[0]); Assert.assertEquals(new Span(2, 4, TYPE_OVERRIDE), names1[1]); Assert.assertEquals(new Span(4, 6, TYPE_OVERRIDE), names1[2]); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
public NameFinderME(TokenNameFinderModel model) { TokenNameFinderFactory factory = model.getFactory(); seqCodec = factory.createSequenceCodec(); sequenceValidator = seqCodec.createSequenceValidator(); this.model = model.getNameFinderSequenceModel(); contextGenerator = factory.createContextGenerator(); // TODO: We should deprecate this. And come up with a better solution! contextGenerator.addFeatureGenerator( new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8)); }
/**
 * Returns the sequence codec for this factory.
 *
 * <p>Without an artifact provider the codec held by this factory is returned as is.
 * With one, the codec class name is read from the manifest property
 * {@code TokenNameFinderModel.SEQUENCE_CODEC_CLASS_NAME_PARAMETER} and passed to
 * {@code instantiateSequenceCodec}.
 *
 * @return the sequence codec
 */
public SequenceCodec<String> createSequenceCodec() {
  if (artifactProvider == null) {
    return seqCodec;
  }

  String codecClassName = artifactProvider.getManifestProperty(
      TokenNameFinderModel.SEQUENCE_CODEC_CLASS_NAME_PARAMETER);
  return instantiateSequenceCodec(codecClassName);
}
/**
 * Checks whether the outcomes of the given model are compatible with the sequence
 * codec created by this model's factory.
 *
 * @param model the model whose outcomes are collected and checked
 * @return the result of the codec's {@code areOutcomesCompatible} check
 */
private boolean isModelValid(MaxentModel model) {
  final String[] labels = new String[model.getNumOutcomes()];

  // Copy every outcome label out of the model, in index order.
  int index = 0;
  while (index < model.getNumOutcomes()) {
    labels[index] = model.getOutcome(index);
    index++;
  }

  return getFactory().createSequenceCodec().areOutcomesCompatible(labels);
}
/**
 * Creates the context generator used by the name finder.
 *
 * <p>The feature generator returned by {@code createFeatureGenerators()} is used when
 * it is not {@code null}; otherwise a cached default set of feature generators is
 * assembled here.
 *
 * @return a {@code DefaultNameContextGenerator} wrapping the chosen feature generator
 */
public NameContextGenerator createContextGenerator() {
  AdaptiveFeatureGenerator configured = createFeatureGenerators();
  if (configured != null) {
    return new DefaultNameContextGenerator(configured);
  }

  // Nothing configured: fall back to the built-in default feature set.
  AdaptiveFeatureGenerator fallback = new CachedFeatureGenerator(
      new WindowFeatureGenerator(new TokenFeatureGenerator(), 2, 2),
      new WindowFeatureGenerator(new TokenClassFeatureGenerator(true), 2, 2),
      new OutcomePriorFeatureGenerator(),
      new PreviousMapFeatureGenerator(),
      new BigramNameFeatureGenerator(),
      new SentenceFeatureGenerator(true, false));
  return new DefaultNameContextGenerator(fallback);
}
/**
 * Returns the sequence codec reported by this model's factory.
 *
 * @return the result of {@code getFactory().getSequenceCodec()}
 */
public SequenceCodec<String> getSequenceCodec() {
  return getFactory().getSequenceCodec();
}
TokenNameFinderFactory.instantiateSequenceCodec(sequenceCodecImplName); nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(), featureGeneratorBytes, resources, sequenceCodec); } catch (InvalidFormatException e) {
/**
 * Trains a name finder model from a CoNLL-02 formatted training file.
 *
 * <p>The language code handed to the trainer is the lower-cased name of {@code lang}.
 * No type override is supplied ({@code null}) and a default
 * {@code TokenNameFinderFactory} is used.
 *
 * @param trainFile the file the training samples are read from
 * @param lang the language passed to the sample stream and used for the language code
 * @param params the training parameters
 * @param types passed through to {@code Conll02NameSampleStream}
 * @return the trained model
 * @throws IOException propagated from the sample stream or the trainer
 */
private TokenNameFinderModel train(File trainFile, LANGUAGE lang,
    TrainingParameters params, int types) throws IOException {

  ObjectStream<NameSample> samples = new Conll02NameSampleStream(
      lang, new MarkableFileInputStreamFactory(trainFile), types);

  // Lower-case with Locale.ROOT so the language code does not vary with the JVM's
  // default locale (e.g. Turkish maps 'I' to a dotless 'ı').
  String languageCode = lang.toString().toLowerCase(java.util.Locale.ROOT);

  return NameFinderME.train(languageCode, null, samples, params,
      new TokenNameFinderFactory());
}
factory.createContextGenerator(), factory.createSequenceCodec()); NameSampleSequenceStream ss = new NameSampleSequenceStream(samples, factory.createContextGenerator()); new NameSampleSequenceStream(samples, factory.createContextGenerator(), false); seqModel = trainer.train(ss); } else { return new TokenNameFinderModel(languageCode, seqModel, factory.getFeatureGenerator(), factory.getResources(), manifestInfoEntries, factory.getSequenceCodec(), factory); } else { return new TokenNameFinderModel(languageCode, nameFinderModel, beamSize, factory.getFeatureGenerator(), factory.getResources(), manifestInfoEntries, factory.getSequenceCodec(), factory);