/**
 * Opens a classpath test resource as an Evalita name sample stream.
 *
 * @param lang the language handed through to the sample stream
 * @param name file name, resolved below {@code /opennlp/tools/formats/}
 * @return a stream constructed with the {@code GENERATE_PERSON_ENTITIES} flag
 * @throws IOException declared for the stream construction
 */
private static ObjectStream<NameSample> openData(LANGUAGE lang, String name) throws IOException {
  String resourcePath = "/opennlp/tools/formats/" + name;
  InputStreamFactory factory =
      new ResourceAsStreamFactory(EvalitaNameSampleStreamTest.class, resourcePath);

  return new EvalitaNameSampleStream(
      lang, factory, EvalitaNameSampleStream.GENERATE_PERSON_ENTITIES);
}
/**
 * Loads the Brown cluster test resource and builds the feature generator under test.
 *
 * @throws IOException if the resource cannot be opened or read
 */
@Before
public void setup() throws IOException {
  ResourceAsStreamFactory stream = new ResourceAsStreamFactory(
      getClass(), "/opennlp/tools/formats/brown-cluster.txt");

  // Close the stream here once the cluster has been built, so the fixture does not
  // depend on BrownCluster releasing it.
  try (java.io.InputStream clusterData = stream.createInputStream()) {
    BrownCluster brownCluster = new BrownCluster(clusterData);
    generator = new BrownBigramFeatureGenerator(brownCluster);
  }
}
/**
 * Opens a classpath test resource as a CoNLL03 name sample stream.
 *
 * @param lang the language handed through to the sample stream
 * @param name file name, resolved below {@code /opennlp/tools/formats/}
 * @return a stream constructed with the {@code GENERATE_PERSON_ENTITIES} flag;
 *         the flag constant is taken from {@code Conll02NameSampleStream}
 * @throws IOException declared for the stream construction
 */
private static ObjectStream<NameSample> openData(LANGUAGE lang, String name) throws IOException {
  String resourcePath = "/opennlp/tools/formats/" + name;
  InputStreamFactory factory =
      new ResourceAsStreamFactory(Conll03NameSampleStreamTest.class, resourcePath);

  return new Conll03NameSampleStream(
      lang, factory, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);
}
@Test public void testEvaluator() throws IOException { ResourceAsStreamFactory inPredicted = new ResourceAsStreamFactory( getClass(), "/opennlp/tools/chunker/output.txt"); ResourceAsStreamFactory inExpected = new ResourceAsStreamFactory(getClass(), "/opennlp/tools/chunker/output.txt"); ResourceAsStreamFactory detailedOutputStream = new ResourceAsStreamFactory( getClass(), "/opennlp/tools/chunker/detailedOutput.txt"); new InputStreamReader(detailedOutputStream.createInputStream(), StandardCharsets.UTF_8)); String line = reader.readLine();
/**
 * Opens a classpath test resource as a CoNLL02 name sample stream.
 *
 * @param lang the language handed through to the sample stream
 * @param name file name, resolved below {@code /opennlp/tools/formats/}
 * @return a stream constructed with the {@code GENERATE_PERSON_ENTITIES} flag
 * @throws IOException declared for the stream construction
 */
private static ObjectStream<NameSample> openData(LANGUAGE lang, String name) throws IOException {
  String resourcePath = "/opennlp/tools/formats/" + name;
  InputStreamFactory factory =
      new ResourceAsStreamFactory(Conll02NameSampleStreamTest.class, resourcePath);

  return new Conll02NameSampleStream(
      lang, factory, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);
}
/**
 * Opens a classpath test resource as a Census90 name stream decoded as UTF-8.
 *
 * @param name file name, resolved below {@code /opennlp/tools/formats/}
 * @return the name stream over the resource
 * @throws IOException declared for the stream construction
 */
private static ObjectStream<StringList> openData(String name) throws IOException {
  String resourcePath = "/opennlp/tools/formats/" + name;
  InputStreamFactory factory =
      new ResourceAsStreamFactory(NameFinderCensus90NameStreamTest.class, resourcePath);

  return new NameFinderCensus90NameStream(factory, StandardCharsets.UTF_8);
}
public static LanguageDetectorSampleStream createSampleStream() throws IOException { ResourceAsStreamFactory streamFactory = new ResourceAsStreamFactory( LanguageDetectorMETest.class, "/opennlp/tools/doccat/DoccatSample.txt"); PlainTextByLineStream lineStream = new PlainTextByLineStream(streamFactory, "UTF-8"); return new LanguageDetectorSampleStream(lineStream); } }
/**
 * Builds a sentence sample stream over the bundled {@code Sentences.txt}
 * test resource, read line by line as UTF-8.
 *
 * @return the sample stream over the resource
 * @throws IOException declared for the line stream construction
 */
private static ObjectStream<SentenceSample> createSampleStream() throws IOException {
  InputStreamFactory factory = new ResourceAsStreamFactory(
      SentenceDetectorFactoryTest.class, "/opennlp/tools/sentdetect/Sentences.txt");
  PlainTextByLineStream lines = new PlainTextByLineStream(factory, StandardCharsets.UTF_8);

  return new SentenceSampleStream(lines);
}
/**
 * Builds a word/tag sample stream over the bundled
 * {@code AnnotatedSentences.txt} test resource, read line by line as UTF-8.
 *
 * @return the sample stream over the resource
 * @throws IOException declared for the line stream construction
 */
private static ObjectStream<POSSample> createSampleStream() throws IOException {
  InputStreamFactory factory = new ResourceAsStreamFactory(
      POSTaggerFactoryTest.class, "/opennlp/tools/postag/AnnotatedSentences.txt");
  PlainTextByLineStream lines = new PlainTextByLineStream(factory, StandardCharsets.UTF_8);

  return new WordTagSampleStream(lines);
}
/**
 * Builds a parse sample stream over the bundled {@code test.parse}
 * test resource, read line by line as UTF-8.
 *
 * @return the sample stream over the resource
 * @throws IOException declared for the line stream construction
 */
private static ObjectStream<Parse> createParseSampleStream() throws IOException {
  InputStreamFactory factory = new ResourceAsStreamFactory(
      ParseSampleStreamTest.class, "/opennlp/tools/parser/test.parse");
  PlainTextByLineStream lines = new PlainTextByLineStream(factory, StandardCharsets.UTF_8);

  return new ParseSampleStream(lines);
}
/**
 * Builds a chunk sample stream over the bundled chunker {@code test.txt}
 * test resource, read line by line as UTF-8.
 *
 * @return the sample stream over the resource
 * @throws IOException declared for the line stream construction
 */
private static ObjectStream<ChunkSample> createSampleStream() throws IOException {
  ResourceAsStreamFactory factory = new ResourceAsStreamFactory(
      ChunkerFactoryTest.class, "/opennlp/tools/chunker/test.txt");
  PlainTextByLineStream lines = new PlainTextByLineStream(factory, StandardCharsets.UTF_8);

  return new ChunkSampleStream(lines);
}
private static ADSentenceStream openData() throws IOException { InputStreamFactory in = new ResourceAsStreamFactory(ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"); return new ADSentenceStream(new PlainTextByLineStream(in, "UTF-8")); } }
/**
 * Builds a word/tag sample stream over the bundled
 * {@code AnnotatedSentences.txt} test resource, read line by line as UTF-8.
 *
 * @return the sample stream over the resource
 * @throws IOException declared for the line stream construction
 */
private static ObjectStream<POSSample> createSampleStream() throws IOException {
  InputStreamFactory factory = new ResourceAsStreamFactory(
      POSTaggerMETest.class, "/opennlp/tools/postag/AnnotatedSentences.txt");
  PlainTextByLineStream lines = new PlainTextByLineStream(factory, StandardCharsets.UTF_8);

  return new WordTagSampleStream(lines);
}
/**
 * Builds a token sample stream over the bundled {@code token.train}
 * test resource, read line by line as UTF-8.
 *
 * @return the sample stream over the resource
 * @throws IOException declared for the line stream construction
 */
private static ObjectStream<TokenSample> createSampleStream() throws IOException {
  InputStreamFactory factory = new ResourceAsStreamFactory(
      TokenizerFactoryTest.class, "/opennlp/tools/tokenize/token.train");
  PlainTextByLineStream lines = new PlainTextByLineStream(factory, StandardCharsets.UTF_8);

  return new TokenSampleStream(lines);
}
/**
 * Builds a document sample stream over the bundled {@code DoccatSample.txt}
 * test resource, read line by line as UTF-8.
 *
 * @return the sample stream over the resource
 * @throws IOException declared for the line stream construction
 */
private static ObjectStream<DocumentSample> createSampleStream() throws IOException {
  InputStreamFactory isf = new ResourceAsStreamFactory(
      DoccatFactoryTest.class, "/opennlp/tools/doccat/DoccatSample.txt");

  // Use the charset constant rather than a charset name string, so a misspelled
  // name cannot surface as a runtime lookup failure.
  return new DocumentSampleStream(
      new PlainTextByLineStream(isf, java.nio.charset.StandardCharsets.UTF_8));
}
public void reset() throws IOException { try { if (samples != null) { samples.close(); } InputStreamFactory in = new ResourceAsStreamFactory(getClass(), "/opennlp/tools/parser/parser.train"); samples = new ParseSampleStream(new PlainTextByLineStream(in, StandardCharsets.UTF_8)); } catch (UnsupportedEncodingException e) { // Should never happen Assert.fail(e.getMessage()); } } };
/**
 * Reads every sentence sample from the bundled {@code ad.sample} test resource
 * into {@code samples} before each test.
 *
 * @throws IOException if the resource cannot be read
 */
@Before
public void setup() throws IOException {
  InputStreamFactory in = new ResourceAsStreamFactory(
      ADSentenceSampleStreamTest.class, "/opennlp/tools/formats/ad.sample");

  try (ADSentenceSampleStream stream = new ADSentenceSampleStream(
      new PlainTextByLineStream(in, StandardCharsets.UTF_8), true)) {
    // Leftover debug printing of each document was removed: the fixture only needs
    // to collect the samples, and the output cluttered every test run.
    SentenceSample sample;
    while ((sample = stream.read()) != null) {
      samples.add(sample);
    }
  }
}
/**
 * Creates a {@code NameSample} stream over the bundled annotated corpus
 * {@code AnnotatedSentences.txt}, read line by line as ISO-8859-1.
 *
 * @return the name sample stream over the corpus
 * @throws IOException declared for the line stream construction
 * @throws URISyntaxException declared in the signature; nothing in this body
 *         raises it directly
 */
private static ObjectStream<NameSample> createSample() throws IOException,
    URISyntaxException {
  InputStreamFactory corpus = new ResourceAsStreamFactory(
      DictionaryNameFinderEvaluatorTest.class,
      "/opennlp/tools/namefind/AnnotatedSentences.txt");
  PlainTextByLineStream lines =
      new PlainTextByLineStream(corpus, StandardCharsets.ISO_8859_1);

  return new NameSampleDataStream(lines);
}
/**
 * Reads every name sample from the bundled {@code ad.sample} test resource
 * into {@code samples} before each test.
 *
 * @throws IOException if the resource cannot be read
 */
@Before
public void setup() throws IOException {
  InputStreamFactory factory = new ResourceAsStreamFactory(
      ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample");
  PlainTextByLineStream lines = new PlainTextByLineStream(factory, StandardCharsets.UTF_8);

  try (ADNameSampleStream stream = new ADNameSampleStream(lines, true)) {
    for (NameSample next = stream.read(); next != null; next = stream.read()) {
      samples.add(next);
    }
  }
}
/**
 * Reads every chunk sample from the bundled {@code ad.sample} test resource
 * into {@code samples} before each test.
 *
 * @throws IOException if the resource cannot be read
 */
@Before
public void setup() throws IOException {
  InputStreamFactory in = new ResourceAsStreamFactory(
      ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample");

  // Use the charset constant rather than a charset name string, so a misspelled
  // name cannot surface as a runtime lookup failure.
  try (ADChunkSampleStream stream = new ADChunkSampleStream(
      new PlainTextByLineStream(in, java.nio.charset.StandardCharsets.UTF_8))) {
    ChunkSample sample;
    while ((sample = stream.read()) != null) {
      samples.add(sample);
    }
  }
}