/**
 * Initializes the sample stream for the EVALITA corpus format.
 *
 * @param lang the corpus language variant
 * @param in factory supplying the raw corpus data
 * @param types bit mask selecting which entity types to generate
 * @throws IOException if the underlying line stream cannot be created
 */
public EvalitaNameSampleStream(LANGUAGE lang, InputStreamFactory in, int types) throws IOException {
  this.lang = lang;
  try {
    this.lineStream = new PlainTextByLineStream(in, StandardCharsets.UTF_8);
    // NOTE(review): replacing System.out is a global side effect; kept for
    // backward compatibility, but consider moving it out of the constructor.
    // Use the charset constant's name instead of the raw "UTF-8" literal for
    // consistency with the line-stream construction above.
    System.setOut(new PrintStream(System.out, true, StandardCharsets.UTF_8.name()));
  } catch (UnsupportedEncodingException e) {
    // UTF-8 is guaranteed on every JVM, so this cannot happen.
    throw new IllegalStateException(e);
  }
  this.types = types;
}
/**
 * Initializes the sample stream for the BioNLP-2004 corpus format.
 *
 * @param in factory supplying the raw corpus data
 * @param types bit mask selecting which entity types to generate
 * @throws IOException if the underlying line stream cannot be created
 */
public BioNLP2004NameSampleStream(InputStreamFactory in, int types) throws IOException {
  try {
    this.lineStream = new PlainTextByLineStream(in, StandardCharsets.UTF_8);
    // NOTE(review): replacing System.out is a global side effect; kept for
    // backward compatibility, but consider moving it out of the constructor.
    // Use the charset constant's name instead of the raw "UTF-8" literal for
    // consistency with the line-stream construction above.
    System.setOut(new PrintStream(System.out, true, StandardCharsets.UTF_8.name()));
  } catch (UnsupportedEncodingException e) {
    // UTF-8 is guaranteed on every JVM, so this cannot happen.
    throw new IllegalStateException(e);
  }
  this.types = types;
}
/**
 * Opens the bundled AD sample corpus as a sentence stream.
 *
 * @return a sentence stream over {@code ad.sample}
 * @throws IOException if the resource cannot be opened
 */
private static ADSentenceStream openData() throws IOException {
  InputStreamFactory in = new ResourceAsStreamFactory(ADParagraphStreamTest.class,
      "/opennlp/tools/formats/ad.sample");
  // Use the Charset overload for consistency with the rest of the code base
  // instead of the raw "UTF-8" encoding-name string.
  return new ADSentenceStream(new PlainTextByLineStream(in, StandardCharsets.UTF_8));
}
}
/** Builds a POS sample stream over the bundled annotated-sentences test resource. */
private static ObjectStream<POSSample> createSampleStream() throws IOException {
  InputStreamFactory factory = new ResourceAsStreamFactory(POSTaggerMETest.class,
      "/opennlp/tools/postag/AnnotatedSentences.txt");
  PlainTextByLineStream lines = new PlainTextByLineStream(factory, StandardCharsets.UTF_8);
  return new WordTagSampleStream(lines);
}
/** Builds a sentence-sample stream over the bundled Sentences.txt test resource. */
private static ObjectStream<SentenceSample> createSampleStream() throws IOException {
  InputStreamFactory factory = new ResourceAsStreamFactory(SentenceDetectorFactoryTest.class,
      "/opennlp/tools/sentdetect/Sentences.txt");
  PlainTextByLineStream lines = new PlainTextByLineStream(factory, StandardCharsets.UTF_8);
  return new SentenceSampleStream(lines);
}
/** Builds a POS sample stream over the bundled annotated-sentences test resource. */
private static ObjectStream<POSSample> createSampleStream() throws IOException {
  InputStreamFactory factory = new ResourceAsStreamFactory(POSTaggerFactoryTest.class,
      "/opennlp/tools/postag/AnnotatedSentences.txt");
  PlainTextByLineStream lines = new PlainTextByLineStream(factory, StandardCharsets.UTF_8);
  return new WordTagSampleStream(lines);
}
/** Builds a parse-sample stream over the bundled test.parse resource. */
private static ObjectStream<Parse> createParseSampleStream() throws IOException {
  InputStreamFactory factory = new ResourceAsStreamFactory(ParseSampleStreamTest.class,
      "/opennlp/tools/parser/test.parse");
  PlainTextByLineStream lines = new PlainTextByLineStream(factory, StandardCharsets.UTF_8);
  return new ParseSampleStream(lines);
}
/** Builds a chunk-sample stream over the bundled chunker test resource. */
private static ObjectStream<ChunkSample> createSampleStream() throws IOException {
  ResourceAsStreamFactory streamFactory = new ResourceAsStreamFactory(ChunkerFactoryTest.class,
      "/opennlp/tools/chunker/test.txt");
  PlainTextByLineStream lines = new PlainTextByLineStream(streamFactory, StandardCharsets.UTF_8);
  return new ChunkSampleStream(lines);
}
/**
 * Builds a document-sample stream over the bundled doccat sample resource.
 *
 * @return a sample stream reading {@code DoccatSample.txt}
 * @throws IOException if the resource cannot be opened
 */
private static ObjectStream<DocumentSample> createSampleStream() throws IOException {
  InputStreamFactory isf = new ResourceAsStreamFactory(DoccatFactoryTest.class,
      "/opennlp/tools/doccat/DoccatSample.txt");
  // Use the Charset constant instead of the raw "UTF-8" string for consistency
  // with the rest of the code base.
  return new DocumentSampleStream(new PlainTextByLineStream(isf, StandardCharsets.UTF_8));
}
/** Creates an event stream backed by the bundled football data set (ASCII encoded). */
public FootballEventStream() throws IOException {
  URLInputStreamFactory factory = new URLInputStreamFactory(
      this.getClass().getResource("/opennlp/tools/ml/maxent/football.dat"));
  textStream = new PlainTextByLineStream(factory, StandardCharsets.US_ASCII);
}
/**
 * Loads all chunk samples from the bundled AD sample corpus before each test.
 *
 * @throws IOException if the resource cannot be read
 */
@Before
public void setup() throws IOException {
  InputStreamFactory in = new ResourceAsStreamFactory(ADParagraphStreamTest.class,
      "/opennlp/tools/formats/ad.sample");
  // Use the Charset constant instead of the raw "UTF-8" string for consistency
  // with the rest of the code base.
  try (ADChunkSampleStream stream =
      new ADChunkSampleStream(new PlainTextByLineStream(in, StandardCharsets.UTF_8))) {
    ChunkSample sample;
    while ((sample = stream.read()) != null) {
      samples.add(sample);
    }
  }
}
/** Opens the named corpus file from the OpenNLP data directory as a Latin-1 line stream. */
private static ObjectStream<String> getLineSample(String corpus) throws IOException {
  File corpusFile = new File(getOpennlpDataDir(), corpus);
  return new PlainTextByLineStream(
      new MarkableFileInputStreamFactory(corpusFile), StandardCharsets.ISO_8859_1);
}
/** Loads all name samples from the bundled AD sample corpus before each test. */
@Before
public void setup() throws IOException {
  InputStreamFactory factory = new ResourceAsStreamFactory(ADParagraphStreamTest.class,
      "/opennlp/tools/formats/ad.sample");
  try (ADNameSampleStream stream =
      new ADNameSampleStream(new PlainTextByLineStream(factory, StandardCharsets.UTF_8), true)) {
    // Drain the stream, collecting every sample it produces.
    for (NameSample s = stream.read(); s != null; s = stream.read()) {
      samples.add(s);
    }
  }
}
/**
 * Creates a language-detector sample stream over the bundled doccat sample resource.
 *
 * @return a sample stream reading {@code DoccatSample.txt}
 * @throws IOException if the resource cannot be opened
 */
public static LanguageDetectorSampleStream createSampleStream() throws IOException {
  ResourceAsStreamFactory streamFactory = new ResourceAsStreamFactory(
      LanguageDetectorMETest.class, "/opennlp/tools/doccat/DoccatSample.txt");
  // Use the Charset constant instead of the raw "UTF-8" string for consistency
  // with the rest of the code base.
  PlainTextByLineStream lineStream = new PlainTextByLineStream(streamFactory, StandardCharsets.UTF_8);
  return new LanguageDetectorSampleStream(lineStream);
}
}
/** Trains an English chunker model from the given training file and parameters. */
private static ChunkerModel train(File trainFile, TrainingParameters params) throws IOException {
  MarkableFileInputStreamFactory factory = new MarkableFileInputStreamFactory(trainFile);
  ObjectStream<ChunkSample> sampleStream =
      new ChunkSampleStream(new PlainTextByLineStream(factory, StandardCharsets.UTF_8));
  return ChunkerME.train("eng", sampleStream, params, new ChunkerFactory());
}
/**
 * Verifies that lemmatizer training with too little data fails with
 * {@link InsufficientTrainingDataException}.
 *
 * @throws IOException if the training file cannot be read
 */
@Test(expected = InsufficientTrainingDataException.class)
public void testInsufficientData() throws IOException {
  // Use the Charset constant instead of the raw "UTF-8" string for consistency
  // with the rest of the code base.
  ObjectStream<LemmaSample> sampleStream = new LemmaSampleStream(
      new PlainTextByLineStream(new MockInputStreamFactory(
          new File("opennlp/tools/lemmatizer/trial.old-insufficient.tsv")),
          StandardCharsets.UTF_8));
  TrainingParameters params = new TrainingParameters();
  params.put(TrainingParameters.ITERATIONS_PARAM, 100);
  params.put(TrainingParameters.CUTOFF_PARAM, 5);
  LemmatizerME.train("eng", sampleStream, params, new LemmatizerFactory());
}
/**
 * Verifies that chunker training with too little data fails with
 * {@link InsufficientTrainingDataException}.
 */
@Test(expected = InsufficientTrainingDataException.class)
public void testInsufficientData() throws IOException {
  ResourceAsStreamFactory streamFactory = new ResourceAsStreamFactory(getClass(),
      "/opennlp/tools/chunker/test-insufficient.txt");
  ObjectStream<ChunkSample> samples =
      new ChunkSampleStream(new PlainTextByLineStream(streamFactory, StandardCharsets.UTF_8));
  TrainingParameters trainingParams = new TrainingParameters();
  trainingParams.put(TrainingParameters.ITERATIONS_PARAM, 70);
  trainingParams.put(TrainingParameters.CUTOFF_PARAM, 1);
  ChunkerME.train("eng", samples, trainingParams, new ChunkerFactory());
}
/**
 * Verifies that tokenizer training with too little data fails with
 * {@link InsufficientTrainingDataException}.
 */
@Test(expected = InsufficientTrainingDataException.class)
public void testInsufficientData() throws IOException {
  InputStreamFactory trainingData = new ResourceAsStreamFactory(TokenizerModel.class,
      "/opennlp/tools/tokenize/token-insufficient.train");
  ObjectStream<TokenSample> sampleStream =
      new TokenSampleStream(new PlainTextByLineStream(trainingData, StandardCharsets.UTF_8));
  TrainingParameters trainingParams = new TrainingParameters();
  trainingParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
  trainingParams.put(TrainingParameters.CUTOFF_PARAM, 5);
  TokenizerME.train(sampleStream,
      TokenizerFactory.create(null, "eng", null, true, null), trainingParams);
}
/**
 * Verifies that POS-tagger training with too little data fails with
 * {@link InsufficientTrainingDataException}.
 */
@Test(expected = InsufficientTrainingDataException.class)
public void insufficientTestData() throws IOException {
  InputStreamFactory streamFactory = new ResourceAsStreamFactory(POSTaggerMETest.class,
      "/opennlp/tools/postag/AnnotatedSentencesInsufficient.txt");
  ObjectStream<POSSample> samples =
      new WordTagSampleStream(new PlainTextByLineStream(streamFactory, StandardCharsets.UTF_8));
  TrainingParameters trainingParams = new TrainingParameters();
  trainingParams.put(TrainingParameters.ALGORITHM_PARAM, ModelType.MAXENT.name());
  trainingParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
  trainingParams.put(TrainingParameters.CUTOFF_PARAM, 5);
  POSTaggerME.train("eng", samples, trainingParams, new POSTaggerFactory());
}
/**
 * Checks that the line stream splits the test string into individual lines
 * and signals end of data with {@code null}.
 *
 * @throws IOException if reading fails
 */
@Test
public void testLineSegmentation() throws IOException {
  // try-with-resources guarantees the stream is closed even when an assertion
  // fails; the original manual close() leaked the stream on test failure.
  try (ObjectStream<String> stream =
      new PlainTextByLineStream(new MockInputStreamFactory(testString), StandardCharsets.UTF_8)) {
    Assert.assertEquals("line1", stream.read());
    Assert.assertEquals("line2", stream.read());
    Assert.assertEquals("line3", stream.read());
    Assert.assertEquals("line4", stream.read());
    Assert.assertNull(stream.read());
  }
}