/**
 * Returns the next object from the underlying streams, consuming them in
 * array order.
 *
 * <p>Starting at {@code streamIndex}, each stream is read; when a stream
 * returns {@code null} the index advances to the following stream.
 *
 * @return the first non-null object read, or {@code null} once
 *         {@code streamIndex} has moved past the last stream
 * @throws IOException if reading from an underlying stream fails
 */
public T read() throws IOException {
  for (; streamIndex < streams.length; streamIndex++) {
    T next = streams[streamIndex].read();
    if (next != null) {
      // Stay on the current stream: the index is only advanced after a null read.
      return next;
    }
  }
  return null;
}
/**
 * Reads and discards up to {@code samplesToSkip} objects from {@code samples}.
 *
 * <p>Stops early as soon as {@code samples.read()} returns {@code null}.
 *
 * @throws IOException if reading from {@code samples} fails
 */
private void skipSamples() throws IOException {
  for (int skipped = 0; skipped < samplesToSkip; skipped++) {
    if (samples.read() == null) {
      break;
    }
  }
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Returns the next object, moving on to the next stream supplied by
 * {@code iterator} whenever the current stream returns {@code null}.
 *
 * @return the first non-null object read, or {@code null} when
 *         {@code currentStream} is {@code null} (the iterator has no more streams)
 * @throws IOException if reading from the current stream fails
 */
@Override
public T read() throws IOException {
  while (currentStream != null) {
    T next = currentStream.read();
    if (next != null) {
      return next;
    }

    // Current stream returned null: switch to the next one, or mark the end.
    if (iterator.hasNext()) {
      currentStream = iterator.next();
    } else {
      currentStream = null;
    }
  }
  return null;
}
/**
 * Reads the next event from {@code samples} and, when one is available,
 * writes its {@code toString()} form followed by a newline to {@code writer}
 * before returning it.
 *
 * @return the event read, or {@code null} if {@code samples} returned {@code null}
 * @throws IOException if reading the event or writing to {@code writer} fails
 */
public Event read() throws IOException {
  Event next = samples.read();
  if (next == null) {
    return null;
  }

  writer.write(next.toString());
  writer.write("\n");
  return next;
}
/**
 * Reads the next file from {@code samples} and returns the result of
 * {@code readFile} for that file and the configured {@code encoding}.
 *
 * @return the value produced by {@code readFile}, or {@code null} if
 *         {@code samples} returned {@code null}
 * @throws IOException if reading from {@code samples} or from the file fails
 */
@Override
public String read() throws IOException {
  File nextFile = samples.read();
  return (nextFile == null) ? null : readFile(nextFile, encoding);
}
/**
 * Drains {@code samples} into {@code bufferedSamples}, shuffles the buffer
 * with a {@link Random} created from the fixed seed 23, and then calls
 * {@code reset()}.
 *
 * @param samples the stream to read until it returns {@code null}
 * @throws IOException if reading from {@code samples} fails
 */
SampleShuffleStream(ObjectStream<T> samples) throws IOException {
  for (T next = samples.read(); next != null; next = samples.read()) {
    bufferedSamples.add(next);
  }

  Random fixedSeedRandom = new Random(23);
  Collections.shuffle(bufferedSamples, fixedSeedRandom);

  reset();
}
/**
 * Reads the next string from {@code samples} and parses it with
 * {@code TokenSample.parse} using the configured {@code separatorChars}.
 *
 * @return the parsed sample, or {@code null} if {@code samples} returned {@code null}
 * @throws IOException if reading from {@code samples} fails
 */
public TokenSample read() throws IOException {
  String line = samples.read();
  if (line == null) {
    return null;
  }
  return TokenSample.parse(line, separatorChars);
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Reads the next {@code NameSample} and builds a {@code TokenSample} from
 * the configured {@code detokenizer} and the name sample's sentence.
 *
 * @return the new token sample, or {@code null} if {@code samples} returned {@code null}
 * @throws IOException if reading from {@code samples} fails
 */
public TokenSample read() throws IOException {
  NameSample source = samples.read();
  if (source == null) {
    return null;
  }
  return new TokenSample(detokenizer, source.getSentence());
}
/**
 * Reads the next string from {@code input}, obtains token spans for it via
 * {@code tokenizer.tokenizePos}, and wraps the text and spans in a
 * {@code TokenSample}.
 *
 * @return the new token sample, or {@code null} if {@code input} returned {@code null}
 * @throws IOException if reading from {@code input} fails
 */
public TokenSample read() throws IOException {
  String text = input.read();
  if (text == null) {
    return null;
  }

  Span[] spans = tokenizer.tokenizePos(text);
  return new TokenSample(text, spans);
}
/**
 * Reads the next {@code POSSample} and builds a {@code TokenSample} from
 * the configured {@code detokenizer} and the POS sample's sentence.
 *
 * @return the new token sample, or {@code null} if {@code samples} returned {@code null}
 * @throws IOException if reading from {@code samples} fails
 */
public TokenSample read() throws IOException {
  POSSample source = samples.read();
  if (source == null) {
    return null;
  }
  return new TokenSample(detokenizer, source.getSentence());
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Reads the next line from {@code textStream} and converts it into an
 * {@code Event}.
 *
 * <p>The line is split on runs of whitespace. The last token is passed as
 * the first constructor argument of the event and all preceding tokens, in
 * order, as the second.
 *
 * @return the event built from the line, or {@code null} if
 *         {@code textStream} returned {@code null}
 * @throws IOException if reading from {@code textStream} fails, or if the
 *         line consists only of whitespace and therefore has no tokens
 */
@Override
public Event read() throws IOException {
  String line = textStream.read();
  if (line == null) {
    return null;
  }

  String[] tokens = line.split("\\s+");
  if (tokens.length == 0) {
    // String.split drops trailing empty strings, so a whitespace-only line
    // yields an empty array; indexing it would throw
    // ArrayIndexOutOfBoundsException.
    throw new IOException(
        "Cannot parse an event from a line that contains only whitespace");
  }

  int lastIndex = tokens.length - 1;
  return new Event(tokens[lastIndex], Arrays.copyOf(tokens, lastIndex));
}
/**
 * Verifies that a {@code PlainTextByLineStream} over {@code testString}
 * yields "line1" through "line4" in order and then {@code null}.
 *
 * @throws IOException if creating, reading or closing the stream fails
 */
@Test
public void testLineSegmentation() throws IOException {
  ObjectStream<String> stream =
      new PlainTextByLineStream(new MockInputStreamFactory(testString), StandardCharsets.UTF_8);
  try {
    Assert.assertEquals("line1", stream.read());
    Assert.assertEquals("line2", stream.read());
    Assert.assertEquals("line3", stream.read());
    Assert.assertEquals("line4", stream.read());
    Assert.assertNull(stream.read());
  } finally {
    // Close even when an assertion fails, so the stream is never left open.
    stream.close();
  }
}
/**
 * Opens {@code GERMAN_SAMPLE} with {@code LANGUAGE.EN} and reads one sample;
 * the test passes only if an {@link IOException} is thrown.
 *
 * @throws IOException expected by this test
 */
@Test(expected = IOException.class)
public void testParsingGermanSampleWithEnglishAsLanguage() throws IOException {
  openData(LANGUAGE.EN, GERMAN_SAMPLE).read();
}
/**
 * Opens {@code ENGLISH_SAMPLE} with {@code LANGUAGE.DE} and reads one sample;
 * the test passes only if an {@link IOException} is thrown.
 *
 * @throws IOException expected by this test
 */
@Test(expected = IOException.class)
public void testParsingEnglishSampleWithGermanAsLanguage() throws IOException {
  openData(LANGUAGE.DE, ENGLISH_SAMPLE).read();
}
/**
 * Builds a dictionary from the sentences of all samples in the given stream.
 *
 * <p>Every non-empty sentence is added to an {@code NGramModel} as a
 * {@code StringList} with the arguments {@code 1, 1} (presumably the minimum
 * and maximum n-gram length; confirm against {@code NGramModel.add}). The
 * model is then trimmed with {@code cutoff(cutoff, Integer.MAX_VALUE)} and
 * converted via {@code toDictionary(true)}.
 *
 * @param samples the stream to read until it returns {@code null}
 * @param cutoff the first argument passed to {@code NGramModel.cutoff}
 * @return the dictionary produced by the model
 * @throws IOException if reading from {@code samples} fails
 */
public static Dictionary buildNGramDictionary(ObjectStream<POSSample> samples, int cutoff)
    throws IOException {
  NGramModel model = new NGramModel();

  for (POSSample current = samples.read(); current != null; current = samples.read()) {
    String[] sentence = current.getSentence();
    if (sentence.length == 0) {
      continue; // empty sentences are not added to the model
    }
    model.add(new StringList(sentence), 1, 1);
  }

  model.cutoff(cutoff, Integer.MAX_VALUE);
  return model.toDictionary(true);
}
/**
 * Reads "conll2002-nl.sample" as {@code LANGUAGE.NLD} and checks that the
 * first sample has no names and sets the clear-adaptive-data flag, that the
 * second sample does not set the flag, and that a third read returns
 * {@code null}.
 *
 * @throws IOException if opening or reading the sample data fails
 */
@Test
public void testParsingDutchSample() throws IOException {
  ObjectStream<NameSample> samples = openData(LANGUAGE.NLD, "conll2002-nl.sample");

  NameSample first = samples.read();
  Assert.assertEquals(0, first.getNames().length);
  Assert.assertTrue(first.isClearAdaptiveDataSet());

  NameSample second = samples.read();
  Assert.assertFalse(second.isClearAdaptiveDataSet());

  Assert.assertNull(samples.read());
}