/**
 * Reads the next object, moving on to the following stream each time the
 * current one yields {@code null}.
 *
 * @return the next non-null object, or {@code null} once every stream has
 *         been consumed
 * @throws IOException if the stream currently being read fails
 */
public T read() throws IOException {
  while (streamIndex < streams.length) {
    T next = streams[streamIndex].read();
    if (next != null) {
      return next;
    }
    // The current stream returned null: continue with the following one.
    streamIndex++;
  }
  return null;
}
/**
 * Closes every stream in {@code streams}.
 *
 * <p>All streams are attempted even when one of them fails, so a failure in
 * an early stream does not leave the later ones open. The first
 * {@link IOException} is rethrown after the loop; any further ones are
 * attached to it as suppressed exceptions.
 *
 * @throws IOException if closing at least one of the streams failed
 */
@Override
public void close() throws IOException {
  IOException firstFailure = null;
  for (ObjectStream<T> stream : streams) {
    try {
      stream.close();
    } catch (IOException e) {
      if (firstFailure == null) {
        firstFailure = e;
      } else {
        firstFailure.addSuppressed(e);
      }
    }
  }
  if (firstFailure != null) {
    throw firstFailure;
  }
}
/**
 * Resets this stream by delegating to {@code input.reset()}.
 *
 * @throws IOException if the delegate fails to reset
 * @throws UnsupportedOperationException if the delegate cannot be reset
 */
public void reset()
    throws IOException, UnsupportedOperationException {
  input.reset();
}
}
/**
 * Reads three lines, resets the stream, and then checks that all four lines
 * are returned again from the start, followed by {@code null}.
 */
@Test
public void testReset() throws IOException {
  ObjectStream<String> stream = new PlainTextByLineStream(
      new MockInputStreamFactory(testString), StandardCharsets.UTF_8);

  // Consume part of the input before resetting.
  for (String expected : new String[] {"line1", "line2", "line3"}) {
    Assert.assertEquals(expected, stream.read());
  }

  stream.reset();

  // After the reset the stream must start over at the first line.
  for (String expected : new String[] {"line1", "line2", "line3", "line4"}) {
    Assert.assertEquals(expected, stream.read());
  }
  Assert.assertNull(stream.read());

  stream.close();
}
}
/**
 * Hashes the string form of every sample in the stream and asserts that the
 * resulting digest equals the expected checksum.
 *
 * <p>The stream is closed on both the normal and the failure path. If reading
 * or hashing fails, the stream is still closed and a failure of that close is
 * attached to the original exception as suppressed, so the original cause is
 * the one reported.
 *
 * @param samples the stream to consume; it is closed by this method
 * @param checksum the expected digest, interpreted as a non-negative integer
 * @throws Exception if the digest algorithm is unavailable, or if reading or
 *         closing the stream fails
 */
public static void verifyTrainingData(ObjectStream<?> samples, BigInteger checksum)
    throws Exception {
  MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);

  try {
    Object sample;
    while ((sample = samples.read()) != null) {
      digest.update(sample.toString().getBytes(StandardCharsets.UTF_8));
    }
  } catch (Exception e) {
    // Release the stream without hiding the failure that brought us here.
    try {
      samples.close();
    } catch (Exception closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
  samples.close();

  // Signum 1 makes the digest bytes a non-negative value for the comparison.
  Assert.assertEquals(checksum, new BigInteger(1, digest.digest()));
}
@Override protected ParserModel trainAndUpdate(ParserModel originalModel, ObjectStream<Parse> parseSamples, ModelUpdaterParams parameters) throws IOException { Dictionary mdict = ParserTrainerTool.buildDictionary(parseSamples, originalModel.getHeadRules(), 5); parseSamples.reset(); // TODO: training individual models should be in the chunking parser, not here // Training build System.out.println("Training builder"); ObjectStream<Event> bes = new ParserEventStream(parseSamples, originalModel.getHeadRules(), ParserEventTypeEnum.BUILD, mdict); EventTrainer trainer = TrainerFactory.getEventTrainer( ModelUtil.createDefaultTrainingParameters(), null); MaxentModel buildModel = trainer.train(bes); parseSamples.close(); return originalModel.updateBuildModel(buildModel); } }
/**
 * Checks that the test input is split into four lines, in order, and that the
 * stream then returns {@code null}.
 */
@Test
public void testLineSegmentation() throws IOException {
  ObjectStream<String> stream = new PlainTextByLineStream(
      new MockInputStreamFactory(testString), StandardCharsets.UTF_8);

  for (String expected : new String[] {"line1", "line2", "line3", "line4"}) {
    Assert.assertEquals(expected, stream.read());
  }
  Assert.assertNull(stream.read());

  stream.close();
}
@Override protected ParserModel trainAndUpdate(ParserModel originalModel, ObjectStream<Parse> parseSamples, ModelUpdaterParams parameters) throws IOException { Dictionary mdict = ParserTrainerTool.buildDictionary(parseSamples, originalModel.getHeadRules(), 5); parseSamples.reset(); // TODO: Maybe that should be part of the ChunkingParser ... // Training build System.out.println("Training check model"); ObjectStream<Event> bes = new ParserEventStream(parseSamples, originalModel.getHeadRules(), ParserEventTypeEnum.CHECK, mdict); EventTrainer trainer = TrainerFactory.getEventTrainer( ModelUtil.createDefaultTrainingParameters(), null); MaxentModel checkModel = trainer.train(bes); parseSamples.close(); return originalModel.updateCheckModel(checkModel); } }
/**
 * Reads and discards up to {@code samplesToSkip} samples, stopping early as
 * soon as the underlying stream returns {@code null}.
 *
 * @throws IOException if reading from the underlying stream fails
 */
private void skipSamples() throws IOException {
  for (int skipped = 0; skipped < samplesToSkip; skipped++) {
    if (samples.read() == null) {
      // Nothing left to skip.
      break;
    }
  }
}
}
/**
 * Closes this stream by closing the wrapped {@code samples} stream.
 *
 * @throws IOException if the wrapped stream fails to close
 */
@Override
public void close() throws IOException {
  samples.close();
}
}
/**
 * Rewinds reading to the first stream and resets every stream in
 * {@code streams}.
 *
 * @throws IOException if one of the streams fails to reset
 * @throws UnsupportedOperationException if one of the streams cannot be reset
 */
public void reset()
    throws IOException, UnsupportedOperationException {
  // Point back at the first stream before touching the individual streams.
  streamIndex = 0;

  for (ObjectStream<T> current : streams) {
    current.reset();
  }
}
/**
 * Returns whatever the wrapped {@code stream} returns from its own
 * {@code read()}.
 *
 * @return the value returned by the wrapped stream
 * @throws IOException if the wrapped stream fails to read
 */
@Override
public T read() throws IOException {
  return stream.read();
}
/**
 * Closes this stream by closing {@code psi}.
 *
 * @throws IOException if {@code psi} fails to close
 */
@Override
public void close() throws IOException {
  psi.close();
}
}
/**
 * Resets this stream by delegating to {@code sentenceStream.reset()}.
 *
 * @throws IOException if the delegate fails to reset
 * @throws UnsupportedOperationException if the delegate cannot be reset
 */
@Override
public void reset()
    throws IOException, UnsupportedOperationException {
  sentenceStream.reset();
}
}
/**
 * Returns whatever the wrapped {@code samples} stream returns from its own
 * {@code read()}.
 *
 * @return the value returned by the wrapped stream
 * @throws IOException if the wrapped stream fails to read
 */
@Override
public T read() throws IOException {
  return samples.read();
}
/**
 * Closes this stream by closing the wrapped {@code samples} stream.
 *
 * @throws IOException if the wrapped stream fails to close
 */
@Override
public void close() throws IOException {
  samples.close();
}
}
/**
 * Resets this stream by delegating to {@code samples.reset()}.
 *
 * @throws IOException if the delegate fails to reset
 * @throws UnsupportedOperationException if the delegate cannot be reset
 */
@Override
public void reset()
    throws IOException, UnsupportedOperationException {
  samples.reset();
}