/**
 * Builds the stream of sentence samples used as input by the tests.
 *
 * <p>The resource {@code /opennlp/tools/sentdetect/Sentences.txt} is looked up via
 * {@code SentenceDetectorFactoryTest.class}, read line by line as UTF-8, and wrapped
 * in a {@link SentenceSampleStream}.
 *
 * @return a new sample stream over the lines of the resource
 * @throws IOException propagated from the construction of the underlying streams
 */
private static ObjectStream<SentenceSample> createSampleStream() throws IOException {
  final InputStreamFactory resource = new ResourceAsStreamFactory(
      SentenceDetectorFactoryTest.class,
      "/opennlp/tools/sentdetect/Sentences.txt");

  // One raw line per element; the sample stream groups them into documents.
  final ObjectStream<String> lines =
      new PlainTextByLineStream(resource, StandardCharsets.UTF_8);

  return new SentenceSampleStream(lines);
}
/**
 * Reads the next document from the underlying line stream and converts it into a
 * {@link SentenceSample}.
 *
 * <p>Every non-empty line is treated as one sentence: it is trimmed, passed through
 * {@code replaceNewLineEscapeTags}, and appended to the document text followed by a
 * single space. A {@link Span} covering the sentence (without the separating space)
 * is recorded per line. An empty line ends the document.
 *
 * <p>Empty lines in front of a document are skipped. Previously a leading empty line,
 * or two empty lines in a row, produced a {@code null} result although further lines
 * were still available, which a consumer using the same {@code null}-terminated
 * convention as {@code samples.read()} would take for the end of the data.
 *
 * @return the next sample, or {@code null} when the underlying stream yields no
 *         further sentences
 * @throws IOException if reading from the underlying stream fails
 */
public SentenceSample read() throws IOException {
  StringBuilder sentencesString = new StringBuilder();
  List<Span> sentenceSpans = new LinkedList<>();

  String sentence = samples.read();

  // Skip separator lines that precede the document so that repeated blank lines
  // are not reported as an (empty) document, i.e. as null.
  while (sentence != null && sentence.isEmpty()) {
    sentence = samples.read();
  }

  while (sentence != null && !sentence.isEmpty()) {
    int begin = sentencesString.length();

    // Emptiness is tested before trimming, so a whitespace-only line does not end
    // the document; it contributes a zero-length span, as it did before.
    sentence = replaceNewLineEscapeTags(sentence.trim());
    sentencesString.append(sentence);

    int end = sentencesString.length();
    sentenceSpans.add(new Span(begin, end));

    // Separator between sentences; deliberately outside the recorded span.
    sentencesString.append(' ');

    sentence = samples.read();
  }

  if (sentenceSpans.isEmpty()) {
    return null;
  }

  return new SentenceSample(sentencesString.toString(),
      sentenceSpans.toArray(new Span[sentenceSpans.size()]));
}
} // closes the enclosing type, whose header is outside this excerpt
/**
 * Creates a {@link SentenceSampleStream} from command line arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}. The data file they name is
 * checked with {@code CmdLineUtil.checkInputFile} and then read line by line using
 * the encoding taken from the same parameters.
 *
 * <p>If the line stream cannot be created, the {@link IOException} is handed to
 * {@code CmdLineUtil.handleCreateObjectStreamError}; should that call return
 * normally, the returned sample stream wraps {@code null}.
 *
 * @param args the command line arguments to parse
 * @return a sentence sample stream over the lines of the data file
 */
public ObjectStream<SentenceSample> create(String[] args) {
  final Parameters cliParams = ArgumentParser.parse(args, Parameters.class);

  CmdLineUtil.checkInputFile("Data", cliParams.getData());
  final InputStreamFactory dataFactory =
      CmdLineUtil.createInputStreamFactory(cliParams.getData());

  ObjectStream<String> lines;
  try {
    lines = new PlainTextByLineStream(dataFactory, cliParams.getEncoding());
  } catch (IOException ioe) {
    CmdLineUtil.handleCreateObjectStreamError(ioe);
    // Only reached if the error handler returns instead of throwing.
    lines = null;
  }

  return new SentenceSampleStream(lines);
}
} // closes the enclosing type, whose header is outside this excerpt
// Pass the configured end-of-sentence characters through
// SentenceSampleStream.replaceNewLineEscapeTags and keep the result in `eos` as a char array.
// NOTE(review): presumably this turns escaped newline tags into real line breaks — confirm
// against SentenceSampleStream.
// NOTE(review): assumes params.getEosChars() is non-null here; any guard lies outside this
// excerpt — verify.
String eosString = SentenceSampleStream.replaceNewLineEscapeTags(params.getEosChars()); eos = eosString.toCharArray();
/**
 * Verifies that training a sentence detector on
 * {@code /opennlp/tools/sentdetect/SentencesInsufficient.txt} fails with an
 * {@link InsufficientTrainingDataException}.
 *
 * <p>Training runs with 100 iterations and a cutoff of 0.
 *
 * @throws IOException the expected {@code InsufficientTrainingDataException} is
 *         propagated through this declaration, as is any other I/O failure
 */
@Test(expected = InsufficientTrainingDataException.class)
public void testInsufficientData() throws IOException {
  InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
      "/opennlp/tools/sentdetect/SentencesInsufficient.txt");

  TrainingParameters mlParams = new TrainingParameters();
  mlParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
  mlParams.put(TrainingParameters.CUTOFF_PARAM, 0);

  SentenceDetectorFactory factory = new SentenceDetectorFactory("eng", true, null, null);

  // try-with-resources closes the sample stream even though training is expected
  // to throw; the training exception stays the one that reaches the test runner.
  try (SentenceSampleStream samples = new SentenceSampleStream(
      new PlainTextByLineStream(in, StandardCharsets.UTF_8))) {
    SentenceDetectorME.train("eng", samples, factory, mlParams);
  }
}
// Pass the configured end-of-sentence characters through
// SentenceSampleStream.replaceNewLineEscapeTags and keep the result in `eos` as a char array.
// NOTE(review): presumably this turns escaped newline tags into real line breaks — confirm
// against SentenceSampleStream.
// NOTE(review): assumes params.getEosChars() is non-null here; any guard lies outside this
// excerpt — verify.
String eosString = SentenceSampleStream.replaceNewLineEscapeTags( params.getEosChars()); eos = eosString.toCharArray();
/**
 * Trains a sentence detector model for language {@code "en"} from a corpus that is
 * read line by line as UTF-8.
 *
 * <p>The sample stream is opened in a try-with-resources statement so that it is
 * closed whether or not training succeeds, and so that a failure while closing does
 * not replace an exception thrown by training itself.
 *
 * <p>NOTE(review): {@code iterations} and {@code cut} are accepted but never read;
 * training always uses {@code TrainingParameters.defaultParams()}. Left unchanged
 * here because callers are not visible — confirm whether the arguments should be
 * applied.
 *
 * @param corpus the raw training text
 * @param iterations currently unused (see note above)
 * @param cut currently unused (see note above)
 * @return the trained model
 * @throws IOException if reading the corpus or training fails
 */
private static SentenceModel train(InputStream corpus, int iterations, int cut)
    throws IOException {
  ObjectStream<String> lineStream =
      new PlainTextByLineStream(corpus, Charset.forName("UTF-8"));

  try (ObjectStream<SentenceSample> sampleStream = new SentenceSampleStream(lineStream)) {
    return SentenceDetectorME.train("en", sampleStream, true, null,
        TrainingParameters.defaultParams());
  }
}
/**
 * Reads the next document from the underlying line stream and converts it into a
 * {@link SentenceSample}.
 *
 * <p>Every non-empty line is treated as one sentence: it is trimmed, passed through
 * {@code replaceNewLineEscapeTags}, and appended to the document text followed by a
 * single space. A {@link Span} covering the sentence (without the separating space)
 * is recorded per line. An empty line ends the document.
 *
 * <p>Empty lines in front of a document are skipped. Previously a leading empty line,
 * or two empty lines in a row, produced a {@code null} result although further lines
 * were still available, which a consumer using the same {@code null}-terminated
 * convention as {@code samples.read()} would take for the end of the data.
 *
 * @return the next sample, or {@code null} when the underlying stream yields no
 *         further sentences
 * @throws IOException if reading from the underlying stream fails
 */
public SentenceSample read() throws IOException {
  StringBuilder sentencesString = new StringBuilder();
  List<Span> sentenceSpans = new LinkedList<>();

  String sentence = samples.read();

  // Skip separator lines that precede the document so that repeated blank lines
  // are not reported as an (empty) document, i.e. as null.
  while (sentence != null && sentence.isEmpty()) {
    sentence = samples.read();
  }

  while (sentence != null && !sentence.isEmpty()) {
    int begin = sentencesString.length();

    // Emptiness is tested before trimming, so a whitespace-only line does not end
    // the document; it contributes a zero-length span, as it did before.
    sentence = replaceNewLineEscapeTags(sentence.trim());
    sentencesString.append(sentence);

    int end = sentencesString.length();
    sentenceSpans.add(new Span(begin, end));

    // Separator between sentences; deliberately outside the recorded span.
    sentencesString.append(' ');

    sentence = samples.read();
  }

  if (sentenceSpans.isEmpty()) {
    return null;
  }

  return new SentenceSample(sentencesString.toString(),
      sentenceSpans.toArray(new Span[sentenceSpans.size()]));
}
} // closes the enclosing type, whose header is outside this excerpt
try (ObjectStream<String> lineStream = new PlainTextByLineStream(mfisf, charset)) { ObjectStream<SentenceSample> sampleStream = new SentenceSampleStream(lineStream);
/**
 * Reads the next document from the underlying line stream and converts it into a
 * {@link SentenceSample}.
 *
 * <p>Every non-empty line is treated as one sentence: it is trimmed, passed through
 * {@code replaceNewLineEscapeTags}, and appended to the document text followed by a
 * single space. A {@link Span} covering the sentence (without the separating space)
 * is recorded per line. An empty line ends the document.
 *
 * <p>Empty lines in front of a document are skipped. Previously a leading empty line,
 * or two empty lines in a row, produced a {@code null} result although further lines
 * were still available, which a consumer using the same {@code null}-terminated
 * convention as {@code samples.read()} would take for the end of the data.
 *
 * @return the next sample, or {@code null} when the underlying stream yields no
 *         further sentences
 * @throws IOException if reading from the underlying stream fails
 */
public SentenceSample read() throws IOException {
  StringBuilder sentencesString = new StringBuilder();
  List<Span> sentenceSpans = new LinkedList<>();

  String sentence = samples.read();

  // Skip separator lines that precede the document so that repeated blank lines
  // are not reported as an (empty) document, i.e. as null.
  while (sentence != null && sentence.isEmpty()) {
    sentence = samples.read();
  }

  while (sentence != null && !sentence.isEmpty()) {
    int begin = sentencesString.length();

    // Emptiness is tested before trimming, so a whitespace-only line does not end
    // the document; it contributes a zero-length span, as it did before.
    sentence = replaceNewLineEscapeTags(sentence.trim());
    sentencesString.append(sentence);

    int end = sentencesString.length();
    sentenceSpans.add(new Span(begin, end));

    // Separator between sentences; deliberately outside the recorded span.
    sentencesString.append(' ');

    sentence = samples.read();
  }

  if (sentenceSpans.isEmpty()) {
    return null;
  }

  return new SentenceSample(sentencesString.toString(),
      sentenceSpans.toArray(new Span[sentenceSpans.size()]));
}
} // closes the enclosing type, whose header is outside this excerpt
ObjectStream<String> lineStream = null; lineStream = new PlainTextByLineStream(mfisf, encoding); ObjectStream<SentenceSample> sampleStream = new SentenceSampleStream(lineStream);
// Pass the configured end-of-sentence characters through
// SentenceSampleStream.replaceNewLineEscapeTags and keep the result in `eos` as a char array.
// NOTE(review): presumably this turns escaped newline tags into real line breaks — confirm
// against SentenceSampleStream.
// NOTE(review): assumes params.getEosChars() is non-null here; any guard lies outside this
// excerpt — verify.
String eosString = SentenceSampleStream.replaceNewLineEscapeTags(params.getEosChars()); eos = eosString.toCharArray();
ObjectStream<String> lineStream = null; lineStream = new PlainTextByLineStream(mfisf, encoding); ObjectStream<SentenceSample> sampleStream = new SentenceSampleStream(lineStream);
// Pass the configured end-of-sentence characters through
// SentenceSampleStream.replaceNewLineEscapeTags and keep the result in `eos` as a char array.
// NOTE(review): presumably this turns escaped newline tags into real line breaks — confirm
// against SentenceSampleStream.
// NOTE(review): assumes params.getEosChars() is non-null here; any guard lies outside this
// excerpt — verify.
String eosString = SentenceSampleStream.replaceNewLineEscapeTags(params.getEosChars()); eos = eosString.toCharArray();
try (ObjectStream<String> lineStream = new PlainTextByLineStream(mfisf, charset)) { ObjectStream<SentenceSample> sampleStream = new SentenceSampleStream(lineStream);
// Pass the configured end-of-sentence characters through
// SentenceSampleStream.replaceNewLineEscapeTags and keep the result in `eos` as a char array.
// NOTE(review): presumably this turns escaped newline tags into real line breaks — confirm
// against SentenceSampleStream.
// NOTE(review): assumes params.getEosChars() is non-null here; any guard lies outside this
// excerpt — verify.
String eosString = SentenceSampleStream.replaceNewLineEscapeTags( params.getEosChars()); eos = eosString.toCharArray();
SentenceSampleStream samples = new SentenceSampleStream( new ObjectStream<String>() { boolean done = false;
// Pass the configured end-of-sentence characters through
// SentenceSampleStream.replaceNewLineEscapeTags and keep the result in `eos` as a char array.
// NOTE(review): presumably this turns escaped newline tags into real line breaks — confirm
// against SentenceSampleStream.
// NOTE(review): assumes params.getEosChars() is non-null here; any guard lies outside this
// excerpt — verify.
String eosString = SentenceSampleStream.replaceNewLineEscapeTags( params.getEosChars()); eos = eosString.toCharArray();
/**
 * Creates a {@link SentenceSampleStream} from command line arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}. The data file they name is
 * checked with {@code CmdLineUtil.checkInputFile} and then read line by line using
 * the encoding taken from the same parameters.
 *
 * <p>If the line stream cannot be created, the {@link IOException} is handed to
 * {@code CmdLineUtil.handleCreateObjectStreamError}; should that call return
 * normally, the returned sample stream wraps {@code null}.
 *
 * @param args the command line arguments to parse
 * @return a sentence sample stream over the lines of the data file
 */
public ObjectStream<SentenceSample> create(String[] args) {
  final Parameters cliParams = ArgumentParser.parse(args, Parameters.class);

  CmdLineUtil.checkInputFile("Data", cliParams.getData());
  final InputStreamFactory dataFactory =
      CmdLineUtil.createInputStreamFactory(cliParams.getData());

  ObjectStream<String> lines;
  try {
    lines = new PlainTextByLineStream(dataFactory, cliParams.getEncoding());
  } catch (IOException ioe) {
    CmdLineUtil.handleCreateObjectStreamError(ioe);
    // Only reached if the error handler returns instead of throwing.
    lines = null;
  }

  return new SentenceSampleStream(lines);
}
} // closes the enclosing type, whose header is outside this excerpt
/**
 * Creates a {@link SentenceSampleStream} from command line arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}. The data file they name is
 * checked with {@code CmdLineUtil.checkInputFile} and then read line by line using
 * the encoding taken from the same parameters.
 *
 * <p>If the line stream cannot be created, the {@link IOException} is handed to
 * {@code CmdLineUtil.handleCreateObjectStreamError}; should that call return
 * normally, the returned sample stream wraps {@code null}.
 *
 * @param args the command line arguments to parse
 * @return a sentence sample stream over the lines of the data file
 */
public ObjectStream<SentenceSample> create(String[] args) {
  final Parameters cliParams = ArgumentParser.parse(args, Parameters.class);

  CmdLineUtil.checkInputFile("Data", cliParams.getData());
  final InputStreamFactory dataFactory =
      CmdLineUtil.createInputStreamFactory(cliParams.getData());

  ObjectStream<String> lines;
  try {
    lines = new PlainTextByLineStream(dataFactory, cliParams.getEncoding());
  } catch (IOException ioe) {
    CmdLineUtil.handleCreateObjectStreamError(ioe);
    // Only reached if the error handler returns instead of throwing.
    lines = null;
  }

  return new SentenceSampleStream(lines);
}
} // closes the enclosing type, whose header is outside this excerpt