/**
 * Builds one sample from at most 25 strings pulled from {@code samples}.
 * Every string is trimmed, appended to the document text and recorded as its own
 * sentence span; a single space is appended after each one.
 *
 * @return the assembled sample, or {@code null} when nothing could be read
 * @throws IOException if reading from {@code samples} fails
 */
public SentenceSample read() throws IOException {
  StringBuilder text = new StringBuilder();
  List<Span> spans = new LinkedList<>();

  int consumed = 0;
  String line;
  while (consumed < 25 && (line = samples.read()) != null) {
    String trimmed = line.trim();
    int start = text.length();
    text.append(trimmed);
    spans.add(new Span(start, text.length()));
    // separator between consecutive sentences; not covered by any span
    text.append(' ');
    consumed++;
  }

  if (spans.isEmpty()) {
    return null;
  }
  return new SentenceSample(text.toString(), spans.toArray(new Span[spans.size()]));
}
}
/**
 * Concatenates the text comments of up to {@code sentencesPerSample} sentences read
 * from {@code samples} into one document, separated by single spaces.
 *
 * @return the assembled sample, or {@code null} when no text was collected
 * @throws IOException if reading from {@code samples} fails
 */
@Override
public SentenceSample read() throws IOException {
  StringBuilder text = new StringBuilder();
  List<Span> spans = new ArrayList<>();

  int consumed = 0;
  ConlluSentence current;
  while (consumed < sentencesPerSample && (current = samples.read()) != null) {
    int begin = text.length();
    text.append(current.getTextComment());
    // the span ends before the separator that is appended next
    spans.add(new Span(begin, text.length()));
    text.append(' ');
    consumed++;
  }

  if (text.length() == 0) {
    return null;
  }

  // remove the separator that follows the last sentence
  text.setLength(text.length() - 1);
  return new SentenceSample(text, spans.toArray(new Span[spans.size()]));
}
}
/**
 * Collects strings from {@code samples} until the stream returns {@code null} or an
 * empty string. Each string is trimmed, passed through
 * {@code replaceNewLineEscapeTags} and recorded as one sentence span, followed by a
 * single space.
 *
 * @return the assembled sample, or {@code null} when no sentence was collected
 * @throws IOException if reading from {@code samples} fails
 */
public SentenceSample read() throws IOException {
  StringBuilder text = new StringBuilder();
  List<Span> spans = new LinkedList<>();

  while (true) {
    String line = samples.read();
    // the emptiness check happens before trimming, so only a truly empty string stops the loop
    if (line == null || line.isEmpty()) {
      break;
    }

    String cleaned = replaceNewLineEscapeTags(line.trim());
    int start = text.length();
    text.append(cleaned);
    spans.add(new Span(start, text.length()));
    text.append(' ');
  }

  if (spans.isEmpty()) {
    return null;
  }
  return new SentenceSample(text.toString(), spans.toArray(new Span[spans.size()]));
}
}
@Override public SentenceSample read() throws IOException { StringBuilder sentencesString = new StringBuilder(); List<Span> sentenceSpans = new LinkedList<>(); while (sentenceIt.hasNext()) { IrishSentenceBankDocument.IrishSentenceBankSentence sentence = sentenceIt.next(); int begin = sentencesString.length(); if (sentence.getOriginal() != null) { sentencesString.append(sentence.getOriginal()); } sentenceSpans.add(new Span(begin, sentencesString.length())); sentencesString.append(' '); } // end of stream is reached, indicate that with null return value if (sentenceSpans.size() == 0) { return null; } return new SentenceSample(sentencesString.toString(), sentenceSpans.toArray(new Span[sentenceSpans.size()])); }
public SentenceSample read() throws IOException { List<String[]> sentences = new ArrayList<>(); T posSample; int chunks = 0; while ((posSample = samples.read()) != null && chunks < chunkSize) { sentences.add(toSentence(posSample)); chunks++; } if (sentences.size() > 0) { return new SentenceSample(detokenizer, sentences.toArray(new String[sentences.size()][])); } else if (posSample != null) { return read(); // filter out empty line } return null; // last sample was read } }
@Override public SentenceSample read() throws IOException { StringBuilder sentencesString = new StringBuilder(); List<Span> sentenceSpans = new LinkedList<>(); for (int i = 0; sentenceIt.hasNext() && i < 25 ; i++) { LetsmtDocument.LetsmtSentence sentence = sentenceIt.next(); int begin = sentencesString.length(); if (sentence.getTokens() != null) { sentencesString.append(String.join(" ", sentence.getTokens())); } else if (sentence.getNonTokenizedText() != null) { sentencesString.append(sentence.getNonTokenizedText()); } sentenceSpans.add(new Span(begin, sentencesString.length())); sentencesString.append(' '); } // end of stream is reached, indicate that with null return value if (sentenceSpans.size() == 0) { return null; } return new SentenceSample(sentencesString.toString(), sentenceSpans.toArray(new Span[sentenceSpans.size()])); }
/**
 * Reads the next sample from {@code samples} and rebuilds its document: every sentence
 * is cut out of the original text, split on whitespace, re-joined with
 * {@code detokenizer} and appended to a new document followed by a single space.
 * The returned spans cover the detokenized sentences in the new document.
 *
 * @return the rebuilt sample, or {@code null} when {@code samples} is exhausted
 * @throws IOException if reading from {@code samples} fails
 */
@Override
public SentenceSample read() throws IOException {
  SentenceSample source = samples.read();
  if (source == null) {
    return null;
  }

  // phase 1: extract the covered text of every sentence
  List<String> coveredTexts = new ArrayList<>();
  for (Span span : source.getSentences()) {
    int from = span.getStart();
    int to = span.getEnd();
    coveredTexts.add(source.getDocument().substring(from, to));
  }

  // phase 2: detokenize each sentence and lay it out in the new document
  StringBuilder rebuilt = new StringBuilder();
  List<Span> rebuiltSpans = new ArrayList<>();
  for (String covered : coveredTexts) {
    String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(covered);
    int start = rebuilt.length();
    rebuilt.append(detokenizer.detokenize(tokens, null));
    int end = rebuilt.length();
    rebuiltSpans.add(new Span(start, end));
    rebuilt.append(' ');
  }

  return new SentenceSample(rebuilt, rebuiltSpans.toArray(new Span[rebuiltSpans.size()]));
}
}
// Build the sample from the accumulated text (as a String) and the collected spans copied into a Span[].
return new SentenceSample(sentencesString.toString(), sentenceSpans.toArray(new Span[sentenceSpans.size()]));
// Build the sample from doc and the collected sentence spans copied into a Span[].
return new SentenceSample(doc, sentences.toArray(new Span[sentences.size()]));
/**
 * Creates a fixed sample over the text {@code "1. 2."} with the spans
 * {@code [0, 1)} and {@code [4, 5)}.
 *
 * @return the newly created sample
 */
public static SentenceSample createPredSample() {
  Span first = new Span(0, 1);
  Span second = new Span(4, 5);
  return new SentenceSample("1. 2.", first, second);
}
}
/**
 * Runs {@code sentenceDetector} on the sample's document, scores the trimmed
 * detected spans against the trimmed spans stored in the sample, and returns a new
 * sample that pairs the same document with the detected spans.
 *
 * @param sample the reference sample
 * @return a sample holding the document and the trimmed detected spans
 */
@Override
protected SentenceSample processSample(SentenceSample sample) {
  Span[] detected = trimSpans(
      sample.getDocument(),
      sentenceDetector.sentPosDetect(sample.getDocument()));

  Span[] expected = trimSpans(
      sample.getDocument(),
      sample.getSentences());

  // references first, predictions second
  fmeasure.updateScores(expected, detected);

  return new SentenceSample(sample.getDocument(), detected);
}
/**
 * Creates a fixed sample over the text {@code "1. 2."} with the spans
 * {@code [0, 2)} and {@code [3, 5)}.
 *
 * @return the newly created sample
 */
public static SentenceSample createGoldSample() {
  Span first = new Span(0, 2);
  Span second = new Span(3, 5);
  return new SentenceSample("1. 2.", first, second);
}
/**
 * Expects the constructor to throw {@link IllegalArgumentException} for these spans.
 */
@Test(expected = IllegalArgumentException.class)
public void testInvalidSpansFailFast() {
  // the text has 5 characters; the second span ends at 7 (presumably the reason for the rejection)
  new SentenceSample("1. 2.", new Span(0, 2), new Span(5, 7));
}
/**
 * Creates a sample from the currently held {@code text} and {@code sentences}, calls
 * {@code documentComplete()}, and then clears both fields.
 *
 * @param aJCas not used by this method
 * @return the sample built from the state held before it was cleared
 */
@Override
public SentenceSample produce(JCas aJCas) {
  Span[] collected = sentences.toArray(new Span[sentences.size()]);
  SentenceSample result = new SentenceSample(text, collected);

  documentComplete();

  // reset the held state once the sample has been built
  sentences = null;
  text = null;

  return result;
}
}
/**
 * Builds one sample from the text comments of at most {@code sentencesPerSample}
 * sentences read from {@code samples}. Sentences are separated by a single space and
 * each span covers exactly one text comment.
 *
 * @return the assembled sample, or {@code null} when no text was collected
 * @throws IOException if reading from {@code samples} fails
 */
@Override
public SentenceSample read() throws IOException {
  StringBuilder buffer = new StringBuilder();
  List<Span> spans = new ArrayList<>();

  ConlluSentence next;
  for (int count = 0; count < sentencesPerSample && (next = samples.read()) != null; count++) {
    int from = buffer.length();
    buffer.append(next.getTextComment());
    int to = buffer.length();
    spans.add(new Span(from, to));
    // separator; the one after the last sentence is removed below
    buffer.append(' ');
  }

  boolean nothingCollected = buffer.length() == 0;
  if (nothingCollected) {
    return null;
  }

  buffer.setLength(buffer.length() - 1);
  return new SentenceSample(buffer, spans.toArray(new Span[spans.size()]));
}
}
/**
 * Checks that the document text and both sentence spans passed to the constructor
 * are returned unchanged by the getters.
 */
@Test
public void testRetrievingContent() {
  Span firstSentence = new Span(0, 2);
  Span secondSentence = new Span(3, 5);
  SentenceSample sample = new SentenceSample("1. 2.", firstSentence, secondSentence);

  Assert.assertEquals("1. 2.", sample.getDocument());

  // compare against fresh instances so the check relies on Span equality, not identity
  Assert.assertEquals(new Span(0, 2), sample.getSentences()[0]);
  Assert.assertEquals(new Span(3, 5), sample.getSentences()[1]);
}
/** * Process the given CAS object. */ public void processCas(CAS cas) { FSIndex<AnnotationFS> sentenceIndex = cas.getAnnotationIndex(mSentenceType); Span[] sentSpans = new Span[sentenceIndex.size()]; int i = 0; for (AnnotationFS sentenceAnnotation : sentenceIndex) { sentSpans[i++] = new Span(sentenceAnnotation.getBegin(), sentenceAnnotation.getEnd()); } // TODO: The line cleaning should be done more carefully sentenceSamples.add(new SentenceSample(cas.getDocumentText().replace('\n', ' '), sentSpans)); }
@Test public void testEventOutcomes() throws IOException { // Sample with two sentences SentenceSample sample = new SentenceSample("Test sent. one. Test sent. 2?", new Span(0, 15), new Span(16, 29)); ObjectStream<SentenceSample> sampleStream = ObjectStreamUtils.createObjectStream(sample); Factory factory = new Factory(); ObjectStream<Event> eventStream = new SDEventStream(sampleStream, factory.createSentenceContextGenerator("eng"), factory.createEndOfSentenceScanner("eng")); Assert.assertEquals(SentenceDetectorME.NO_SPLIT, eventStream.read().getOutcome()); Assert.assertEquals(SentenceDetectorME.SPLIT, eventStream.read().getOutcome()); Assert.assertEquals(SentenceDetectorME.NO_SPLIT, eventStream.read().getOutcome()); Assert.assertEquals(SentenceDetectorME.SPLIT, eventStream.read().getOutcome()); Assert.assertNull(eventStream.read()); } }
/**
 * Detects sentence spans in the sample's document with {@code sentenceDetector},
 * updates {@code fmeasure} with the trimmed reference and predicted spans, and
 * returns the document paired with the trimmed predictions.
 *
 * @param sample the reference sample
 * @return a sample holding the document and the trimmed predicted spans
 */
@Override
protected SentenceSample processSample(SentenceSample sample) {
  // prediction side: detector output, trimmed against the document
  Span[] predicted =
      trimSpans(sample.getDocument(), sentenceDetector.sentPosDetect(sample.getDocument()));

  // reference side: the spans stored in the sample, trimmed the same way
  Span[] gold = trimSpans(sample.getDocument(), sample.getSentences());

  fmeasure.updateScores(gold, predicted);

  SentenceSample result = new SentenceSample(sample.getDocument(), predicted);
  return result;
}
/**
 * Scores one sample: the spans found by {@code sentenceDetector} and the spans stored
 * in the sample are both passed through {@code trimSpans} and then handed to
 * {@code fmeasure}.
 *
 * @param sample the reference sample
 * @return a new sample made of the same document and the trimmed detected spans
 */
@Override
protected SentenceSample processSample(SentenceSample sample) {
  Span[] systemSpans = trimSpans(
      sample.getDocument(), sentenceDetector.sentPosDetect(sample.getDocument()));
  Span[] referenceSpans = trimSpans(
      sample.getDocument(), sample.getSentences());

  // argument order is (references, predictions)
  fmeasure.updateScores(referenceSpans, systemSpans);

  return new SentenceSample(sample.getDocument(), systemSpans);
}