/**
 * Verifies that the document text of {@code actual} contains the given string.
 * Fails with a message showing both the expected fragment and the full document text.
 *
 * @param string
 *            the text that is expected to occur in the document text
 * @return this assertion object, to allow chaining
 */
public CasAssert containsText(String string) {
    // Read the text once so the check and the failure message report the same value.
    final String documentText = actual.getDocumentText();
    if (!documentText.contains(string)) {
        failWithMessage("Expect document text to contain <%s>. Actual text is: <%s>", string,
                documentText);
    }
    return this;
}
/**
 * Returns the document text that directly follows the given unit. The window starts at
 * {@code unit.getEnd()}, spans at most {@code CONTEXT_WIDTH} characters and is clipped at
 * the end of the document. Newline characters in the result are replaced by spaces.
 *
 * @param jcas
 *            the JCas providing the document text
 * @param unit
 *            the target whose right-hand context is extracted
 * @return the right context with newlines replaced by single spaces
 */
private static String getRightContext(JCas jcas, TextClassificationTarget unit) {
    final int contextStart = unit.getEnd();
    final String documentText = jcas.getDocumentText();
    // Never reach past the end of the document.
    final int contextEnd = Math.min(contextStart + CONTEXT_WIDTH, documentText.length());
    return documentText.substring(contextStart, contextEnd).replace('\n', ' ');
}
}
/**
 * Writes the document text of the given JCas to the stream returned by
 * {@code getOutputStream(aJCas, filenameSuffix)}, passing {@code targetEncoding} to the
 * writer. The stream is closed automatically.
 *
 * @param aJCas
 *            the JCas whose document text is written
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised while opening, writing or closing the stream
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    try (OutputStream out = getOutputStream(aJCas, filenameSuffix)) {
        final String documentText = aJCas.getDocumentText();
        IOUtils.write(documentText, out, targetEncoding);
    } catch (Exception cause) {
        throw new AnalysisEngineProcessException(cause);
    }
}
}
/**
 * Creates a {@link TextClassificationTarget} that spans the whole document text of the
 * given JCas, i.e. from offset 0 to the length of the document text.
 *
 * @param aJCas
 *            the JCas.
 * @return a {@link TextClassificationTarget} covering the full document text.
 */
public static TextClassificationTarget get(final JCas aJCas) {
    final int documentLength = aJCas.getDocumentText().length();
    return new TextClassificationTarget(aJCas, 0, documentLength);
}
}
/**
 * Marks a document as corrupt by setting its text to {@code CORRUPT_FILE_TEXT}. A document
 * that already has non-empty text is left untouched.
 *
 * @param jCas the jCas
 */
private void setCorrupt(JCas jCas) {
    final String existingText = jCas.getDocumentText();
    if (!Strings.isNullOrEmpty(existingText)) {
        return;
    }
    jCas.setDocumentText(CORRUPT_FILE_TEXT);
}
/**
 * Prints a debug line to standard output showing where a section starts.
 *
 * <ul>
 * <li>If {@code begin} is negative, a line with the description, {@code -1} and the header
 * document id is printed.</li>
 * <li>If {@code begin} lies inside the document text, the description, the header document
 * id, the offset and up to 50 characters of text starting at {@code begin} are printed.</li>
 * <li>Otherwise nothing is printed.</li>
 * </ul>
 *
 * @param desc
 *            label identifying the section in the output
 * @param cas
 *            the JCas providing the document text and header document id
 * @param begin
 *            start offset of the section, or a negative value if unknown
 */
private void printSectionStart(String desc, JCas cas, int begin) {
    String str = cas.getDocumentText();
    if (begin < 0) {
        System.out.println("AAAA - " + desc + " - -1 - " + BlueCasUtil.getHeaderDocId(cas));
    } else if (begin < str.length()) {
        // substring's end index is exclusive, so the window must be clamped to length();
        // clamping to length() - 1 silently dropped the last character of the document.
        int end = Math.min(begin + 50, str.length());
        System.out.println("AAAA - "+desc+" ("+ BlueCasUtil.getHeaderDocId(cas)+":"+begin+"): "+str.substring(begin, end));
    }
}
/**
 * Saves the document text of the given JCas to a text file. The file is placed in
 * {@code outputDirectory} and named after the file name of the view URI with
 * {@code ".txt"} appended.
 *
 * @param jCas
 *            the JCas whose document text is saved
 * @throws AnalysisEngineProcessException
 *             wrapping an {@link IOException} raised while saving the file
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    final File source = new File(ViewUriUtil.getURI(jCas));
    final String targetName = source.getName() + ".txt";
    final File target = new File(this.outputDirectory, targetName);
    try {
        FileUtils.saveString2File(jCas.getDocumentText(), target);
    } catch (IOException cause) {
        throw new AnalysisEngineProcessException(cause);
    }
}
/**
 * Adds a {@code Token} annotation for the given span to the indexes, unless the covered
 * text is empty after trimming.
 *
 * @param aJCas
 *            the JCas to annotate
 * @param begin
 *            start offset of the token
 * @param end
 *            end offset of the token (exclusive, as used by {@code substring})
 */
private void addToken(JCas aJCas, int begin, int end) {
    final String coveredText = aJCas.getDocumentText().substring(begin, end);
    if (coveredText.trim().isEmpty()) {
        // Nothing but whitespace: no token is created.
        return;
    }
    new Token(aJCas, begin, end).addToIndexes();
}
}
/**
 * Runs {@code processForEntityType} first for {@code EntityMention} and then for
 * {@code EventMention} annotations, logging the start and end of the call.
 *
 * @param jcas
 *            the JCas to process
 * @throws AnalysisEngineProcessException
 *             declared by the overridden method
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    logger.info("beginning of ConceptConverterAnalysisEngine.process()");
    // The document text was previously fetched into a local that was never read; removed.
    processForEntityType(jcas, EntityMention.type, EntityMention.class);
    processForEntityType(jcas, EventMention.type, EventMention.class);
    logger.info("end of ConceptConverterAnalysisEngine.process()");
}
/**
 * Writes one line for the given JCas to {@code bw}: the document id from its
 * {@code DocumentMetaData}, a tab, the document text and a trailing newline.
 *
 * @param aJCas
 *            the JCas to write
 * @throws AnalysisEngineProcessException
 *             wrapping an {@link IOException} raised by the writer
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    final DocumentMetaData metaData = JCasUtil.selectSingle(aJCas, DocumentMetaData.class);
    try {
        final String line = metaData.getDocumentId() + "\t" + aJCas.getDocumentText() + "\n";
        bw.write(line);
    } catch (IOException cause) {
        throw new AnalysisEngineProcessException(cause);
    }
}
/**
 * Tells whether this instance refers to the whole document (the JCas) rather than to a
 * Text annotation.
 *
 * <p>A text annotation that starts at offset 0 and ends at the length of the document text
 * also counts as the whole document.
 *
 * @return true if there is no text annotation or the annotation spans the entire document
 */
public boolean isWholeDocument() {
    if (text == null) {
        return true;
    }
    if (text.getBegin() != 0) {
        return false;
    }
    return text.getEnd() == jCas.getDocumentText().length();
}
/**
 * Copies the document text of the given JCas into its view named {@code GOLD_VIEW_NAME}.
 *
 * @param jCas
 *            the JCas whose document text is copied
 * @throws AnalysisEngineProcessException
 *             wrapping a {@code CASException} raised while obtaining the view
 */
@Override
public void process( JCas jCas ) throws AnalysisEngineProcessException {
    try {
        final JCas targetView = jCas.getView( GOLD_VIEW_NAME );
        final String sourceText = jCas.getDocumentText();
        targetView.setDocumentText( sourceText );
    } catch ( CASException cause ) {
        throw new AnalysisEngineProcessException( cause );
    }
}
}
/**
 * Collects the document n-grams of the given JCas. The target passed to
 * {@code NGramUtils.getDocumentNgrams} spans the whole document text, and both n-gram
 * length arguments are 1.
 *
 * @param jcas
 *            the JCas to extract n-grams from
 * @return the frequency distribution returned by {@code NGramUtils.getDocumentNgrams}
 * @throws TextClassificationException
 *             declared by the overridden method
 */
@Override
protected FrequencyDistribution<String> getNgramsFD(JCas jcas) throws TextClassificationException {
    final int documentLength = jcas.getDocumentText().length();
    final TextClassificationTarget fullDocument = new TextClassificationTarget(jcas, 0,
            documentLength);
    return NGramUtils.getDocumentNgrams(jcas, fullDocument, ngramLowerCase,
            filterPartialStopwordMatches, 1, 1, stopwords);
}
/**
 * Initialises this instance from the given JCas: stores the document text in {@code text}
 * and replaces {@code tokens} with a new list holding one {@code Span} (begin, end) per
 * {@code Token} annotation selected from the JCas.
 *
 * @param aJCas
 *            the JCas to read the text and tokens from
 */
@Override
public void init(JCas aJCas) {
    text = aJCas.getDocumentText();
    tokens = new ArrayList<>();
    for (Token current : select(aJCas, Token.class)) {
        tokens.add(new Span(current.getBegin(), current.getEnd()));
    }
}
/**
 * Calls {@code doAnnotations} on the base annotations of the given JCas.
 *
 * <p>If no segment annotation class is configured, all base annotations are handled in one
 * call, with the document text length as the last argument. Otherwise there is one call per
 * segment annotation, covering the base annotations inside that segment and passing the
 * segment's end offset.
 *
 * @param jcas
 *            the JCas to process
 * @throws AnalysisEngineProcessException
 *             declared by the overridden method
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    if (segmentAnnotationClass == null) {
        doAnnotations(jcas, JCasUtil.select(jcas, baseAnnotation),
                jcas.getDocumentText().length());
        return;
    }
    for (final Annotation segment : JCasUtil.select(jcas, segmentAnnotationClass)) {
        doAnnotations(jcas, JCasUtil.selectCovered(jcas, baseAnnotation, segment),
                segment.getEnd());
    }
}
@Override public void process(JCas jcas) throws AnalysisEngineProcessException { // Create the NER instance and find all named entities in the text final AbstractDatabase db = DatabaseSingleton.getInstance(); final NamedEntityRecognition ner = new NamedEntityRecognition(db); for (NamedEntity entity : ner.findEntities(jcas.getDocumentText())) { createOntologyReferenceAnnotation(jcas, entity); } long t2 = System.nanoTime(); } }
/**
 * Adds a single {@code Segment} annotation spanning the whole document text (offset 0 to
 * the text length) to the indexes, and logs the segment length at {@code Level.CONFIG}.
 *
 * @param jCas
 *            the JCas to annotate
 * @throws AnalysisEngineProcessException
 *             declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    final int documentLength = jCas.getDocumentText().length();
    final String message = "Entering " + getClass().getSimpleName()
            + ". Adding segment of length " + documentLength + ".";
    getContext().getLogger().log(Level.CONFIG, message);

    final Segment wholeDocument = new Segment(jCas);
    wholeDocument.setBegin(0);
    wholeDocument.setEnd(documentLength);
    wholeDocument.addToIndexes();
}
/**
 * Tokenizes the document text of the input JCas with a {@code PTBTokenizer} (created with
 * the {@code "invertible"} option) and calls {@code replace} with the begin position, end
 * position and word of every resulting token.
 *
 * @param aInput
 *            the JCas providing the text to tokenize
 * @param aOutput
 *            the output JCas; not referenced directly in this method
 * @throws AnalysisEngineProcessException
 *             declared by the overridden method
 */
@Override
public void process(JCas aInput, JCas aOutput) throws AnalysisEngineProcessException {
    final StringReader source = new StringReader(aInput.getDocumentText());
    final Tokenizer<CoreLabel> ptbTokenizer = new PTBTokenizer<CoreLabel>(source,
            new CoreLabelTokenFactory(), "invertible");
    for (CoreLabel tokenLabel : ptbTokenizer.tokenize()) {
        replace(tokenLabel.beginPosition(), tokenLabel.endPosition(), tokenLabel.word());
    }
}
}
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException { // Create the Java parser and parse the source code into an abstract syntax tree ASTParser parser = ASTParser.newParser(AST.JLS3); parser.setSource(aJCas.getDocumentText().toCharArray()); CompilationUnit result = (CompilationUnit) parser.createAST(null); // Generate JavaDoc related annotations result.accept(new JavaDocVisitor(aJCas)); // Generate Java code related annotations result.accept(new CodeVisitor(aJCas)); }
/**
 * Computes the character gap between two annotations relative to the document length.
 *
 * <p>The gap is the distance from the end of the earlier annotation to the begin of the
 * later one, and 0 if neither ends strictly before the other begins. The gap is then
 * divided by the length of the document text using floating-point division.
 *
 * @param a
 *            the first annotation
 * @param b
 *            the second annotation
 * @param jcas
 *            the JCas providing the document text
 * @return the gap divided by the document text length
 */
public static double getRelativeDist(Annotation a, Annotation b, JCas jcas) {
    double gap = 0;
    if (a.getEnd() < b.getBegin()) {
        // a lies completely before b
        gap = b.getBegin() - a.getEnd();
    } else if (b.getEnd() < a.getBegin()) {
        // b lies completely before a
        gap = a.getBegin() - b.getEnd();
    }
    final double documentLength = jcas.getDocumentText().length();
    return gap / documentLength;
}