public void parse(byte[] data, byte[] identifier, JCas jcas) { try { VTDGen vg = new VTDGen(); // needed for extraction of mixed-content-XML // when there is a whitespace only between two // tags, e.g. ...</s> <s id=".">... vg.enableIgnoredWhiteSpace(true); vg.setDoc(data); vg.parse(true); VTDNav vn = vg.getNav(); buildTypes(identifier, jcas, vn); } catch (EncodingException e) { e.printStackTrace(); } catch (EOFException e) { e.printStackTrace(); } catch (EntityException e) { LOG.error(String.format("Document %s could not be parsed due to an EntityError. Document text is:\n%s", new String(identifier), new String(data)), e); } catch (CollectionException e) { e.printStackTrace(); } catch (ParseException e) { LOG.error(String.format("Document %s could not be parsed due to a general parsing error. Document text is:\n%s", new String(identifier), new String(data)), e); } }
// Turn on VTD-XML's "ignored whitespace" option before parsing, then parse the file
// returned by project.getFile(); the second argument is presumably the namespace-awareness flag.
// NOTE(review): the value returned by parseFile is discarded here - confirm whether it signals failure.
gen.enableIgnoredWhiteSpace(true); gen.parseFile(project.getFile().getAbsolutePath(), true);
/**
 * Begins by parsing {@code pomXml} with a fresh {@code VTDGen} that has the
 * "ignored whitespace" option enabled; the second {@code parseFile} argument is
 * presumably the namespace-awareness flag.
 * NOTE(review): the value returned by {@code parseFile} is discarded - confirm
 * whether it signals failure.
 */
private static void updateAllConfigurations(File pomXml, Map<String, ModelNode> extensionSchemas, int indentationSize) throws Exception { VTDGen gen = new VTDGen(); gen.enableIgnoredWhiteSpace(true); gen.parseFile(pomXml.getAbsolutePath(), true);
/**
 * Rewrites the text of the {@code <parent>/<version>} element in the project's
 * file in place, leaving the rest of the document byte-for-byte untouched.
 * <p>
 * The file is parsed with VTD-XML, the cursor is moved from the root element to
 * its {@code parent} child and then to that element's {@code version} child
 * (both looked up in the root element's namespace), and the text token found
 * there is replaced with {@code version.toString()}.
 *
 * @param project the project whose file is modified
 * @param version the new parent version
 * @throws MojoExecutionException if the file cannot be parsed, does not contain a
 *                                parent version, or cannot be modified or written
 */
void updateProjectParentVersion(MavenProject project, Version version) throws MojoExecutionException {
    try {
        VTDGen gen = new VTDGen();
        gen.enableIgnoredWhiteSpace(true);
        // parseFile signals I/O and parse failures through its return value only.
        if (!gen.parseFile(project.getFile().getAbsolutePath(), true)) {
            throw new MojoExecutionException("Failed to parse the POM file of project " + project);
        }

        VTDNav nav = gen.getNav();
        AutoPilot ap = new AutoPilot(nav);
        ap.selectXPath("namespace-uri(.)");
        String ns = ap.evalXPathToString();

        // toElementNS leaves the cursor where it is when nothing matches. Without these
        // checks a POM lacking <parent> would have the project's own <version> (or some
        // other token) overwritten instead.
        if (!nav.toElementNS(VTDNav.FIRST_CHILD, ns, "parent")) {
            throw new MojoExecutionException("No parent element found in the POM file of project " + project);
        }
        if (!nav.toElementNS(VTDNav.FIRST_CHILD, ns, "version")) {
            throw new MojoExecutionException("No parent version element found in the POM file of project " + project);
        }

        int pos = nav.getText();
        if (pos == -1) {
            throw new MojoExecutionException("The parent version element is empty in the POM file of project " + project);
        }

        XMLModifier mod = new XMLModifier(nav);
        mod.updateToken(pos, version.toString());
        try (OutputStream out = new FileOutputStream(project.getFile())) {
            mod.output(out);
        }
    } catch (IOException | ModifyException | NavException | XPathParseException | TranscodeException e) {
        throw new MojoExecutionException("Failed to update the parent version of project " + project, e);
    }
}
public void reset(InputStream is, JCas cas) throws DocumentParsingException { this.cas = cas; try { byte[] bytes = JulieXMLTools.readStream(is, 8192); VTDGen vg = new VTDGen(); vg.setDoc(bytes); // If we don't set this to true, some whitespaces, for example // directly after closing tags, would be omitted. We don't want // this, the NXML format is very specific in its whitespaces. vg.enableIgnoredWhiteSpace(true); vg.parse(false); vn = vg.getNav(); setTagset(); setupParserRegistry(); } catch (IOException | VTDException e) { throw new DocumentParsingException(e); } }
/**
 * Begins by parsing the file returned by {@code project.getFile()} with a fresh
 * {@code VTDGen} that has the "ignored whitespace" option enabled; the second
 * {@code parseFile} argument is presumably the namespace-awareness flag.
 * NOTE(review): the value returned by {@code parseFile} is discarded - confirm
 * whether it signals failure.
 */
private void updateAllConfigurationFiles(MavenProject project, Map<String, ModelNode> extensionSchemas, int indentationSize) throws Exception { VTDGen gen = new VTDGen(); gen.enableIgnoredWhiteSpace(true); gen.parseFile(project.getFile().getAbsolutePath(), true);
@Override public void index() throws Exception { super.index(); // Parse use VTD-XML vg = new VTDGen(); vg.setDoc(inputDocument); // Whitespace in between elements is normally ignored, // but we explicitly allow whitespace in between elements to be collected here. // This allows punctuation xpath to match this whitespace, in case punctuation/whitespace in the document isn't contained in a dedicated element or attribute. // This doesn't mean that this whitespace is always used, it just enables the punctuation xpath to find this whitespace if it explicitly matches it. vg.enableIgnoredWhiteSpace(true); vg.parse(config.isNamespaceAware()); nav = vg.getNav(); // Find all documents AutoPilot documents = acquireAutoPilot(config.getDocumentPath()); while(documents.evalXPath() != -1) { indexDocument(); } releaseAutoPilot(documents); }