public void parse(byte[] data, byte[] identifier, JCas jcas) { try { VTDGen vg = new VTDGen(); // needed for extraction of mixed-content-XML // when there is a whitespace only between two // tags, e.g. ...</s> <s id=".">... vg.enableIgnoredWhiteSpace(true); vg.setDoc(data); vg.parse(true); VTDNav vn = vg.getNav(); buildTypes(identifier, jcas, vn); } catch (EncodingException e) { e.printStackTrace(); } catch (EOFException e) { e.printStackTrace(); } catch (EntityException e) { LOG.error(String.format("Document %s could not be parsed due to an EntityError. Document text is:\n%s", new String(identifier), new String(data)), e); } catch (CollectionException e) { e.printStackTrace(); } catch (ParseException e) { LOG.error(String.format("Document %s could not be parsed due to a general parsing error. Document text is:\n%s", new String(identifier), new String(data)), e); } }
/**
 * Reads the complete stream via {@code readStream}, parses it with VTD-XML
 * and returns a navigator over the parsed document.
 *
 * <p>Parse failures are propagated to the caller instead of being printed and
 * ignored; a navigator is only returned when parsing succeeded.
 *
 * @param is         the stream to read the XML document from
 * @param bufferSize the buffer size handed to {@code readStream}
 * @return the navigator for the successfully parsed document
 * @throws ParseException            if the document cannot be parsed
 * @throws FileTooBigException       if {@code readStream} reports it, or if the
 *                                   parser fails with a message containing
 *                                   "file size too big"
 * @throws java.io.UncheckedIOException if reading the stream fails with any
 *                                   other {@code IOException}
 */
public static VTDNav getVTDNav(InputStream is, int bufferSize) throws ParseException, FileTooBigException {
    try {
        byte[] data = readStream(is, bufferSize);
        VTDGen vg = new VTDGen();
        vg.setDoc(data);
        vg.parse(true);
        return vg.getNav();
    } catch (FileTooBigException e) {
        throw e;
    } catch (IOException e) {
        // The original printed the stack trace and then failed with a bare
        // NullPointerException on the never-created generator; fail with the
        // real cause attached instead.
        throw new java.io.UncheckedIOException("Could not read the XML input stream", e);
    } catch (ParseException e) {
        String message = e.getMessage();
        // The parser signals oversized documents only through its message text.
        if (message != null && message.contains("file size too big")) {
            throw new FileTooBigException(message);
        }
        // Every other parse failure is part of the declared contract: rethrow
        // rather than returning a navigator for a document that did not parse.
        throw e;
    }
}