/**
 * Loads an entire file into one string, decoding it with the platform's default charset.
 *
 * <p>This is a convenience wrapper: it looks up the default charset's name and hands the
 * work to the two-argument {@code slurp(fileName, charsetName)} overload.
 *
 * @param fileName path of the file to read
 * @return whatever the charset-aware overload returns for this file and charset
 * @throws FileNotFoundException if the file cannot be found (presumably raised by the
 *         delegated overload)
 */
public static String slurp(String fileName) throws FileNotFoundException {
    final String defaultCharsetName = Charset.defaultCharset().name();
    return slurp(fileName, defaultCharsetName);
}
/**
 * Loads an entire file into one string, decoding it with the platform's default charset.
 *
 * <p>This is a convenience wrapper: it looks up the default charset's name and hands the
 * work to the two-argument {@code slurp(fileName, charsetName)} overload.
 *
 * @param fileName path of the file to read
 * @return whatever the charset-aware overload returns for this file and charset
 * @throws FileNotFoundException if the file cannot be found (presumably raised by the
 *         delegated overload)
 */
public static String slurp(String fileName) throws FileNotFoundException {
    final String defaultCharsetName = Charset.defaultCharset().name();
    return slurp(fileName, defaultCharsetName);
}
public TextAnnotation RunPipelineOnFile(String fileName) throws FileNotFoundException, AnnotatorException { String text = LineIO.slurp(fileName); boolean forceUpdate = true; // in actual use, this will usually be 'false' return pipeline.createAnnotatedTextAnnotation("", "", text); }
public TextAnnotation RunPipelineOnFile(String fileName) throws FileNotFoundException, AnnotatorException { String text = LineIO.slurp(fileName); boolean forceUpdate = true; // in actual use, this will usually be 'false' return pipeline.createAnnotatedTextAnnotation("", "", text); }
/**
 * Loads one serialized record from a file and deserializes it into a text annotation.
 *
 * <p>When {@code isJson} is true the file is read as text and passed to
 * {@code SerializationHelper.deserializeFromJson}; otherwise its raw bytes are passed to
 * {@code deserializeTextAnnotationFromBytes} (the binary, presumably Thrift, format). One
 * record per file is expected.
 *
 * @param fileName name of the file to read from
 * @param isJson {@code true} to treat the file as JSON, {@code false} to treat it as bytes
 * @return a text annotation
 * @throws Exception an {@link IOException} when the file does not exist, otherwise whatever
 *         reading the file or the selected deserializer throws
 */
public static TextAnnotation deserializeTextAnnotationFromFile(String fileName, boolean isJson)
        throws Exception {
    final File source = new File(fileName);
    if (!source.exists()) {
        throw new IOException("ERROR: " + NAME + ".deserializeTextAnnotationFromFile(): file '"
                + fileName + "' does not exist.");
    }

    if (isJson) {
        return SerializationHelper.deserializeFromJson(LineIO.slurp(fileName));
    }
    return deserializeTextAnnotationFromBytes(FileUtils.readFileToByteArray(source));
}
/**
 * Loads one serialized record from a file and deserializes it into a text annotation.
 *
 * <p>When {@code isJson} is true the file is read as text and passed to
 * {@code SerializationHelper.deserializeFromJson}; otherwise its raw bytes are passed to
 * {@code deserializeTextAnnotationFromBytes} (the binary, presumably Thrift, format). One
 * record per file is expected.
 *
 * @param fileName name of the file to read from
 * @param isJson {@code true} to treat the file as JSON, {@code false} to treat it as bytes
 * @return a text annotation
 * @throws Exception an {@link IOException} when the file does not exist, otherwise whatever
 *         reading the file or the selected deserializer throws
 */
public static TextAnnotation deserializeTextAnnotationFromFile(String fileName, boolean isJson)
        throws Exception {
    final File source = new File(fileName);
    if (!source.exists()) {
        throw new IOException("ERROR: " + NAME + ".deserializeTextAnnotationFromFile(): file '"
                + fileName + "' does not exist.");
    }

    if (isJson) {
        return SerializationHelper.deserializeFromJson(LineIO.slurp(fileName));
    }
    return deserializeTextAnnotationFromBytes(FileUtils.readFileToByteArray(source));
}
/**
 * Converts one entry of the corpus file list produced by {@link #getFileListing()} into
 * zero or more TextAnnotation objects.
 *
 * <p>Only the first path of the entry is read. Its last name element becomes the file id,
 * its contents are passed through {@code stripText}, and the stripped text is handed to the
 * TextAnnotation builder. The {@code fileId}, {@code newFileText}, {@code numFiles} and
 * {@code numTextAnnotations} fields are updated along the way.
 *
 * @param corpusFileListEntry corpus file entry containing the content to be processed
 * @return list holding the TextAnnotation built from the file, or an empty list when the
 *         builder returns {@code null}
 * @throws Exception if reading the file or building the annotation fails
 */
@Override
public List<TextAnnotation> getAnnotationsFromFile(List<Path> corpusFileListEntry)
        throws Exception {
    final Path sourcePath = corpusFileListEntry.get(0);
    final int lastNameIndex = sourcePath.getNameCount() - 1;
    fileId = sourcePath.getName(lastNameIndex).toString();
    logger.debug("read source file {}", fileId);
    numFiles++;

    final String rawText = LineIO.slurp(sourcePath.toString());
    newFileText = this.stripText(rawText);

    final List<TextAnnotation> annotations = new ArrayList<>(1);
    final TextAnnotation annotation =
            taBuilder.createTextAnnotation(corpusName, fileId, newFileText.toString());
    if (annotation != null) {
        annotations.add(annotation);
        numTextAnnotations++;
    }
    return annotations;
}
/**
 * Converts one entry of the corpus file list produced by {@link #getFileListing()} into
 * zero or more XmlTextAnnotation objects.
 *
 * <p>The entry is a list so that corpora with standoff annotations can supply the source
 * document first, followed by its markup files. This implementation reads only the first
 * file, treating it as containing both source text and markup. The {@code fileId},
 * {@code numFiles} and {@code numTextAnnotations} fields are updated along the way.
 *
 * @param corpusFileListEntry a list of files, the first of which is the source file
 * @return list holding the XmlTextAnnotation built from the file, or an empty list when the
 *         annotation maker returns {@code null}
 * @throws Exception if reading the file or building the annotation fails
 */
@Override
public List<XmlTextAnnotation> getAnnotationsFromFile(List<Path> corpusFileListEntry)
        throws Exception {
    final Path sourcePath = corpusFileListEntry.get(0);
    final int lastNameIndex = sourcePath.getNameCount() - 1;
    fileId = sourcePath.getName(lastNameIndex).toString();
    logger.debug("read source file {}", fileId);
    numFiles++;

    final String rawText = LineIO.slurp(sourcePath.toString());

    final List<XmlTextAnnotation> annotations = new ArrayList<>(1);
    final XmlTextAnnotation annotation =
            xmlTextAnnotationMaker.createTextAnnotation(rawText, this.corpusName, fileId);
    if (annotation != null) {
        annotations.add(annotation);
        numTextAnnotations++;
    }
    return annotations;
}
/**
 * Reads {@code currentfile} and builds the next annotation object from its contents.
 *
 * <p>A missing file is recorded in {@code badFiles} and yields {@code null}. Any other
 * failure while reading prints its stack trace and also yields {@code null}.
 *
 * <p>NOTE(review): the earlier documentation reminded implementers to increment
 * {@code currentAnnotationId}; this method itself does not touch it.
 *
 * @return the annotation produced by {@code nextAnnotation}, or {@code null} when the file
 *         could not be read
 * @throws IllegalStateException wrapping an {@code AnnotatorException} raised while building
 *         the annotation
 */
@Override
public XmlTextAnnotation next() {
    String contents;
    try {
        contents = LineIO.slurp(currentfile);
    } catch (FileNotFoundException missingFile) {
        this.badFiles.add(this.currentfile);
        return null;
    } catch (Throwable readFailure) {
        readFailure.printStackTrace();
        return null;
    }

    try {
        return nextAnnotation(contents, currentfile);
    } catch (AnnotatorException annotationFailure) {
        annotationFailure.printStackTrace();
        throw new IllegalStateException(annotationFailure);
    }
}
/**
 * Converts one entry of the corpus file list produced by {@link #getFileListing()} into
 * zero or more XmlTextAnnotation objects.
 *
 * <p>The entry is a list so that corpora with standoff annotations can supply the source
 * document first, followed by its markup files. This implementation reads only the first
 * file, treating it as containing both source text and markup. The {@code fileId},
 * {@code numFiles} and {@code numTextAnnotations} fields are updated along the way.
 *
 * @param corpusFileListEntry a list of files, the first of which is the source file
 * @return list holding the XmlTextAnnotation built from the file, or an empty list when the
 *         annotation maker returns {@code null}
 * @throws Exception if reading the file or building the annotation fails
 */
@Override
public List<XmlTextAnnotation> getAnnotationsFromFile(List<Path> corpusFileListEntry)
        throws Exception {
    final Path sourcePath = corpusFileListEntry.get(0);
    final int lastNameIndex = sourcePath.getNameCount() - 1;
    fileId = sourcePath.getName(lastNameIndex).toString();
    logger.debug("read source file {}", fileId);
    numFiles++;

    final String rawText = LineIO.slurp(sourcePath.toString());

    final List<XmlTextAnnotation> annotations = new ArrayList<>(1);
    final XmlTextAnnotation annotation =
            xmlTextAnnotationMaker.createTextAnnotation(rawText, this.corpusName, fileId);
    if (annotation != null) {
        annotations.add(annotation);
        numTextAnnotations++;
    }
    return annotations;
}
/**
 * Reads {@code currentfile} and builds the next annotation object from its contents.
 *
 * <p>A missing file is recorded in {@code badFiles} and yields {@code null}. Any other
 * failure while reading prints its stack trace and also yields {@code null}.
 *
 * <p>NOTE(review): the earlier documentation reminded implementers to increment
 * {@code currentAnnotationId}; this method itself does not touch it.
 *
 * @return the annotation produced by {@code nextAnnotation}, or {@code null} when the file
 *         could not be read
 * @throws IllegalStateException wrapping an {@code AnnotatorException} raised while building
 *         the annotation
 */
@Override
public XmlTextAnnotation next() {
    String contents;
    try {
        contents = LineIO.slurp(currentfile);
    } catch (FileNotFoundException missingFile) {
        this.badFiles.add(this.currentfile);
        return null;
    } catch (Throwable readFailure) {
        readFailure.printStackTrace();
        return null;
    }

    try {
        return nextAnnotation(contents, currentfile);
    } catch (AnnotatorException annotationFailure) {
        annotationFailure.printStackTrace();
        throw new IllegalStateException(annotationFailure);
    }
}
/**
 * Converts one entry of the corpus file list produced by {@link #getFileListing()} into
 * zero or more TextAnnotation objects.
 *
 * <p>Only the first path of the entry is read. Its last name element becomes the file id,
 * its contents are passed through {@code stripText}, and the stripped text is handed to the
 * TextAnnotation builder. The {@code fileId}, {@code newFileText}, {@code numFiles} and
 * {@code numTextAnnotations} fields are updated along the way.
 *
 * @param corpusFileListEntry corpus file entry containing the content to be processed
 * @return list holding the TextAnnotation built from the file, or an empty list when the
 *         builder returns {@code null}
 * @throws Exception if reading the file or building the annotation fails
 */
@Override
public List<TextAnnotation> getAnnotationsFromFile(List<Path> corpusFileListEntry)
        throws Exception {
    final Path sourcePath = corpusFileListEntry.get(0);
    final int lastNameIndex = sourcePath.getNameCount() - 1;
    fileId = sourcePath.getName(lastNameIndex).toString();
    logger.debug("read source file {}", fileId);
    numFiles++;

    final String rawText = LineIO.slurp(sourcePath.toString());
    newFileText = this.stripText(rawText);

    final List<TextAnnotation> annotations = new ArrayList<>(1);
    final TextAnnotation annotation =
            taBuilder.createTextAnnotation(corpusName, fileId, newFileText.toString());
    if (annotation != null) {
        annotations.add(annotation);
        numTextAnnotations++;
    }
    return annotations;
}
String file = LineIO.slurp(parallelpath + "/en-" + parid); System.out.println("Trying to read: " + parallelpath + "/en-" + parid);
TextAnnotation ta; try { ta = SerializationHelper.deserializeFromJson(LineIO.slurp(file)); } catch (Exception e) { logger.error("Error while reading file {}\n{}", file, e.getMessage());
private List<TextAnnotation> buildTextAnnotation(Path textPath, Path sentPath, Path tokPath, Path pennPath) throws FileNotFoundException, XMLStreamException { String text = LineIO.slurp(textPath.toFile().getAbsolutePath()); List<Pair<String, IntPair>> tokenInfo = tokenParser.parseFile(tokPath.toFile().getAbsolutePath()); Pair<List<SentenceStaxParser.MascSentence>, List<SentenceStaxParser.MascSentenceGroup>> sentenceInfo =
private List<TextAnnotation> buildTextAnnotation(Path textPath, Path sentPath, Path tokPath, Path pennPath) throws FileNotFoundException, XMLStreamException { String text = LineIO.slurp(textPath.toFile().getAbsolutePath()); List<Pair<String, IntPair>> tokenInfo = tokenParser.parseFile(tokPath.toFile().getAbsolutePath()); Pair<List<SentenceStaxParser.MascSentence>, List<SentenceStaxParser.MascSentenceGroup>> sentenceInfo =
try input = LineIO.slurp( inFile );
String input = null; try { input = LineIO.slurp(inFile); } catch (FileNotFoundException e) { e.printStackTrace();
String document = LineIO.slurp(file.getCanonicalPath());
String document = LineIO.slurp(file.getCanonicalPath());