/**
 * Collects every file below the source directory whose name ends with the XML extension,
 * descending into subdirectories.
 *
 * @return one single-element list per matching file, holding that file's path
 * @throws IOException propagated from the recursive directory listing
 */
@Override
public List<List<Path>> getFileListing() throws IOException {
    // Directories are accepted as well so that the recursive listing can descend into them.
    FileFilter xmlOrDirectory =
            candidate -> candidate.getName().endsWith(EXT_XML) || candidate.isDirectory();
    String[] xmlDocs = IOUtils.lsFilesRecursive(sourceDirectory, xmlOrDirectory);

    List<List<Path>> listing = new ArrayList<>();
    for (String xmlDoc : xmlDocs) {
        // Each document gets its own list containing just its path.
        List<Path> entry = new ArrayList<>();
        entry.add(Paths.get(xmlDoc));
        listing.add(entry);
    }
    return listing;
}
/**
 * Collects every file below the source directory whose name ends with the XML extension,
 * descending into subdirectories.
 *
 * @return one single-element list per matching file, holding that file's path
 * @throws IOException propagated from the recursive directory listing
 */
@Override
public List<List<Path>> getFileListing() throws IOException {
    // Directories are accepted as well so that the recursive listing can descend into them.
    FileFilter xmlOrDirectory =
            candidate -> candidate.getName().endsWith(EXT_XML) || candidate.isDirectory();
    String[] xmlDocs = IOUtils.lsFilesRecursive(sourceDirectory, xmlOrDirectory);

    List<List<Path>> listing = new ArrayList<>();
    for (String xmlDoc : xmlDocs) {
        // Each document gets its own list containing just its path.
        List<Path> entry = new ArrayList<>();
        entry.add(Paths.get(xmlDoc));
        listing.add(entry);
    }
    return listing;
}
/**
 * Filters the files contained in a directory or in its subdirectory structure. Returns all
 * files (not directories) that pass the filter.
 *
 * <p>The filter is applied to every entry of each visited directory, so it must also accept the
 * subdirectories that should be descended into.
 *
 * @param directory path of the directory to search
 * @param filter decides which entries (files and subdirectories) are considered
 * @return absolute paths of all regular files that pass the filter
 * @throws IOException if {@code directory}, or a subdirectory reached during the walk, cannot be
 *         listed (for example because it does not exist or is not a directory)
 */
public static String[] lsFilesRecursive(String directory, FileFilter filter) throws IOException {
    File dir = new File(directory);

    // File.listFiles returns null (rather than throwing) when the path is not a directory or an
    // I/O error occurs; report that as the declared IOException instead of a bare NPE.
    File[] entries = dir.listFiles(filter);
    if (entries == null) {
        throw new IOException("Unable to list files in '" + directory
                + "': path does not exist, is not a directory, or cannot be read.");
    }

    ArrayList<String> files = new ArrayList<>();
    for (File filepath : entries) {
        if (filepath.isFile()) {
            files.add(filepath.getAbsolutePath());
        } else if (filepath.isDirectory()) {
            files.addAll(Arrays.asList(lsFilesRecursive(filepath.getAbsolutePath(), filter)));
        }
    }
    return files.toArray(new String[files.size()]);
}
/**
 * Filters the files contained in a directory or in its subdirectory structure. Returns all
 * files (not directories) that pass the filter.
 *
 * <p>The filter is applied to every entry of each visited directory, so it must also accept the
 * subdirectories that should be descended into.
 *
 * @param directory path of the directory to search
 * @param filter decides which entries (files and subdirectories) are considered
 * @return absolute paths of all regular files that pass the filter
 * @throws IOException if {@code directory}, or a subdirectory reached during the walk, cannot be
 *         listed (for example because it does not exist or is not a directory)
 */
public static String[] lsFilesRecursive(String directory, FileFilter filter) throws IOException {
    File dir = new File(directory);

    // File.listFiles returns null (rather than throwing) when the path is not a directory or an
    // I/O error occurs; report that as the declared IOException instead of a bare NPE.
    File[] entries = dir.listFiles(filter);
    if (entries == null) {
        throw new IOException("Unable to list files in '" + directory
                + "': path does not exist, is not a directory, or cannot be read.");
    }

    ArrayList<String> files = new ArrayList<>();
    for (File filepath : entries) {
        if (filepath.isFile()) {
            files.add(filepath.getAbsolutePath());
        } else if (filepath.isDirectory()) {
            files.addAll(Arrays.asList(lsFilesRecursive(filepath.getAbsolutePath(), filter)));
        }
    }
    return files.toArray(new String[files.size()]);
}
/**
 * Filters the files contained in a directory or in its subdirectory structure. Returns all
 * files (not directories) that pass the filter.
 *
 * <p>The filter is applied to every entry of each visited directory, subdirectories included:
 * a subdirectory is only descended into when its name passes the filter.
 *
 * @param directory path of the directory to search
 * @param filter name-based filter applied to the entries of each visited directory
 * @return absolute paths of the entries that pass the filter and are reported as files
 * @throws IOException if {@code directory}, or a subdirectory reached during the walk, cannot be
 *         listed (for example because it does not exist or is not a directory)
 */
public static String[] lsFilesRecursive(String directory, FilenameFilter filter) throws IOException {
    File dir = new File(directory);

    // File.listFiles returns null (rather than throwing) when the path is not a directory or an
    // I/O error occurs; report that as the declared IOException instead of a bare NPE.
    File[] entries = dir.listFiles(filter);
    if (entries == null) {
        throw new IOException("Unable to list files in '" + directory
                + "': path does not exist, is not a directory, or cannot be read.");
    }

    ArrayList<String> files = new ArrayList<>();
    for (File filepath : entries) {
        String absolutePath = filepath.getAbsolutePath();
        if (isFile(absolutePath)) {
            files.add(absolutePath);
        } else if (isDirectory(absolutePath)) {
            files.addAll(Arrays.asList(lsFilesRecursive(absolutePath, filter)));
        }
    }
    return files.toArray(new String[files.size()]);
}
/**
 * Filters the files contained in a directory or in its subdirectory structure. Returns all
 * files (not directories) that pass the filter.
 *
 * <p>The filter is applied to every entry of each visited directory, subdirectories included:
 * a subdirectory is only descended into when its name passes the filter.
 *
 * @param directory path of the directory to search
 * @param filter name-based filter applied to the entries of each visited directory
 * @return absolute paths of the entries that pass the filter and are reported as files
 * @throws IOException if {@code directory}, or a subdirectory reached during the walk, cannot be
 *         listed (for example because it does not exist or is not a directory)
 */
public static String[] lsFilesRecursive(String directory, FilenameFilter filter) throws IOException {
    File dir = new File(directory);

    // File.listFiles returns null (rather than throwing) when the path is not a directory or an
    // I/O error occurs; report that as the declared IOException instead of a bare NPE.
    File[] entries = dir.listFiles(filter);
    if (entries == null) {
        throw new IOException("Unable to list files in '" + directory
                + "': path does not exist, is not a directory, or cannot be read.");
    }

    ArrayList<String> files = new ArrayList<>();
    for (File filepath : entries) {
        String absolutePath = filepath.getAbsolutePath();
        if (isFile(absolutePath)) {
            files.add(absolutePath);
        } else if (isDirectory(absolutePath)) {
            files.addAll(Arrays.asList(lsFilesRecursive(absolutePath, filter)));
        }
    }
    return files.toArray(new String[files.size()]);
}
/**
 * Builds the list of files that make up the corpus. Each file is expected to yield one or more
 * TextAnnotation objects, although the iterator implementation also tolerates corpus files that
 * yield none.
 *
 * @return one singleton list per corpus file, each holding the Path of a file whose name ends
 *         with the required file extension
 * @throws IOException propagated from the directory listing
 */
@Override
public List<List<Path>> getFileListing() throws IOException {
    // Name-only filter: an entry is kept when its name ends with the required extension.
    FilenameFilter byExtension =
            (parent, fileName) -> fileName.endsWith(getRequiredFileExtension());
    String[] matches = IOUtils.lsFilesRecursive(super.getSourceDirectory(), byExtension);

    List<List<Path>> listing = new ArrayList<>(matches.length);
    for (String match : matches) {
        Path corpusFile = Paths.get(match);
        listing.add(Collections.singletonList(corpusFile));
    }
    return listing;
}
/**
 * Builds the list of files that make up the corpus. Each file is expected to yield one or more
 * TextAnnotation objects, although the iterator implementation also tolerates corpus files that
 * yield none.
 *
 * @return one singleton list per corpus file, each holding the Path of a file whose name ends
 *         with the required file extension
 * @throws IOException propagated from the directory listing
 */
@Override
public List<List<Path>> getFileListing() throws IOException {
    // Name-only filter: an entry is kept when its name ends with the required extension.
    FilenameFilter byExtension =
            (parent, fileName) -> fileName.endsWith(getRequiredFileExtension());
    String[] matches = IOUtils.lsFilesRecursive(super.getSourceDirectory(), byExtension);

    List<List<Path>> listing = new ArrayList<>(matches.length);
    for (String match : matches) {
        Path corpusFile = Paths.get(match);
        listing.add(Collections.singletonList(corpusFile));
    }
    return listing;
}
/**
 * Builds the list of file groups that make up the corpus. Each group is expected to yield one
 * or more TextAnnotation objects, although the iterator implementation also tolerates groups
 * that yield none. Within a group the first file holds the source document; when that file does
 * not carry the annotation info itself, the remaining files of the group name the annotation
 * markup.
 *
 * <p>This default implementation assumes one self-contained file per document, so every group
 * is a singleton list.
 *
 * @return a List of Lists of Path objects, each containing a source file and its corresponding
 *         markup files
 * @throws IOException propagated from the directory listing
 */
@Override
public List<List<Path>> getFileListing() throws IOException {
    // Name-only filter: an entry is kept when its name ends with the source file extension.
    FilenameFilter bySourceExtension =
            (parent, fileName) -> fileName.endsWith(getRequiredSourceFileExtension());
    String[] sourceFiles = IOUtils.lsFilesRecursive(super.getSourceDirectory(), bySourceExtension);

    List<List<Path>> listing = new ArrayList<>(sourceFiles.length);
    for (String sourceFile : sourceFiles) {
        Path sourcePath = Paths.get(sourceFile);
        listing.add(Collections.singletonList(sourcePath));
    }
    return listing;
}
/**
 * Builds the list of file groups that make up the corpus. Each group is expected to yield one
 * or more TextAnnotation objects, although the iterator implementation also tolerates groups
 * that yield none. Within a group the first file holds the source document; when that file does
 * not carry the annotation info itself, the remaining files of the group name the annotation
 * markup.
 *
 * <p>This default implementation assumes one self-contained file per document, so every group
 * is a singleton list.
 *
 * @return a List of Lists of Path objects, each containing a source file and its corresponding
 *         markup files
 * @throws IOException propagated from the directory listing
 */
@Override
public List<List<Path>> getFileListing() throws IOException {
    // Name-only filter: an entry is kept when its name ends with the source file extension.
    FilenameFilter bySourceExtension =
            (parent, fileName) -> fileName.endsWith(getRequiredSourceFileExtension());
    String[] sourceFiles = IOUtils.lsFilesRecursive(super.getSourceDirectory(), bySourceExtension);

    List<List<Path>> listing = new ArrayList<>(sourceFiles.length);
    for (String sourceFile : sourceFiles) {
        Path sourcePath = Paths.get(sourceFile);
        listing.add(Collections.singletonList(sourcePath));
    }
    return listing;
}
/**
 * Overridden to handle the multiple subdirectories of the TAC KBP data: the configured source
 * directory is expected to contain one subdirectory per entry of SOURCE_TYPES, and each of them
 * is listed in turn.
 *
 * @return a list of lists of paths: each element is a singleton list containing a TAC source file
 * @throws IOException if the paths are not specified correctly, causing failure to read
 *         files expected to be present
 */
@Override
public List<List<Path>> getFileListing() throws IOException {
    String sourceDir = resourceManager.getString(CorpusReaderConfigurator.SOURCE_DIRECTORY);

    // The same name-only filter applies to every source-type subdirectory.
    FilenameFilter bySourceExtension =
            (parent, fileName) -> fileName.endsWith(getRequiredSourceFileExtension());

    List<List<Path>> corpusPaths = new ArrayList<>();
    for (String sourceType : SOURCE_TYPES) {
        String typeDir = sourceDir + "/" + sourceType;
        String[] sourceFiles = IOUtils.lsFilesRecursive(typeDir, bySourceExtension);
        for (String sourceFile : sourceFiles) {
            Path sourcePath = Paths.get(sourceFile);
            corpusPaths.add(Collections.singletonList(sourcePath));
        }
    }
    return corpusPaths;
}
}
/**
 * Overridden to handle the multiple subdirectories of the TAC KBP data: the configured source
 * directory is expected to contain one subdirectory per entry of SOURCE_TYPES, and each of them
 * is listed in turn.
 *
 * @return a list of lists of paths: each element is a singleton list containing a TAC source file
 * @throws IOException if the paths are not specified correctly, causing failure to read
 *         files expected to be present
 */
@Override
public List<List<Path>> getFileListing() throws IOException {
    String sourceDir = resourceManager.getString(CorpusReaderConfigurator.SOURCE_DIRECTORY);

    // The same name-only filter applies to every source-type subdirectory.
    FilenameFilter bySourceExtension =
            (parent, fileName) -> fileName.endsWith(getRequiredSourceFileExtension());

    List<List<Path>> corpusPaths = new ArrayList<>();
    for (String sourceType : SOURCE_TYPES) {
        String typeDir = sourceDir + "/" + sourceType;
        String[] sourceFiles = IOUtils.lsFilesRecursive(typeDir, bySourceExtension);
        for (String sourceFile : sourceFiles) {
            Path sourcePath = Paths.get(sourceFile);
            corpusPaths.add(Collections.singletonList(sourcePath));
        }
    }
    return corpusPaths;
}
}
sourceFiles = IOUtils.lsFilesRecursive(corpusDirectory, file -> file.isDirectory() || file.getAbsolutePath().endsWith(fileExtension)); } catch (IOException e) {
File sectionDir = new File(this.aceCorpusHome + File.separator + section); String[] xmlFiles = IOUtils.lsFilesRecursive(sectionDir.getAbsolutePath(), apfFileFilter);
File sectionDir = new File(this.aceCorpusHome + File.separator + section); String[] xmlFiles = IOUtils.lsFilesRecursive(sectionDir.getAbsolutePath(), apfFileFilter);
String[] files = IOUtils.lsFilesRecursive(dataDirectory.toString(), filter);
String[] files = IOUtils.lsFilesRecursive(dataDirectory.toString(), filter);
String[] inFiles = IOUtils.lsFilesRecursive(inDir, filter);
List<String> sourceFileList = Arrays.asList(IOUtils.lsFilesRecursive(sourceDir, sourceFilter)); LinkedList<String> annotationFileList = new LinkedList<>(); annotationFileList.addAll(Arrays.stream(IOUtils.lsFilesRecursive(annotationDir, annotationFilter)).map(IOUtils::getFileName).collect(Collectors.toList()));
File sectionDir = new File(this.aceCorpusHome + File.separator + section); String[] xmlFiles = IOUtils.lsFilesRecursive(sectionDir.getAbsolutePath(), apfFileFilter);