/**
 * Opens a reference sequence file, picking a {@link ReferenceSequenceFile} implementation
 * suited to it. Delegates to the three-argument overload with {@code preferIndexed}
 * set to {@code true}.
 *
 * @param path the reference sequence file on disk
 * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name
 * @return the reader produced by the three-argument overload
 */
public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, final boolean truncateNamesAtWhitespace) {
    // An indexed reader is preferred by default.
    final boolean preferIndexed = true;
    return getReferenceSequenceFile(path, truncateNamesAtWhitespace, preferIndexed);
}
/**
 * Returns the default dictionary name for a FASTA file.
 *
 * <p>Converts the file to a {@link Path}, delegates to the {@code Path} overload and
 * converts the result back to a {@link File}.
 *
 * @param file the reference sequence file on disk.
 * @return the default dictionary location for {@code file}.
 */
public static File getDefaultDictionaryForReferenceSequence(final File file) {
    final Path fastaPath = IOUtil.toPath(file);
    final Path dictionaryPath = getDefaultDictionaryForReferenceSequence(fastaPath);
    return dictionaryPath.toFile();
}
public SamtoolsFastaIndex(String fastaFileName) throws IOException { if (!ReferenceSequenceFileFactory.canCreateIndexedFastaReader(Paths.get(fastaFileName))) { throw new IOException("Fasta file '" + fastaFileName + "' is not indexed."); } this.indexedFastaSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(Paths.get(fastaFileName)); }
/** * Attempts to determine the type of the reference file and return an instance * of ReferenceSequenceFile that is appropriate to read it. * * @param path the reference sequence file path * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name * @param preferIndexed if true attempt to return an indexed reader that supports non-linear traversal, else return the non-indexed reader */ public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, final boolean truncateNamesAtWhitespace, final boolean preferIndexed) { // this should thrown an exception if the fasta file is not supported getFastaExtension(path); // Using faidx requires truncateNamesAtWhitespace if (truncateNamesAtWhitespace && preferIndexed && canCreateIndexedFastaReader(path)) { try { return IOUtil.isBlockCompressed(path, true) ? new BlockCompressedIndexedFastaSequenceFile(path) : new IndexedFastaSequenceFile(path); } catch (final IOException e) { throw new SAMException("Error opening FASTA: " + path, e); } } else { return new FastaSequenceFile(path, truncateNamesAtWhitespace); } }
/** * Checks if the provided FASTA file can be open as indexed. * * <p>For a FASTA file to be indexed, it requires to have: * <ul> * <li>Associated .fai index ({@link FastaSequenceIndex}).</li> * <li>Associated .gzi index if it is block-compressed ({@link GZIIndex}).</li> * </ul> * * @param fastaFile the reference sequence file path. * @return {@code true} if the file can be open as indexed; {@code false} otherwise. */ public static boolean canCreateIndexedFastaReader(final Path fastaFile) { // this should thrown an exception if the fasta file is not supported getFastaExtension(fastaFile); // both the FASTA file should exists and the .fai index should exist if (Files.exists(fastaFile) && Files.exists(getFastaIndexFileName(fastaFile))) { // open the file for checking for block-compressed input try { // if it is bgzip, it requires the .gzi index return !IOUtil.isBlockCompressed(fastaFile, true) || Files.exists(GZIIndex.resolveIndexNameForBgzipFile(fastaFile)); } catch (IOException e) { return false; } } return false; }
/**
 * Looks for the index file that belongs to a FASTA file.
 *
 * @param fastaFile the FASTA file whose index is wanted
 * @return the index path if it exists, otherwise {@code null}
 */
protected static Path findFastaIndex(Path fastaFile) {
    final Path candidate = ReferenceSequenceFileFactory.getFastaIndexFileName(fastaFile);
    return Files.exists(candidate) ? candidate : null;
}
/**
 * Verifies that {@code canCreateIndexedFastaReader} reports the expected value for each
 * file supplied by the {@code canCreateIndexedFastaParams} data provider.
 */
@Test(dataProvider = "canCreateIndexedFastaParams")
public void testCanCreateIndexedFastaReader(final File path, final boolean indexed) {
    final boolean actual = ReferenceSequenceFileFactory.canCreateIndexedFastaReader(path.toPath());
    Assert.assertEquals(actual, indexed);
}
/**
 * Returns the default dictionary name for a FASTA file.
 *
 * <p>The name is the FASTA file name with its FASTA extension replaced by
 * {@code IOUtil.DICT_FILE_EXTENSION}, resolved as a sibling of the FASTA file.
 *
 * @param path the reference sequence file path.
 * @return the default dictionary path next to {@code path}.
 */
public static Path getDefaultDictionaryForReferenceSequence(final Path path) {
    final String fileName = path.getFileName().toString();
    // Length of the file name once the FASTA extension is stripped.
    final int stemLength = fileName.length() - getFastaExtension(path).length();
    final String dictionaryName = fileName.substring(0, stemLength) + IOUtil.DICT_FILE_EXTENSION;
    return path.resolveSibling(dictionaryName);
}
/**
 * Finds and loads the sequence file dictionary.
 *
 * <p>If no dictionary is found next to the FASTA file, this constructor does not assign
 * the sequence dictionary.
 *
 * @param path Fasta file to read. Also acts as a prefix for supporting files. May be
 *             {@code null}, in which case the source is recorded as "unknown".
 * @throws SAMException if a dictionary file is found but cannot be loaded
 */
AbstractFastaSequenceFile(final Path path) {
    this.path = path;
    if (path == null) {
        this.source = "unknown";
    } else {
        this.source = path.toAbsolutePath().toString();
    }

    final Path dictionaryPath = findSequenceDictionary(path);
    if (dictionaryPath == null) {
        // Nothing to load.
        return;
    }

    IOUtil.assertFileIsReadable(dictionaryPath);
    try (InputStream stream = Files.newInputStream(dictionaryPath)) {
        this.sequenceDictionary = ReferenceSequenceFileFactory.loadDictionary(stream);
    } catch (Exception e) {
        throw new SAMException("Could not open sequence dictionary file: " + dictionaryPath, e);
    }
}
/** * Attempts to determine the type of the reference file and return an instance * of ReferenceSequenceFile that is appropriate to read it. * * @param path the reference sequence file path * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name * @param preferIndexed if true attempt to return an indexed reader that supports non-linear traversal, else return the non-indexed reader */ public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, final boolean truncateNamesAtWhitespace, final boolean preferIndexed) { // this should thrown an exception if the fasta file is not supported getFastaExtension(path); // Using faidx requires truncateNamesAtWhitespace if (truncateNamesAtWhitespace && preferIndexed && canCreateIndexedFastaReader(path)) { try { return IOUtil.isBlockCompressed(path, true) ? new BlockCompressedIndexedFastaSequenceFile(path) : new IndexedFastaSequenceFile(path); } catch (final IOException e) { throw new SAMException("Error opening FASTA: " + path, e); } } else { return new FastaSequenceFile(path, truncateNamesAtWhitespace); } }
/** * Checks if the provided FASTA file can be open as indexed. * * <p>For a FASTA file to be indexed, it requires to have: * <ul> * <li>Associated .fai index ({@link FastaSequenceIndex}).</li> * <li>Associated .gzi index if it is block-compressed ({@link GZIIndex}).</li> * </ul> * * @param fastaFile the reference sequence file path. * @return {@code true} if the file can be open as indexed; {@code false} otherwise. */ public static boolean canCreateIndexedFastaReader(final Path fastaFile) { // this should thrown an exception if the fasta file is not supported getFastaExtension(fastaFile); // both the FASTA file should exists and the .fai index should exist if (Files.exists(fastaFile) && Files.exists(getFastaIndexFileName(fastaFile))) { // open the file for checking for block-compressed input try { // if it is bgzip, it requires the .gzi index return !IOUtil.isBlockCompressed(fastaFile, true) || Files.exists(GZIIndex.resolveIndexNameForBgzipFile(fastaFile)); } catch (IOException e) { return false; } } return false; }
/**
 * Looks for the index file that belongs to a FASTA file.
 *
 * @param fastaFile the FASTA file whose index is wanted
 * @return the index path if it exists, otherwise {@code null}
 */
protected static Path findFastaIndex(Path fastaFile) {
    final Path candidate = ReferenceSequenceFileFactory.getFastaIndexFileName(fastaFile);
    return Files.exists(candidate) ? candidate : null;
}
/**
 * Returns the default dictionary name for a FASTA file.
 *
 * <p>The name is the FASTA file name with its FASTA extension replaced by
 * {@code IOUtil.DICT_FILE_EXTENSION}, resolved as a sibling of the FASTA file.
 *
 * @param path the reference sequence file path.
 * @return the default dictionary path next to {@code path}.
 */
public static Path getDefaultDictionaryForReferenceSequence(final Path path) {
    final String fileName = path.getFileName().toString();
    // Length of the file name once the FASTA extension is stripped.
    final int stemLength = fileName.length() - getFastaExtension(path).length();
    final String dictionaryName = fileName.substring(0, stemLength) + IOUtil.DICT_FILE_EXTENSION;
    return path.resolveSibling(dictionaryName);
}
/**
 * Finds and loads the sequence file dictionary.
 *
 * <p>If no dictionary is found next to the FASTA file, this constructor does not assign
 * the sequence dictionary.
 *
 * @param path Fasta file to read. Also acts as a prefix for supporting files. May be
 *             {@code null}, in which case the source is recorded as "unknown".
 * @throws SAMException if a dictionary file is found but cannot be loaded
 */
AbstractFastaSequenceFile(final Path path) {
    this.path = path;
    if (path == null) {
        this.source = "unknown";
    } else {
        this.source = path.toAbsolutePath().toString();
    }

    final Path dictionaryPath = findSequenceDictionary(path);
    if (dictionaryPath == null) {
        // Nothing to load.
        return;
    }

    IOUtil.assertFileIsReadable(dictionaryPath);
    try (InputStream stream = Files.newInputStream(dictionaryPath)) {
        this.sequenceDictionary = ReferenceSequenceFileFactory.loadDictionary(stream);
    } catch (Exception e) {
        throw new SAMException("Could not open sequence dictionary file: " + dictionaryPath, e);
    }
}
/**
 * Opens a reference sequence file, picking a {@link ReferenceSequenceFile} implementation
 * suited to it. Delegates to the three-argument overload with {@code preferIndexed}
 * set to {@code true}.
 *
 * @param file the reference sequence file on disk
 * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name
 * @return the reader produced by the three-argument overload
 */
public static ReferenceSequenceFile getReferenceSequenceFile(final File file, final boolean truncateNamesAtWhitespace) {
    // An indexed reader is preferred by default.
    final boolean preferIndexed = true;
    return getReferenceSequenceFile(file, truncateNamesAtWhitespace, preferIndexed);
}
/**
 * Returns the default dictionary name for a FASTA file.
 *
 * <p>Converts the file to a {@link Path}, delegates to the {@code Path} overload and
 * converts the result back to a {@link File}.
 *
 * @param file the reference sequence file on disk.
 * @return the default dictionary location for {@code file}.
 */
public static File getDefaultDictionaryForReferenceSequence(final File file) {
    final Path fastaPath = IOUtil.toPath(file);
    final Path dictionaryPath = getDefaultDictionaryForReferenceSequence(fastaPath);
    return dictionaryPath.toFile();
}
/**
 * Locates the index file for a FASTA file, failing if it is absent.
 *
 * @param fastaFile the FASTA file whose index is required
 * @return the existing index path
 * @throws FileNotFoundException if {@link #findFastaIndex(Path)} finds no index
 */
protected static Path findRequiredFastaIndexFile(Path fastaFile) throws FileNotFoundException {
    final Path index = findFastaIndex(fastaFile);
    if (index != null) {
        return index;
    }
    throw new FileNotFoundException(ReferenceSequenceFileFactory.getFastaIndexFileName(fastaFile) + " not found.");
}
/**
 * Opens a reference sequence file, picking a {@link ReferenceSequenceFile} implementation
 * suited to it. Delegates to the three-argument overload with {@code preferIndexed}
 * set to {@code true}.
 *
 * @param file the reference sequence file on disk
 * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name
 * @return the reader produced by the three-argument overload
 */
public static ReferenceSequenceFile getReferenceSequenceFile(final File file, final boolean truncateNamesAtWhitespace) {
    // An indexed reader is preferred by default.
    final boolean preferIndexed = true;
    return getReferenceSequenceFile(file, truncateNamesAtWhitespace, preferIndexed);
}
protected static Path findSequenceDictionary(final Path path) { if (path == null) { return null; } // Try and locate the dictionary with the default method final Path dictionary = ReferenceSequenceFileFactory.getDefaultDictionaryForReferenceSequence(path); path.toAbsolutePath(); if (Files.exists(dictionary)) { return dictionary; } // try without removing the file extension final Path dictionaryExt = path.resolveSibling(path.getFileName().toString() + IOUtil.DICT_FILE_EXTENSION); if (Files.exists(dictionaryExt)) { return dictionaryExt; } else return null; }
/**
 * Locates the index file for a FASTA file, failing if it is absent.
 *
 * @param fastaFile the FASTA file whose index is required
 * @return the existing index path
 * @throws FileNotFoundException if {@link #findFastaIndex(Path)} finds no index
 */
protected static Path findRequiredFastaIndexFile(Path fastaFile) throws FileNotFoundException {
    final Path index = findFastaIndex(fastaFile);
    if (index != null) {
        return index;
    }
    throw new FileNotFoundException(ReferenceSequenceFileFactory.getFastaIndexFileName(fastaFile) + " not found.");
}