/**
 * Serializes the supplied index to the given file in little-endian byte order.
 *
 * @param idx     the index to write out
 * @param idxFile the destination file for the serialized index
 * @throws IOException if writing the index fails
 * @deprecated use {@link Index#write(File)} instead
 */
@Deprecated
public static void writeIndex(final Index idx, final File idxFile) throws IOException {
    // Pure delegation: the Index instance owns the serialization format.
    idx.write(idxFile);
}
/**
 * Return the sequence (chromosome/contig) names in this file, if known.
 *
 * @return list of strings of the contig names; an empty list when no index is available
 */
public List<String> getSequenceNames() {
    // Without an index there is no contig metadata, so report an empty
    // (never null) list. A fresh copy is returned either way so callers
    // cannot mutate the index's own name list.
    return !this.hasIndex() ? new ArrayList<>() : new ArrayList<>(index.getSequenceNames());
}
/**
 * Get the sequence dictionary from the track, if available. If not, make it from the
 * contig list that is always in the index.
 *
 * <p>Contigs are reconstructed from index property entries whose keys start with
 * {@code SequenceDictionaryPropertyPredicate}; the remainder of the key is the contig
 * name and the value is parsed as the contig length.
 *
 * @param index the index file to use
 * @return a SAMSequenceDictionary built from the matching properties (possibly empty)
 */
public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
    final SAMSequenceDictionary dict = new SAMSequenceDictionary();
    for (final Map.Entry<String, String> entry : index.getProperties().entrySet()) {
        final String key = entry.getKey();
        if (key.startsWith(SequenceDictionaryPropertyPredicate)) {
            // One-arg substring replaces the redundant (start, key.length()) form;
            // parseInt avoids the needless Integer boxing of Integer.valueOf.
            dict.addSequence(new SAMSequenceRecord(
                    key.substring(SequenceDictionaryPropertyPredicate.length()),
                    Integer.parseInt(entry.getValue())));
        }
    }
    return dict;
}
@Test(dataProvider = "writeIndexData")
public void testWriteIndex(final File inputFile, final IndexFactory.IndexType type, final FeatureCodec codec) throws Exception {
    // temp index file for this test; extension must match the index type so
    // IndexFactory.loadIndex can recognize it
    final File tempIndex = File.createTempFile("index",
            (type == IndexFactory.IndexType.TABIX) ? TabixUtils.STANDARD_INDEX_EXTENSION : Tribble.STANDARD_INDEX_EXTENSION);
    // remove the empty placeholder so we can assert the index.write() call creates it
    tempIndex.delete();
    tempIndex.deleteOnExit();
    // create the index
    final Index index = IndexFactory.createIndex(inputFile, codec, type);
    Assert.assertFalse(tempIndex.exists());
    // write the index to a file
    index.write(tempIndex);
    Assert.assertTrue(tempIndex.exists());
    // load the generated index
    final Index loadedIndex = IndexFactory.loadIndex(tempIndex.getAbsolutePath());
    //TODO: This is just a smoke test; it can pass even if the generated index is unusable for queries.
    // test that the sequences and properties are the same
    Assert.assertEquals(loadedIndex.getSequenceNames(), index.getSequenceNames());
    Assert.assertEquals(loadedIndex.getProperties(), index.getProperties());
    // test that write to a stream does not blow up
    index.write(new LittleEndianOutputStream(nullOutputStrem));
}
/**
 * Builds a linear index from a BED file and checks the indexed contig and its blocks.
 * Note: TestNG's Assert.assertEquals signature is (actual, expected) — the original
 * passed the arguments JUnit-style reversed, which yields misleading failure messages.
 */
@Test(dataProvider = "bedDataProvider")
public void testCreateLinearIndexFromBED(final File inputBEDFile) throws Exception {
    final Index index = IndexFactory.createLinearIndex(inputBEDFile, new BEDCodec());

    // the test data is expected to cover exactly one contig, chr2
    final String chr = "chr2";
    Assert.assertTrue(index.getSequenceNames().contains(chr));
    Assert.assertTrue(index.containsChromosome(chr));
    Assert.assertEquals(index.getSequenceNames().size(), 1);

    // a query over the start of the contig should resolve to a single block
    final List<Block> blocks = index.getBlocks(chr, 1, 50);
    Assert.assertEquals(blocks.size(), 1);

    final Block block = blocks.get(0);
    Assert.assertEquals(block.getSize(), 78);
}
/**
 * Return an iterator to iterate over features overlapping the specified interval
 * <p/>
 * Note that TribbleIndexedFeatureReader only supports issuing and manipulating a single query
 * for each reader. That is, the behavior of the following code is undefined:
 * <p/>
 * reader = new TribbleIndexedFeatureReader()
 * Iterator it1 = reader.query("x", 10, 20)
 * Iterator it2 = reader.query("x", 1000, 1010)
 * <p/>
 * As a consequence of this, the TribbleIndexedFeatureReader are also not thread-safe.
 *
 * @param chr   contig
 * @param start start position
 * @param end   end position
 * @return an iterator of records in this interval
 * @throws IOException
 */
@Override
public CloseableTribbleIterator<T> query(final String chr, final int start, final int end) throws IOException {
    if (!this.hasIndex()) {
        throw new TribbleException("Index not found for: " + path);
    }
    // A contig the index has never seen cannot overlap anything.
    if (!index.containsChromosome(chr)) {
        return new EmptyIterator<>();
    }
    // The index lookup takes start - 1; presumably it expects a 0-based start — confirm against Index.getBlocks.
    final List<Block> candidateBlocks = index.getBlocks(chr, start - 1, end);
    return new QueryIterator(chr, start, end, candidateBlocks);
}
/** * attempt to close the VCF file */
public void close() {
    try {
        // close the underlying output stream first; the index is finalized afterwards
        // from the recorded position (keep it separate to help debugging efforts)
        outputStream.close();
        // close the index stream (keep it separate to help debugging efforts)
        if (indexer != null) {
            // Tribble-based indexers additionally carry the reference dictionary;
            // other indexer types are finalized without it.
            if (indexer instanceof TribbleIndexCreator) {
                setIndexSequenceDictionary((TribbleIndexCreator)indexer, refDict);
            }
            final Index index = indexer.finalizeIndex(locationSource.getPosition());
            index.writeBasedOnFeatureFile(location);
        }
    } catch (final IOException e) {
        throw new RuntimeException("Unable to close index for " + getStreamName(), e);
    }
}
/** Loads a linear index built from the test BED file and checks the first block's start. */
@Test
public void testLinearIndex() throws IOException {
    final String bedPath = TestUtils.DATA_DIR + "bed/test.bed";
    final String indexPath = doStandardIndex(bedPath, "idx");

    final Index linearIndex = IndexFactory.loadIndex(indexPath);
    final List<Block> overlapping = linearIndex.getBlocks("chr1", 100, 200);
    final Block firstBlock = overlapping.get(0);

    assertEquals("Unexpected start position ", 46, firstBlock.getStartPosition());
}
/**
 * Write an appropriately named and located Index file based on the name and location
 * of the featureFile.
 *
 * <p>Default implementation delegates to {@link #writeBasedOnFeaturePath(Path)}.
 *
 * @param featureFile the feature file whose name and location determine the index file
 * @throws IOException if featureFile is not a normal file.
 */
default void writeBasedOnFeatureFile(final File featureFile) throws IOException {
    // Interface members are implicitly public; the redundant modifier is dropped.
    writeBasedOnFeaturePath(IOUtil.toPath(featureFile));
}
/**
 * Return an iterator to iterate over features overlapping the specified interval
 * <p/>
 * Note that TribbleIndexedFeatureReader only supports issuing and manipulating a single query
 * for each reader. That is, the behavior of the following code is undefined:
 * <p/>
 * reader = new TribbleIndexedFeatureReader()
 * Iterator it1 = reader.query("x", 10, 20)
 * Iterator it2 = reader.query("x", 1000, 1010)
 * <p/>
 * As a consequence of this, the TribbleIndexedFeatureReader are also not thread-safe.
 *
 * @param chr   contig
 * @param start start position
 * @param end   end position
 * @return an iterator of records in this interval
 * @throws IOException
 */
public CloseableTribbleIterator<T> query(final String chr, final int start, final int end) throws IOException {
    if (!this.hasIndex()) {
        throw new TribbleException("Index not found for: " + path);
    }
    if (index.containsChromosome(chr)) {
        // the index lookup takes start - 1; presumably it expects a 0-based start — confirm against Index.getBlocks
        final List<Block> blocks = index.getBlocks(chr, start - 1, end);
        return new QueryIterator(chr, start, end, blocks);
    } else {
        // unknown contig: nothing can overlap, so hand back an empty iterator
        // (diamond operator, consistent with the other query implementations)
        return new EmptyIterator<>();
    }
}
// Persist the index alongside the output feature file, then release the writer.
index.writeBasedOnFeatureFile(outFile);
writer.close();
/**
 * Write an appropriately named and located Index file based on the name and location
 * of the featureFile.
 *
 * <p>Default implementation delegates to {@link #writeBasedOnFeaturePath(Path)}.
 *
 * @param featureFile the feature file whose name and location determine the index file
 * @throws IOException if featureFile is not a normal file.
 */
default void writeBasedOnFeatureFile(final File featureFile) throws IOException {
    // Interface members are implicitly public; the redundant modifier is dropped.
    writeBasedOnFeaturePath(IOUtil.toPath(featureFile));
}
/**
 * Writes {@code idx} to {@code idxFile} in little-endian byte order.
 *
 * @param idx     index to serialize
 * @param idxFile file to serialize the index into
 * @throws IOException if the write fails
 * @deprecated use {@link Index#write(File)} instead
 */
@Deprecated
public static void writeIndex(final Index idx, final File idxFile) throws IOException {
    // Thin deprecated shim retained for callers that predate Index.write(File).
    idx.write(idxFile);
}
/**
 * Return an iterator to iterate over features overlapping the specified interval
 * <p/>
 * Note that TribbleIndexedFeatureReader only supports issuing and manipulating a single query
 * for each reader. That is, the behavior of the following code is undefined:
 * <p/>
 * reader = new TribbleIndexedFeatureReader()
 * Iterator it1 = reader.query("x", 10, 20)
 * Iterator it2 = reader.query("x", 1000, 1010)
 * <p/>
 * As a consequence of this, the TribbleIndexedFeatureReader are also not thread-safe.
 *
 * @param chr   contig
 * @param start start position
 * @param end   end position
 * @return an iterator of records in this interval
 * @throws IOException
 */
@Override
public CloseableTribbleIterator<T> query(final String chr, final int start, final int end) throws IOException {
    if (!this.hasIndex()) {
        throw new TribbleException("Index not found for: " + path);
    }
    // Contigs absent from the index cannot contain overlapping features.
    if (!index.containsChromosome(chr)) {
        return new EmptyIterator<>();
    }
    // The index lookup takes start - 1; presumably it expects a 0-based start — confirm against Index.getBlocks.
    final List<Block> matchingBlocks = index.getBlocks(chr, start - 1, end);
    return new QueryIterator(chr, start, end, matchingBlocks);
}
/**
 * Return the sequence (chromosome/contig) names in this file, if known.
 *
 * @return list of strings of the contig names; empty when no index is present
 */
@Override
public List<String> getSequenceNames() {
    // No index means no contig metadata — report an empty list rather than null.
    if (!this.hasIndex()) {
        return new ArrayList<>();
    }
    // Copy so callers cannot mutate the index's internal list.
    return new ArrayList<>(index.getSequenceNames());
}
@Test(dataProvider = "writeIndexData") public void testWriteBasedOnNonRegularFeatureFile(final File inputFile, final IndexFactory.IndexType type, final FeatureCodec codec) throws Exception { final File tmpFolder = IOUtil.createTempDir("NonRegultarFeatureFile", null); // create the index final Index index = IndexFactory.createIndex(inputFile, codec, type); // try to write based on the tmpFolder Assert.assertThrows(IOException.class, () -> index.writeBasedOnFeatureFile(tmpFolder)); } }
public String[] getSamples() { if (this.tribbleFeatureSource.isIndexed()) { // Load the index for meta information. Its already loaded, but not public in the htsjdk class. Index idx = loadIndex(this.locator.getPath()); if (idx != null) { Map<String, String> map = idx.getProperties(); if (map != null && map.containsKey("samples")) { return Globals.commaPattern.split(map.get("samples")); } } } // Try to fetch features from codec. This is to support a deprecated option to // specify sample names in the .mut or .maf file header. MUTCodec codec = new MUTCodec(locator.getPath(), null); return codec.getSamples(); }
/** * attempt to close the VCF file */
@Override
public void close() {
    try {
        // close the underlying output stream before finalizing the index from
        // the recorded stream position
        outputStream.close();
        // close the index stream (keep it separate to help debugging efforts)
        if (indexer != null) {
            indexer.setIndexSequenceDictionary(refDict);
            final Index index = indexer.finalizeIndex(locationSource.getPosition());
            index.writeBasedOnFeaturePath(location);
        }
    } catch (final IOException e) {
        throw new RuntimeIOException("Unable to close index for " + getStreamName(), e);
    }
}
/**
 * Writes the index into a file.
 *
 * <p>Default implementation delegates to {@link #write(Path)}.
 *
 * @param idxFile Where to write the index.
 * @throws IOException if the index is unable to write to the specified file
 */
default void write(final File idxFile) throws IOException {
    // Interface members are implicitly public; the redundant modifier is dropped.
    write(IOUtil.toPath(idxFile));
}
/**
 * Return the sequence (chromosome/contig) names in this file, if known.
 *
 * @return list of strings of the contig names; never null
 */
@Override
public List<String> getSequenceNames() {
    // Return a defensive copy when indexed; otherwise an empty list stands in
    // for "contig names unknown" so callers never see null.
    final List<String> names = new ArrayList<>();
    if (this.hasIndex()) {
        names.addAll(index.getSequenceNames());
    }
    return names;
}