/**
 * Renders a VCF header as the text lines that a VCF writer emits for it.
 *
 * <p>The header is written to a temporary ".vcf" file through a
 * {@link VariantContextWriter} and that file is then read back line by line.
 *
 * @param header the header to render; its sequence dictionary is handed to the writer
 * @return the lines read back from the temporary file, in file order. If an
 *         {@link IOException} occurs, its stack trace is printed and whatever has been
 *         collected so far (possibly nothing) is returned.
 */
public static List<String> vcfHeaderToStrings(VCFHeader header) {
    final List<String> lines = new ArrayList<String>();
    try {
        final File fakeVCFFile = Utils.createTempFile(".vcfheader", ".vcf");

        // The writer has to be closed before the file is read back; try-with-resources
        // also guarantees it is released when writeHeader() fails.
        try (final VariantContextWriter writer = new VariantContextWriterBuilder()
                .setOutputFile(fakeVCFFile)
                .setReferenceDictionary(header.getSequenceDictionary())
                .setOptions(EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER, Options.WRITE_FULL_FORMAT_FIELD))
                .build()) {
            writer.writeHeader(header);
        }

        // The reader is closed even if readLine() throws part-way through the file.
        try (final BufferedReader br = new BufferedReader(new FileReader(fakeVCFFile))) {
            String line = br.readLine();
            while (line != null) {
                lines.add(line);
                line = br.readLine();
            }
        }
    } catch (IOException e) {
        // Existing best-effort contract kept on purpose: report the failure and
        // return what was read instead of propagating to callers.
        e.printStackTrace();
    }
    return lines;
}
/**
 * Copies a VCF into a temporary file, asking the writer to build an index while it writes.
 *
 * <p>Working on a copy means the index does not have to be stored next to the original
 * input, while the copy and its index end up together, which is typically required.
 *
 * @param vcfFile        the VCF to copy; its absolute path must end with
 *                       {@code IOUtil.VCF_FILE_EXTENSION} or
 *                       {@code IOUtil.COMPRESSED_VCF_FILE_EXTENSION}
 * @param tempFilePrefix prefix passed to {@code createTemporaryIndexedVcfFile} for the copy
 * @return the temporary copy, written with {@code Options.INDEX_ON_THE_FLY}
 * @throws IllegalArgumentException if the input path has neither recognised extension
 * @throws IOException              declared for callers; see the helpers used for the actual sources
 */
public static File createTemporaryIndexedVcfFromInput(final File vcfFile, final String tempFilePrefix) throws IOException {
    final String inputPath = vcfFile.getAbsolutePath();

    // Pick the extension for the copy from the input's own suffix.
    final String extension;
    if (inputPath.endsWith(IOUtil.VCF_FILE_EXTENSION)) {
        extension = IOUtil.VCF_FILE_EXTENSION;
    } else if (inputPath.endsWith(IOUtil.COMPRESSED_VCF_FILE_EXTENSION)) {
        extension = IOUtil.COMPRESSED_VCF_FILE_EXTENSION;
    } else {
        throw new IllegalArgumentException("couldn't find a " + IOUtil.VCF_FILE_EXTENSION + " or " + IOUtil.COMPRESSED_VCF_FILE_EXTENSION + " ending for input file " + inputPath);
    }

    final File indexedCopy = createTemporaryIndexedVcfFile(tempFilePrefix, extension);

    try (final VCFFileReader reader = new VCFFileReader(vcfFile, false);
         final VariantContextWriter writer = new VariantContextWriterBuilder()
                 .setReferenceDictionary(reader.getFileHeader().getSequenceDictionary())
                 .setOptions(EnumSet.of(Options.INDEX_ON_THE_FLY))
                 .setOutputFile(indexedCopy)
                 .build()) {
        writer.writeHeader(reader.getFileHeader());
        for (final VariantContext variant : reader) {
            writer.add(variant);
        }
    }
    return indexedCopy;
}
/**
 * Copies a VCF into a temporary file, asking the writer to build an index while it writes.
 *
 * <p>Working on a copy means the index does not have to be stored next to the original
 * input, while the copy and its index end up together, which is typically required.
 *
 * @param vcfFile        the VCF to copy; its absolute path must end with
 *                       {@code IOUtil.VCF_FILE_EXTENSION} or
 *                       {@code IOUtil.COMPRESSED_VCF_FILE_EXTENSION}
 * @param tempFilePrefix prefix passed to {@code createTemporaryIndexedVcfFile} for the copy
 * @return the temporary copy, written with {@code Options.INDEX_ON_THE_FLY}
 * @throws IllegalArgumentException if the input path has neither recognised extension
 * @throws IOException              declared for callers; see the helpers used for the actual sources
 */
public static File createTemporaryIndexedVcfFromInput(final File vcfFile, final String tempFilePrefix) throws IOException {
    final String inputPath = vcfFile.getAbsolutePath();

    // Pick the extension for the copy from the input's own suffix.
    final String extension;
    if (inputPath.endsWith(IOUtil.VCF_FILE_EXTENSION)) {
        extension = IOUtil.VCF_FILE_EXTENSION;
    } else if (inputPath.endsWith(IOUtil.COMPRESSED_VCF_FILE_EXTENSION)) {
        extension = IOUtil.COMPRESSED_VCF_FILE_EXTENSION;
    } else {
        throw new IllegalArgumentException("couldn't find a " + IOUtil.VCF_FILE_EXTENSION + " or " + IOUtil.COMPRESSED_VCF_FILE_EXTENSION + " ending for input file " + inputPath);
    }

    final File indexedCopy = createTemporaryIndexedVcfFile(tempFilePrefix, extension);

    try (final VCFFileReader reader = new VCFFileReader(vcfFile, false);
         final VariantContextWriter writer = new VariantContextWriterBuilder()
                 .setReferenceDictionary(reader.getFileHeader().getSequenceDictionary())
                 .setOptions(EnumSet.of(Options.INDEX_ON_THE_FLY))
                 .setOutputFile(indexedCopy)
                 .build()) {
        writer.writeHeader(reader.getFileHeader());
        for (final VariantContext variant : reader) {
            writer.add(variant);
        }
    }
    return indexedCopy;
}
.setOptions(options) .setOutputFile(OUTPUT).setReferenceDictionary(outHeader.getSequenceDictionary()).build(); out.writeHeader(outHeader);
.setOptions(options) .setOutputFile(OUTPUT).setReferenceDictionary(outHeader.getSequenceDictionary()).build(); out.writeHeader(outHeader);
/**
 * Writes the header and then every record of the sorted collection to {@code OUTPUT}.
 *
 * <p>An index is requested from the writer only when {@code CREATE_INDEX} is set.
 * Progress is reported through a {@link ProgressLogger} configured with a count of 25000.
 *
 * @param outputHeader the header to write; also supplies the reference dictionary
 * @param sortedOutput the records to write, in iteration order
 */
private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) {
    final ProgressLogger writeProgress = new ProgressLogger(log, 25000, "wrote", "records");
    final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class);

    // try-with-resources so the writer is closed even when writeHeader()/add() throws;
    // previously close() was only reached on the success path.
    try (final VariantContextWriter out = new VariantContextWriterBuilder().
            setReferenceDictionary(outputHeader.getSequenceDictionary()).
            setOptions(options).
            setOutputFile(OUTPUT).build()) {
        out.writeHeader(outputHeader);
        for (final VariantContext variantContext : sortedOutput) {
            out.add(variantContext);
            writeProgress.record(variantContext.getContig(), variantContext.getStart());
        }
    }
}
} // closes the enclosing type declaration, whose opening is outside this view
/**
 * Writes the header and then every record of the sorted collection to {@code OUTPUT}.
 *
 * <p>An index is requested from the writer only when {@code CREATE_INDEX} is set.
 * Progress is reported through a {@link ProgressLogger} configured with a count of 25000.
 *
 * @param outputHeader the header to write; also supplies the reference dictionary
 * @param sortedOutput the records to write, in iteration order
 */
private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) {
    final ProgressLogger writeProgress = new ProgressLogger(log, 25000, "wrote", "records");
    final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class);

    // try-with-resources so the writer is closed even when writeHeader()/add() throws;
    // previously close() was only reached on the success path.
    try (final VariantContextWriter out = new VariantContextWriterBuilder().
            setReferenceDictionary(outputHeader.getSequenceDictionary()).
            setOptions(options).
            setOutputFile(OUTPUT).build()) {
        out.writeHeader(outputHeader);
        for (final VariantContext variantContext : sortedOutput) {
            out.add(variantContext);
            writeProgress.record(variantContext.getContig(), variantContext.getStart());
        }
    }
}
} // closes the enclosing type declaration, whose opening is outside this view
@Test public void testVcf42Roundtrip() throws Exception { // this test ensures that source/version fields are round-tripped properly // read an existing VCF File expectedFile = new File("src/test/resources/htsjdk/variant/Vcf4.2WithSourceVersionInfoFields.vcf"); // write the file out into a new copy final File actualFile = File.createTempFile("testVcf4.2roundtrip.", IOUtil.VCF_FILE_EXTENSION); actualFile.deleteOnExit(); try (final VCFFileReader originalFileReader = new VCFFileReader(expectedFile, false); final VariantContextWriter copyWriter = new VariantContextWriterBuilder() .setOutputFile(actualFile) .setReferenceDictionary(createArtificialSequenceDictionary()) .setOptions(EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER, Options.INDEX_ON_THE_FLY)) .build() ) { final VCFHeader originalHeader = originalFileReader.getFileHeader(); copyWriter.writeHeader(originalHeader); for (final VariantContext variantContext : originalFileReader) { copyWriter.add(variantContext); } } final String actualContents = new String(Files.readAllBytes(actualFile.toPath()), StandardCharsets.UTF_8); final String expectedContents = new String(Files.readAllBytes(expectedFile.toPath()), StandardCharsets.UTF_8); Assert.assertEquals(actualContents, expectedContents); }
/**
 * Builds a writer targeting {@code outFile} with an empty sequence dictionary and the
 * builder's default options plus {@code Options.ALLOW_MISSING_FIELDS_IN_HEADER}.
 *
 * @return a newly built writer
 */
private VariantContextWriter getWriter() {
    final SAMSequenceDictionary seqDict = new SAMSequenceDictionary();

    // Work on a copy: adding to VariantContextWriterBuilder.DEFAULT_OPTIONS directly would
    // mutate the shared static set and silently change the defaults of every other builder.
    final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
    options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(seqDict)
            .setOptions(options);
    return builder.setOutputFile(outFile).build();
}
/**
 * Verifies that adding a record to a writer whose header was never written fails:
 * the test passes only if an {@link IllegalStateException} is thrown.
 *
 * @param extension file extension supplied by the "vcfExtensionsDataProvider" data provider
 */
@Test(dataProvider = "vcfExtensionsDataProvider", expectedExceptions = IllegalStateException.class)
public void testWriteWithEmptyHeader(final String extension) throws IOException {
    final File outputFile = File.createTempFile("testWriteAndReadVCFHeaderless.", extension, tempDir);

    // metaData / additionalColumns are declared outside this method; start from empty sets.
    metaData = new HashSet<>();
    additionalColumns = new HashSet<>();

    final SAMSequenceDictionary dictionary = createArtificialSequenceDictionary();
    final VCFHeader fakeHeader = createFakeHeader(metaData, additionalColumns, dictionary);

    try (final VariantContextWriter headerlessWriter = new VariantContextWriterBuilder()
            .setOutputFile(outputFile)
            .setReferenceDictionary(dictionary)
            .setOptions(EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER, Options.INDEX_ON_THE_FLY))
            .build()) {
        // No writeHeader()/setHeader() call before this add.
        headerlessWriter.add(createVC(fakeHeader));
    }
}
} // closes the enclosing type declaration, whose opening is outside this view
/**
 * Creates a writer for the given file using this object's {@code dictionary} as the
 * reference dictionary and the supplied options.
 *
 * @param file        destination file
 * @param baseOptions options passed to the builder unchanged
 * @return the writer produced by the builder
 */
@Override
public VariantContextWriter makeWriter(final File file, final EnumSet<Options> baseOptions) {
    final VariantContextWriterBuilder configured = new VariantContextWriterBuilder()
            .setOutputFile(file)
            .setReferenceDictionary(dictionary)
            .setOptions(baseOptions);
    return configured.build();
}
/**
 * Creates a writer for the given file using this object's {@code dictionary} as the
 * reference dictionary and the supplied options.
 *
 * @param file        destination file
 * @param baseOptions options passed to the builder unchanged
 * @return the writer produced by the builder
 */
@Override
public VariantContextWriter makeWriter(final File file, final EnumSet<Options> baseOptions) {
    final VariantContextWriterBuilder configured = new VariantContextWriterBuilder()
            .setOutputFile(file)
            .setReferenceDictionary(dictionary)
            .setOptions(baseOptions);
    return configured.build();
}
try (final VariantContextWriter out = new VariantContextWriterBuilder(). setReferenceDictionary(header.getSequenceDictionary()). setOptions(EnumSet.of(Options.INDEX_ON_THE_FLY)). setOutputFile(output).build()) { out.writeHeader(header);
/**
 * Writes two records to a BCF file with the index-on-the-fly option enabled, reads the
 * file back with a {@link BCF2Codec}, and checks that exactly two records come out.
 */
@Test
public void testWriteAndReadBCFWithIndex() throws IOException {
    final File bcfOutputFile = File.createTempFile("testWriteAndReadVCF.", ".bcf", tempDir);
    // Both the BCF and its index are removed when the JVM exits.
    bcfOutputFile.deleteOnExit();
    Tribble.indexFile(bcfOutputFile).deleteOnExit();

    final VCFHeader fakeHeader = createFakeHeader();

    try (final VariantContextWriter bcfWriter = new VariantContextWriterBuilder()
            .setOutputFile(bcfOutputFile)
            .setReferenceDictionary(fakeHeader.getSequenceDictionary())
            .setOptions(EnumSet.of(Options.INDEX_ON_THE_FLY))
            .build()) {
        bcfWriter.writeHeader(fakeHeader);
        bcfWriter.add(createVC(fakeHeader));
        bcfWriter.add(createVC(fakeHeader));
    }

    final VariantContextTestProvider.VariantContextContainer readBack =
            VariantContextTestProvider.readAllVCs(bcfOutputFile, new BCF2Codec());

    // Count the records that were decoded; their content is not inspected.
    int recordCount = 0;
    for (final VariantContext ignored : readBack.getVCs()) {
        recordCount++;
    }
    Assert.assertEquals(recordCount, 2);
}
@Test public void simpleTest() throws Exception { final VCF3Codec codec = new VCF3Codec(); final FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(SMALL_VCF.getAbsolutePath(), codec, false); final VCFHeader headerFromFile = (VCFHeader)reader.getHeader(); final File vcf = File.createTempFile("TabixOnTheFlyIndexCreationTest.", IOUtil.COMPRESSED_VCF_FILE_EXTENSION); final File tabix = new File(vcf.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION); vcf.deleteOnExit(); tabix.deleteOnExit(); final VariantContextWriter vcfWriter = new VariantContextWriterBuilder() .setOutputFile(vcf) .setReferenceDictionary(headerFromFile.getSequenceDictionary()) .setOptions(EnumSet.of(Options.INDEX_ON_THE_FLY, Options.ALLOW_MISSING_FIELDS_IN_HEADER)) .build(); vcfWriter.writeHeader(headerFromFile); final CloseableTribbleIterator<VariantContext> it = reader.iterator(); while (it.hasNext()) { vcfWriter.add(it.next()); } it.close(); vcfWriter.close(); // Hard to validate, so just confirm that index can be read. new TabixIndex(tabix); } }
.setOutputFile(outputFile) .setReferenceDictionary(dict) .setOptions(EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER)) .build(); writer.writeHeader((VCFHeader)source.getHeader());
/**
 * A test to ensure that if we add a line to a VCFHeader it will persist through
 * a round-trip write/read cycle via VariantContextWriter/VCFFileReader.
 *
 * <p>Readers and the writer are scoped with try-with-resources so they are released
 * even when an intermediate step throws.
 */
@Test
public void testModifyHeader() {
    final File originalVCF = new File("src/test/resources/htsjdk/variant/HiSeq.10000.vcf");

    // Load the header of the original file.
    final VCFHeader header;
    try (final VCFFileReader reader = new VCFFileReader(originalVCF, false)) {
        header = reader.getFileHeader();
    }

    header.addMetaDataLine(new VCFHeaderLine("FOOBAR", "foovalue"));

    // Write only the modified header to a temporary VCF.
    final File outputVCF = createTempFile("testModifyHeader", IOUtil.VCF_FILE_EXTENSION);
    try (final VariantContextWriter writer = new VariantContextWriterBuilder()
            .setOutputFile(outputVCF)
            .setOptions(EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER))
            .build()) {
        writer.writeHeader(header);
    }

    // Read the header back and check that the added line survived.
    final VCFHeader roundtripHeader;
    try (final VCFFileReader roundtripReader = new VCFFileReader(outputVCF, false)) {
        roundtripHeader = roundtripReader.getFileHeader();
    }

    Assert.assertNotNull(roundtripHeader.getOtherHeaderLine("FOOBAR"), "Could not find FOOBAR header line after a write/read cycle");
    Assert.assertEquals(roundtripHeader.getOtherHeaderLine("FOOBAR").getValue(), "foovalue", "Wrong value for FOOBAR header line after a write/read cycle");
}
/**
 * Writes two records, without writing the header, through a writer built with the
 * async-I/O option, then decodes the output line by line and checks that exactly
 * two records are present.
 */
@Test
public void testWriteAndReadAsyncVCFHeaderless() throws IOException {
    final File outputVcf = VariantBaseTest.createTempFile("testWriteAndReadAsyncVCFHeaderless.", IOUtil.VCF_FILE_EXTENSION);
    outputVcf.deleteOnExit();
    Tribble.indexFile(outputVcf).deleteOnExit();

    final Set<VCFHeaderLine> headerLines = new HashSet<>();
    final Set<String> extraColumns = new HashSet<>();
    final SAMSequenceDictionary dictionary = createArtificialSequenceDictionary();
    final VCFHeader fakeHeader = createFakeHeader(headerLines, extraColumns, dictionary);

    try (final VariantContextWriter asyncWriter = new VariantContextWriterBuilder()
            .setOutputFile(outputVcf)
            .setReferenceDictionary(dictionary)
            .setOptions(EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER, Options.INDEX_ON_THE_FLY, Options.USE_ASYNC_IO))
            .build()) {
        // setHeader() rather than writeHeader(): the header is supplied to the writer
        // but this test does not ask for it to be written.
        asyncWriter.setHeader(fakeHeader);
        asyncWriter.add(createVC(fakeHeader));
        asyncWriter.add(createVC(fakeHeader));
    }

    // The codec is given the header directly before decoding the file's lines.
    final VCFCodec vcfCodec = new VCFCodec();
    vcfCodec.setVCFHeader(fakeHeader, VCFHeaderVersion.VCF4_2);

    try (final FileInputStream input = new FileInputStream(outputVcf)) {
        final AsciiLineReaderIterator lineIterator = new AsciiLineReaderIterator(new AsciiLineReader(input));
        int decodedRecords = 0;
        while (lineIterator.hasNext()) {
            // Every line must decode; the decoded value itself is not inspected.
            vcfCodec.decode(lineIterator.next());
            decodedRecords++;
        }
        Assert.assertEquals(decodedRecords, 2);
    }
}
/**
 * Creates storage that writes straight to the destination named by the stub.
 * The header is intentionally not written here; the walker is expected to supply it.
 *
 * <p>A file destination takes precedence over a stream destination. According to the
 * original documentation the stub's isCompressed() request is respected (block-compressed
 * output); that handling is not visible in this constructor and presumably lives in
 * {@code vcfWriterToFile} -- NOTE(review): confirm.
 *
 * @param stub Stub to use when constructing the output file.
 * @throws ReviewedGATKException if the stub provides neither an output file nor an output stream
 */
public VariantContextWriterStorage(VariantContextWriterStub stub) {
    // Preferred target: a file on disk.
    if ( stub.getOutputFile() != null ) {
        this.file = stub.getOutputFile();
        writer = vcfWriterToFile(stub, stub.getOutputFile(), true, true);
        return;
    }

    // Without a stream either, there is nothing to write to.
    if ( stub.getOutputStream() == null ) {
        throw new ReviewedGATKException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
    }

    // Fallback target: the caller-supplied stream.
    this.file = null;
    this.stream = stub.getOutputStream();
    writer = new VariantContextWriterBuilder()
            .setOutputVCFStream(stream)
            .setReferenceDictionary(stub.getMasterSequenceDictionary())
            .setOptions(stub.getWriterOptions(false))
            .build();
}
private static File createBgzipVcfsWithVariableSize(final int firstRecordAttributeLength, final int nSmallRecords) throws Exception { final VariantContext longRecord = new VariantContextBuilder("long", TEST_CHR, 1, 1, alleles) .attribute(RANDOM_ATTRIBUTE, generateRandomString(firstRecordAttributeLength)) .make(); final File tempFile = Files.createTempFile("test" + firstRecordAttributeLength + "_" + nSmallRecords, ".vcf.gz").toFile(); try (final VariantContextWriter writer = new VariantContextWriterBuilder() .setOptions(VariantContextWriterBuilder.NO_OPTIONS) .setOutputFile(tempFile) .setOutputFileType(VariantContextWriterBuilder.OutputType.BLOCK_COMPRESSED_VCF) .build()) { writer.setHeader(createTestHeader()); // do not write the header writer.add(longRecord); for (int i = 2; i <= nSmallRecords + 1; i++) { final VariantContext smallRecord = new VariantContextBuilder("short", TEST_CHR, i, i, alleles).attribute(RANDOM_ATTRIBUTE, ".").make(); writer.add(smallRecord); } } return tempFile; }