public GenomeRegionSequenceExtractor(JannovarData jannovarData, IndexedFastaSequenceFile indexedFile) { super(); this.jannovarData = jannovarData; this.indexedFile = indexedFile; if (this.indexedFile.getSequenceDictionary() == null) { throw new UncheckedJannovarException( "FASTA file has no sequence dictionary. Are you missing the REFERENCE.dict file? " + "Hint: create with samtools dict (version >=1.2) or Picard."); } }
/** * Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened. * @param file The file to open. * @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk. * @throws FileNotFoundException If the fasta or any of its supporting files cannot be found. */ public IndexedFastaSequenceFile(final File file, final FastaSequenceIndex index) { super(file); if (index == null) throw new IllegalArgumentException("Null index for fasta " + file); this.index = index; IOUtil.assertFileIsReadable(file); final FileInputStream in; try { in = new FileInputStream(file); } catch (FileNotFoundException e) { throw new SAMException("Fasta file should be readable but is not: " + file, e); } channel = in.getChannel(); reset(); if(getSequenceDictionary() != null) sanityCheckDictionaryAgainstIndex(file.getAbsolutePath(),sequenceDictionary,index); }
private void loadFASTAIndex() { try { this.fasta = new IndexedFastaSequenceFile(new File(options.getPathReferenceFASTA())); } catch (FileNotFoundException e) { throw new UncheckedJannovarException("Could not load FASTA index", e); } if (this.fasta.getSequenceDictionary() == null) { throw new UncheckedJannovarException( "FASTA sequence dictionary empty, you have a REFERENCE.dict file (create with Picard " + "or samtools dict, version >=1.3)"); } this.translator = new NucleotideChangeToGenomeVariantTranslator(jannovarData, fasta); }
/** Map contig name (from genome variant) to contig name in FASTA */ private String mapContigToFasta(String contigName) { // Map genome variant's contig to unique ID Integer contigID = jannovarData.getRefDict().getContigNameToID().get(contigName); if (contigID == null) throw new UncheckedJannovarException("Unknown contig name " + contigName); // Try to find matching contig in fasta String nameInFasta = null; for (SAMSequenceRecord record : indexedFile.getSequenceDictionary().getSequences()) { if (jannovarData.getRefDict().getContigNameToID().containsKey(record.getSequenceName())) { String contigInFasta = record.getSequenceName(); if (jannovarData.getRefDict().getContigNameToID().get(contigInFasta) == contigID) { nameInFasta = contigInFasta; break; } } } if (nameInFasta == null) throw new UncheckedJannovarException("Could not find corresponding contig in FASTA for " + contigName); return nameInFasta; }
/** Map contig name (from genome variant) to contig name in FASTA */ private String mapContigToFasta(String contigName) { // Map genome variant's contig to unique ID Integer contigID = jannovarData.getRefDict().getContigNameToID().get(contigName); if (contigID == null) throw new UncheckedJannovarException("Unknown contig name " + contigName); // Try to find matching contig in fasta String nameInFasta = null; for (SAMSequenceRecord record : fasta.getSequenceDictionary().getSequences()) { if (jannovarData.getRefDict().getContigNameToID().containsKey(record.getSequenceName())) { nameInFasta = record.getSequenceName(); break; } } if (nameInFasta == null) throw new UncheckedJannovarException("Could not find corresponding contig in FASTA for " + contigName); return nameInFasta; }
private VariantContextWriter openOutputFile() { VariantContextWriterBuilder builder = new VariantContextWriterBuilder() .setReferenceDictionary(fasta.getSequenceDictionary()).setOutputFile(options.getPathOutputVCF()); if (options.getPathOutputVCF().endsWith(".gz") || options.getPathOutputVCF().endsWith(".bcf")) builder.setOption(Options.INDEX_ON_THE_FLY); else builder.unsetOption(Options.INDEX_ON_THE_FLY); VariantContextWriter writer = builder.build(); VCFHeader header = new VCFHeader(); int i = 0; for (SAMSequenceRecord record : fasta.getSequenceDictionary().getSequences()) { Map<String, String> mapping = new TreeMap<String, String>(); mapping.put("ID", record.getSequenceName()); mapping.put("length", Integer.toString(record.getSequenceLength())); header.addMetaDataLine(new VCFContigHeaderLine(mapping, i++)); } header.addMetaDataLine(new VCFSimpleHeaderLine("ALT", "ERROR", "Error in conversion")); header.addMetaDataLine(new VCFFilterHeaderLine("PARSE_ERROR", "Problem in parsing original HGVS variant string, written out as variant at 1:g.1N>N")); header.addMetaDataLine(new VCFInfoHeaderLine("ERROR_MESSAGE", 1, VCFHeaderLineType.String, "Error message")); header.addMetaDataLine(new VCFInfoHeaderLine("ORIG_VAR", 1, VCFHeaderLineType.String, "Original HGVS variant string from input file to hgvs-to-vcf")); writer.writeHeader(header); return writer; }
} else { SAMSequenceRecord contigInfo = super.getSequenceDictionary().getSequence(contig);