/**
 * Resolves a contig name to its numeric ID using the reference dictionary.
 *
 * @param contig contig name to look up
 * @return the ID registered for {@code contig}, or {@code UNKNOWN_CHROMOSOME} when the dictionary has no
 *         entry for that name
 */
private Integer getIntValueOfChromosomeOrZero(String contig) {
    final java.util.Map<String, Integer> idsByContigName = referenceDictionary.getContigNameToID();
    return idsByContigName.getOrDefault(contig, UNKNOWN_CHROMOSOME);
}
/**
 * Parses a chromosomal change string into a {@link GenomeVariant} on the forward strand.
 *
 * The expected format is {@code chr<N>:<pos><REF>><ALT>}, for example {@code chr1:12345C>A}. The position is
 * interpreted as one-based.
 *
 * @param changeStr
 *            textual description of the change
 * @return the parsed {@link GenomeVariant}
 * @throws JannovarException
 *             if {@code changeStr} does not match the expected format, or if its contig is not present in the
 *             reference dictionary
 * @throws NumberFormatException
 *             if the position does not fit into an {@code int}
 */
private GenomeVariant parseGenomeChange(String changeStr) throws JannovarException {
    Pattern pat = Pattern.compile("(chr[0-9MXY]+):([0-9]+)([ACGTN]*)>([ACGTN]*)");
    Matcher match = pat.matcher(changeStr);
    if (!match.matches()) {
        // Report the problem to the caller instead of terminating the whole JVM from inside a parser.
        throw new JannovarException("[ERROR] Input string for the chromosomal change " + changeStr
                + " does not fit the regular expression ... :(");
    }

    // The regular expression also accepts names such as "chr99"; the lookup then yields null, which must not
    // be unboxed.
    final String contigName = match.group(1);
    final Integer chr = refDict.getContigNameToID().get(contigName);
    if (chr == null) {
        throw new JannovarException("[ERROR] Unknown contig " + contigName + " in chromosomal change "
                + changeStr);
    }

    int pos = Integer.parseInt(match.group(2));
    String ref = match.group(3);
    String alt = match.group(4);
    return new GenomeVariant(new GenomePosition(refDict, Strand.FWD, chr, pos, PositionType.ONE_BASED), ref, alt);
}
/**
 * Returns a new GeneticInterval from the parsed string. Strings are to be of the format
 * {@code chr<name>:<start>-<end>}, for example:
 * <ul>
 * <li>chr1:123-456</li>
 * <li>chrY:1234-1220</li>
 * <li>chr19:345-567</li>
 * </ul>
 * Accepted contig names are chr1 to chr22, chrX, chrY and chrM. This method does not check the relative order
 * of start and end.
 *
 * @param refDict
 *            reference dictionary used to translate the contig name into its numeric ID
 * @param interval
 *            the interval string to parse
 * @return the interval described by {@code interval}
 * @throws IllegalArgumentException
 *             if {@code interval} does not match the expected pattern, or if its contig is not present in
 *             {@code refDict}
 * @throws NumberFormatException
 *             if start or end does not fit into an {@code int}
 */
public static GeneticInterval parseString(ReferenceDictionary refDict, String interval) {
    String intervalPattern = "chr(1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|X|Y|M):[0-9]+-[0-9]+";
    if (!Pattern.matches(intervalPattern, interval)) {
        throw new IllegalArgumentException(String.format("Genetic interval %s does not match expected pattern %s", interval, intervalPattern));
    }

    String[] intervalSections = interval.split(":");
    String contigName = intervalSections[0];
    // A well-formed contig name can still be missing from this particular dictionary; fail with a clear
    // message rather than a NullPointerException from unboxing.
    Integer localChr = refDict.getContigNameToID().get(contigName);
    if (localChr == null) {
        throw new IllegalArgumentException(String.format("Genetic interval %s refers to contig %s which is not in the reference dictionary", interval, contigName));
    }

    String[] startEnd = intervalSections[1].split("-");
    int localStart = Integer.parseInt(startEnd[0]);
    int localEnd = Integer.parseInt(startEnd[1]);

    return new GeneticInterval(localChr, localStart, localEnd);
}
/**
 * Parses a chromosomal change string into a {@link GenomeVariant} on the forward strand.
 *
 * The expected format is {@code chr<N>:<pos><REF>><ALT>}, for example {@code chr1:12345C>A}. The position is
 * interpreted as one-based.
 *
 * @param changeStr
 *            textual description of the change
 * @return the parsed {@link GenomeVariant}
 * @throws JannovarException
 *             if {@code changeStr} does not match the expected format, or if its contig is not present in the
 *             reference dictionary
 * @throws NumberFormatException
 *             if the position does not fit into an {@code int}
 */
private GenomeVariant parseGenomeChange(String changeStr) throws JannovarException {
    Pattern pat = Pattern.compile("(chr[0-9MXY]+):([0-9]+)([ACGTN]*)>([ACGTN]*)");
    Matcher match = pat.matcher(changeStr);

    if (!match.matches()) {
        throw new JannovarException("[ERROR] Input string for the chromosomal change " + changeStr
                + " does not fit the regular expression ... :(");
    }

    // The regular expression also accepts names such as "chr99"; the lookup then yields null, which must not
    // be unboxed.
    final String contigName = match.group(1);
    final Integer chr = refDict.getContigNameToID().get(contigName);
    if (chr == null) {
        throw new JannovarException("[ERROR] Unknown contig " + contigName + " in chromosomal change "
                + changeStr);
    }

    int pos = Integer.parseInt(match.group(2));
    String ref = match.group(3);
    String alt = match.group(4);

    return new GenomeVariant(new GenomePosition(refDict, Strand.FWD, chr, pos, PositionType.ONE_BASED), ref, alt);
}
private static Function<String, ChromosomalRegion> toChromosomalRegion() { return line -> { String[] tokens = line.split("\t"); if (tokens.length < 3) { throw new BedFileParseException("BED file requires at least 3 columns invalid line: '" + line + "'"); } if (tokens.length > 3) { logger.warn("Line contains more than 3 columns - ignoring optional columns 4+. Therefore STRAND will all be +"); } int chr = referenceDictionary.getContigNameToID().get(tokens[0]); //BED format is 0-based - we use 1-based in the exomiser. int start = Integer.parseInt(tokens[1]) + 1; int end = Integer.parseInt(tokens[2]); return new GeneticInterval(chr, start, end); }; }
/** Map contig name (from genome variant) to contig name in FASTA */ private String mapContigToFasta(String contigName) { // Map genome variant's contig to unique ID Integer contigID = jannovarData.getRefDict().getContigNameToID().get(contigName); if (contigID == null) throw new UncheckedJannovarException("Unknown contig name " + contigName); // Try to find matching contig in fasta String nameInFasta = null; for (SAMSequenceRecord record : indexedFile.getSequenceDictionary().getSequences()) { if (jannovarData.getRefDict().getContigNameToID().containsKey(record.getSequenceName())) { String contigInFasta = record.getSequenceName(); if (jannovarData.getRefDict().getContigNameToID().get(contigInFasta) == contigID) { nameInFasta = contigInFasta; break; } } } if (nameInFasta == null) throw new UncheckedJannovarException("Could not find corresponding contig in FASTA for " + contigName); return nameInFasta; }
// Capture a timestamp before any work is done (its later use is outside this excerpt).
final long startTime = System.nanoTime();
// Refuse to serialize when there is no data object or its reference dictionary has no contigs.
if (data == null || data.getRefDict().getContigNameToID().isEmpty())
    throw new SerializationException("Attempting to serialize empty data set");
// Translate the contig name in arr[0] to its numeric ID.
// NOTE(review): the lookup result is unboxed directly; if arr[0] is not in the dictionary this presumably
// fails with a NullPointerException - confirm whether callers guarantee known contigs.
int chr = refDict.getContigNameToID().get(arr[0]);
// Columns 1 and 2 hold the begin and end positions as decimal integers.
int beginPos = Integer.parseInt(arr[1]);
int endPos = Integer.parseInt(arr[2]);
/** Map contig name (from genome variant) to contig name in FASTA */ private String mapContigToFasta(String contigName) { // Map genome variant's contig to unique ID Integer contigID = jannovarData.getRefDict().getContigNameToID().get(contigName); if (contigID == null) throw new UncheckedJannovarException("Unknown contig name " + contigName); // Try to find matching contig in fasta String nameInFasta = null; for (SAMSequenceRecord record : fasta.getSequenceDictionary().getSequences()) { if (jannovarData.getRefDict().getContigNameToID().containsKey(record.getSequenceName())) { nameInFasta = record.getSequenceName(); break; } } if (nameInFasta == null) throw new UncheckedJannovarException("Could not find corresponding contig in FASTA for " + contigName); return nameInFasta; }
boolean wrongContig = false; for (FeatureRecord record : featureRecords) { ImmutableMap<String, Integer> dict = refDict.getContigNameToID(); final String seqID = record.getSeqID(); if (!dict.containsKey(seqID)) { } else if (record.getType().equals("CDS")) { GenomeInterval cds = new GenomeInterval(refDict, Strand.FWD, refDict.getContigNameToID().get(record.getSeqID()), record.getBegin(), record.getEnd()); cds = cds.withStrand(strand); if (cdsRegion == null)
// Look up the numeric ID of the variant's contig; the Optional is empty when the dictionary has no entry
// for that contig name.
Optional<Integer> contigID = Optional.ofNullable(refDict.getContigNameToID().get(vc.getContig()));
// Fetch the gene interval structure registered for that contig ID. Optional.map also yields an empty
// Optional when the lookup returns null.
Optional<IntervalArray<Gene>> iTree = contigID.map(x -> geneList.getGeneIntervalTree().get(x));
/**
 * Converts a {@link ReferenceDictionary} into its protobuf representation by copying its three maps
 * (contig name to ID, contig ID to length, contig ID to name) into a new message.
 *
 * @param referenceDictionary the dictionary to convert
 * @return the populated protobuf message
 */
private static JannovarProto.ReferenceDictionary toProtoReferenceDictionary(ReferenceDictionary referenceDictionary) {
    final JannovarProto.ReferenceDictionary.Builder protoBuilder = JannovarProto.ReferenceDictionary.newBuilder();
    protoBuilder.putAllContigNameToId(referenceDictionary.getContigNameToID());
    protoBuilder.putAllContigIdToLength(referenceDictionary.getContigIDToLength());
    protoBuilder.putAllContigIdToName(referenceDictionary.getContigIDToName());
    return protoBuilder.build();
}
boolean wrongContig = false; for (FeatureRecord record : featureRecords) { ImmutableMap<String, Integer> dict = refDict.getContigNameToID(); final String seqID = record.getSeqID(); if (!dict.containsKey(seqID)) { } else if ("CDS".equals(record.getType()) || "stop_codon".equals(record.getType())) { GenomeInterval cds = new GenomeInterval(refDict, Strand.FWD, refDict.getContigNameToID().get(record.getSeqID()), record.getBegin(), record.getEnd()); cds = cds.withStrand(strand); if (cdsRegion == null)
Integer chrID = refDict.getContigNameToID().get(A[1]); if (chrID == null) // scaffolds such as chrUn_gl000243 cause Exception
/** * Build a {@link GenomeVariant} from a {@link VariantContext} object. * * In the case of exceptions, you can use {@link #buildErrorAnnotations} to build an {@link VariantAnnotations} with * an error message. * * @param vc * {@link VariantContext} describing the variant * @param alleleID * numeric identifier of the allele * @return {@link GenomeVariant} corresponding to <code>vc</code>, guaranteed to be on {@link Strand#FWD}. * @throws InvalidCoordinatesException * in the case that the reference in <code>vc</code> is not known in {@link #refDict}. */ public GenomeVariant buildGenomeVariant(VariantContext vc, int alleleID) throws InvalidCoordinatesException { // Catch the case that vc.getChr() is not in ChromosomeMap.identifier2chromosom. This is the case // for the "random" and "alternative locus" contigs etc. Integer boxedInt = refDict.getContigNameToID().get(vc.getContig()); if (boxedInt == null) throw new InvalidCoordinatesException("Unknown reference " + vc.getContig(), AnnotationMessage.ERROR_CHROMOSOME_NOT_FOUND); int chr = boxedInt.intValue(); // Build the GenomeChange object. final String ref = vc.getReference().getBaseString(); final Allele altAllele = vc.getAlternateAllele(alleleID); final String alt = altAllele.getBaseString(); final int pos = vc.getStart(); return new GenomeVariant(new GenomePosition(refDict, Strand.FWD, chr, pos, PositionType.ONE_BASED), ref, alt); }
/**
 * Decides whether a variant touches a gene.
 *
 * The variant is converted to a forward-strand interval from {@code vc.getStart() - 1} to {@code vc.getEnd()}
 * and tested for overlap with the gene's region.
 *
 * @param gene
 *            the gene to test
 * @param vc
 *            the variant to test
 * @return <code>true</code> if <code>gene</code> is affected by <code>vc</code>; <code>false</code> otherwise,
 *         including when the contig of <code>vc</code> is not in the reference dictionary
 */
private boolean isGeneAffectedByChange(Gene gene, VariantContext vc) {
    final ReferenceDictionary refDict = jannovarData.getRefDict();

    // A variant on a contig unknown to the dictionary cannot be placed on any gene; answering false avoids a
    // NullPointerException from unboxing the missing ID.
    final Integer contigID = refDict.getContigNameToID().get(vc.getContig());
    if (contigID == null) {
        return false;
    }

    final GenomeInterval changeInterval = new GenomeInterval(refDict, Strand.FWD, contigID, vc.getStart() - 1,
            vc.getEnd());

    // NOTE(review): zero-length changes have always produced false here, even when both flanking positions lie
    // inside the gene region; that looks unintended - confirm whether such changes should count as affecting
    // the gene. The result is kept as it was.
    if (changeInterval.length() == 0) {
        return false;
    }

    return gene.getRegion().overlapsWith(changeInterval);
}