/**
 * Merges every recalibration datum of table2 into table1.
 *
 * table1 is the destination and is modified in place. A leaf of table2 whose key has no
 * entry in table1 is put into table1 as the same RecalDatum instance (no copy is made);
 * otherwise the leaf's value is combined into the datum already present in table1.
 *
 * @param table1 the destination table that receives the data of table2
 * @param table2 the source table whose leaves are merged into table1
 * @throws IllegalArgumentException if either table is null or their dimensions differ
 */
public static void combineTables(final NestedIntegerArray<RecalDatum> table1, final NestedIntegerArray<RecalDatum> table2) {
    if ( table1 == null ) {
        throw new IllegalArgumentException("table1 cannot be null");
    }
    if ( table2 == null ) {
        throw new IllegalArgumentException("table2 cannot be null");
    }

    // both tables must have exactly the same shape before any key can be shared between them
    final boolean sameShape = Arrays.equals(table1.getDimensions(), table2.getDimensions());
    if ( ! sameShape ) {
        throw new IllegalArgumentException("Table1 " + Utils.join(",", table1.getDimensions()) + " not equal to " + Utils.join(",", table2.getDimensions()));
    }

    for ( final NestedIntegerArray.Leaf<RecalDatum> source : table2.getAllLeaves() ) {
        final RecalDatum target = table1.get(source.keys);
        if ( target != null ) {
            // destination already has data at this key: fold the source datum into it
            target.combine(source.value);
        } else {
            // nothing at this key yet: store the source datum itself
            table1.put(source.value, source.keys);
        }
    }
}
/**
 * Builds a 3 x 3 table containing one RecalDatum per supplied row.
 *
 * Each datum is constructed from the row's {@code no} (as long), its {@code ne} (as double)
 * and the constant byte 10, and is stored at key ({@code rg}, {@code qual}).
 *
 * @param rows the rows to load into the table
 * @return the newly allocated and populated table
 */
public NestedIntegerArray<RecalDatum> makeTable(final List<Row> rows) {
    final NestedIntegerArray<RecalDatum> table = new NestedIntegerArray<RecalDatum>(3, 3);
    for ( final Row row : rows ) {
        final RecalDatum datum = new RecalDatum((long)row.no, (double)row.ne, (byte)10);
        table.put(datum, row.rg, row.qual);
    }
    return table;
}
} // closes a scope that was opened outside this view
/**
 * Reports whether none of the tables holds any RecalDatum.
 *
 * @return true if getAllValues() is empty for every table, false as soon as one table has a value
 */
public boolean isEmpty() {
    for ( final NestedIntegerArray<RecalDatum> candidate : tables ) {
        final boolean tableHasNoData = candidate.getAllValues().isEmpty();
        if ( tableHasNoData ) {
            continue;
        }
        // one populated table is enough to answer
        return false;
    }
    return true;
}
/**
 * Recursively collects every non-null, non-array element found beneath array into result.
 *
 * @param array  the nesting level to scan
 * @param path   the key path accumulated so far; it is extended with each visited index via appendToPath
 * @param result the list that receives one Leaf (extended path plus value) per element found
 */
private void fillAllLeaves(final Object[] array, final int[] path, final List<Leaf<T>> result) {
    for ( int index = 0; index < array.length; index++ ) {
        final Object element = array[index];
        if ( element != null ) {
            final int[] extendedPath = appendToPath(path, index);
            if ( ! (element instanceof Object[]) ) {
                // anything that is not a nested array is treated as a stored value
                result.add(new Leaf<T>(extendedPath, (T)element));
            } else {
                // nested array: descend one level
                fillAllLeaves((Object[]) element, extendedPath, result);
            }
        }
    }
}
/** * Updates the current RecalDatum element in the delta table. * * If it doesn't have an element yet, it creates an RecalDatum element and adds it to the delta table. * * @param deltaTable the delta table * @param deltaKey the key to the table * @param recalDatum the recal datum to combine with the accuracyDatum element in the table */ private static void addToDeltaTable(final NestedIntegerArray<RecalDatum> deltaTable, final int[] deltaKey, final RecalDatum recalDatum) { final RecalDatum deltaDatum = deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key if (deltaDatum == null) // if we don't have a key yet, create a new one with the same values as the current datum deltaTable.put(new RecalDatum(recalDatum), deltaKey); else // if we do have a datum, combine it with this one deltaDatum.combine(recalDatum); }
/** * Finalize, if appropriate, all derived data in recalibrationTables. * * Called once after all calls to updateDataForRead have been issued. * * Assumes that all of the principal tables (by quality score) have been completely updated, * and walks over this data to create summary data tables like by read group table. */ public void finalizeData() { if ( finalized ) throw new IllegalStateException("FinalizeData() has already been called"); // merge all of the thread-local tables finalRecalibrationTables = mergeThreadLocalRecalibrationTables(); final NestedIntegerArray<RecalDatum> byReadGroupTable = finalRecalibrationTables.getReadGroupTable(); final NestedIntegerArray<RecalDatum> byQualTable = finalRecalibrationTables.getQualityScoreTable(); // iterate over all values in the qual table for ( final NestedIntegerArray.Leaf<RecalDatum> leaf : byQualTable.getAllLeaves() ) { final int rgKey = leaf.keys[0]; final int eventIndex = leaf.keys[2]; final RecalDatum rgDatum = byReadGroupTable.get(rgKey, eventIndex); final RecalDatum qualDatum = leaf.value; if ( rgDatum == null ) { // create a copy of qualDatum, and initialize byReadGroup table with it byReadGroupTable.put(new RecalDatum(qualDatum), rgKey, eventIndex); } else { // combine the qual datum with the existing datum in the byReadGroup table rgDatum.combine(qualDatum); } } finalized = true; }
/**
 * Checks that RecalUtils.combineTables merges table2 into table1 as predicted by makeExpected.
 *
 * Both row lists are turned into tables, combined in place into the first one, and the result
 * is compared against the table built from the expected rows: same dimensions, same number of
 * values, and for every expected leaf the same mismatch and observation counts.
 *
 * @param table1 rows of the destination table
 * @param table2 rows of the table merged into the destination
 */
@Test(dataProvider = "CombineTablesProvider")
public void testCombineTables(final List<Row> table1, final List<Row> table2) {
    final NestedIntegerArray<RecalDatum> nia1 = makeTable(table1);
    final NestedIntegerArray<RecalDatum> nia2 = makeTable(table2);
    final List<Row> expectedRows = makeExpected(table1, table2);
    final NestedIntegerArray<RecalDatum> expected = makeTable(expectedRows);

    RecalUtils.combineTables(nia1, nia2);

    Assert.assertEquals(nia1.getDimensions(), expected.getDimensions());
    Assert.assertEquals(nia1.getAllValues().size(), expected.getAllValues().size());

    for ( final NestedIntegerArray.Leaf<RecalDatum> leaf : expected.getAllLeaves() ) {
        final RecalDatum actual = nia1.get(leaf.keys);
        // fail with an assertion instead of a NullPointerException when the merged table lacks an expected key
        Assert.assertNotNull(actual);
        Assert.assertEquals(actual.getNumMismatches(), leaf.value.getNumMismatches());
        Assert.assertEquals(actual.getNumObservations(), leaf.value.getNumObservations());
    }
}
/**
 * Combines the fixture tables into a freshly constructed RecalibrationTables and checks that
 * every table of the result has the same number of leaves as the fixture, and that each
 * fixture leaf is present in the result with identical observation and mismatch counts.
 */
@Test
public void testCombineEmptyOther() {
    final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups);
    merged.combine(tables);

    for ( int tableIndex = 0; tableIndex < tables.numTables(); tableIndex++ ) {
        final NestedIntegerArray<RecalDatum> sourceTable = tables.getTable(tableIndex);
        final NestedIntegerArray<RecalDatum> resultTable = merged.getTable(tableIndex);

        final int sourceLeafCount = sourceTable.getAllLeaves().size();
        final int resultLeafCount = resultTable.getAllLeaves().size();
        Assert.assertEquals(sourceLeafCount, resultLeafCount);

        for ( final NestedIntegerArray.Leaf<RecalDatum> sourceLeaf : sourceTable.getAllLeaves() ) {
            final RecalDatum resultDatum = resultTable.get(sourceLeaf.keys);
            Assert.assertNotNull(resultDatum);
            Assert.assertEquals(resultDatum.getNumObservations(), sourceLeaf.value.getNumObservations());
            Assert.assertEquals(resultDatum.getNumMismatches(), sourceLeaf.value.getNumMismatches());
        }
    }
}
@Override public boolean put( final T value, final int... keys ) { StringBuilder logEntry = new StringBuilder(); logEntry.append(logEntryLabel); logEntry.append("\t"); logEntry.append(NestedIntegerArrayOperation.PUT); logEntry.append("\t"); logEntry.append(value); for ( int key : keys ) { logEntry.append("\t"); logEntry.append(key); } // PrintStream methods all use synchronized blocks internally, so our logging is thread-safe log.println(logEntry.toString()); return super.put(value, keys); } }
@Override public T get( final int... keys ) { StringBuilder logEntry = new StringBuilder(); logEntry.append(logEntryLabel); logEntry.append("\t"); logEntry.append(NestedIntegerArrayOperation.GET); logEntry.append("\t"); // empty field for the datum value for ( int key : keys ) { logEntry.append("\t"); logEntry.append(key); } log.println(logEntry.toString()); return super.get(keys); }
@Requires("recalibrationTables != null && numCovariates > 0") @Ensures("result != null") private static NestedIntegerArray<RecalDatum> createDeltaTable(final RecalibrationTables recalibrationTables, final int numCovariates) { final int[] dimensionsForDeltaTable = new int[4]; // initialize the dimensions with those of the qual table to start with final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getQualityScoreTable(); final int[] dimensionsOfQualTable = qualTable.getDimensions(); dimensionsForDeltaTable[0] = dimensionsOfQualTable[0]; // num read groups dimensionsForDeltaTable[1] = numCovariates + 1; // num covariates dimensionsForDeltaTable[2] = dimensionsOfQualTable[1]; dimensionsForDeltaTable[3] = dimensionsOfQualTable[2]; // now, update the dimensions based on the optional covariate tables as needed for ( int i = RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal(); i < numCovariates; i++ ) { final NestedIntegerArray<RecalDatum> covTable = recalibrationTables.getTable(i); final int[] dimensionsOfCovTable = covTable.getDimensions(); dimensionsForDeltaTable[2] = Math.max(dimensionsForDeltaTable[2], dimensionsOfCovTable[2]); dimensionsForDeltaTable[3] = Math.max(dimensionsForDeltaTable[3], dimensionsOfCovTable[3]); } return new NestedIntegerArray<RecalDatum>(dimensionsForDeltaTable); }
// Fragment: three loops over the leaves of qualTable, covTable and deltaTable. The first two copy
// leaf.keys[0] into slot 0 of a fresh int[4]; the third derives deltaKeys via generateValuesFromKeys
// and reads the leaf's value as deltaDatum.
// NOTE(review): as shown, none of the loops is closed and each declares a variable named `leaf`;
// presumably these are excerpts of separate loops whose bodies continue outside this view — confirm.
// NOTE(review): the first two loops use the raw NestedIntegerArray.Leaf type, the third Leaf<RecalDatum>.
for (final NestedIntegerArray.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table final int[] newCovs = new int[4]; newCovs[0] = leaf.keys[0]; for (final NestedIntegerArray.Leaf leaf : covTable.getAllLeaves()) { final int[] covs = new int[4]; covs[0] = leaf.keys[0]; for (final NestedIntegerArray.Leaf<RecalDatum> leaf : deltaTable.getAllLeaves()) { final List<Object> deltaKeys = generateValuesFromKeys(leaf.keys, requestedCovariates, covariateNameMap); final RecalDatum deltaDatum = leaf.value;
/**
 * Allocates a new, empty quality score table sized numReadGroups x qualDimension x
 * eventDimension, as configured for this set of tables. The result is suitable for use as a
 * thread-local cache of quality score values.
 *
 * When a log stream is configured, the returned table is a LoggingNestedIntegerArray labelled
 * "QUALITY_SCORE_TABLE"; otherwise it is a plain NestedIntegerArray.
 *
 * @return a newly allocated, empty read group x quality score table
 */
public NestedIntegerArray<RecalDatum> makeQualityScoreTable() {
    if ( log != null ) {
        return new LoggingNestedIntegerArray<RecalDatum>(log, "QUALITY_SCORE_TABLE", numReadGroups, qualDimension, eventDimension);
    }
    return new NestedIntegerArray<RecalDatum>(numReadGroups, qualDimension, eventDimension);
}
/**
 * Asserts that table has the given leading dimensions followed by one trailing dimension equal
 * to the number of EventType values.
 *
 * @param table      the table whose dimensions are checked
 * @param dimensions the expected leading dimensions, in order
 */
private void testDimensions(final NestedIntegerArray<RecalDatum> table, final int ... dimensions) {
    final int leadingCount = dimensions.length;

    // expected shape = caller-supplied dimensions + the event-type dimension at the end
    final int[] expected = new int[leadingCount + 1];
    for ( int axis = 0; axis < leadingCount; axis++ ) {
        expected[axis] = dimensions[axis];
    }
    expected[leadingCount] = EventType.values().length;

    Assert.assertEquals(table.getDimensions().length, expected.length);
    for ( int axis = 0; axis < expected.length; axis++ ) {
        Assert.assertEquals(table.getDimensions()[axis], expected[axis], "Table dimensions not expected at dim " + axis);
    }
}
/**
 * Fills a freshly constructed RecalibrationTables via fillTable, combines the fixture tables
 * into it, and checks that every table of the result has the same number of leaves as the
 * fixture and that each merged datum has exactly twice the observation and mismatch counts of
 * the corresponding fixture leaf.
 */
@Test
public void testCombine1() {
    final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups);
    fillTable(merged);
    merged.combine(tables);

    for ( int tableIndex = 0; tableIndex < tables.numTables(); tableIndex++ ) {
        final NestedIntegerArray<RecalDatum> sourceTable = tables.getTable(tableIndex);
        final NestedIntegerArray<RecalDatum> resultTable = merged.getTable(tableIndex);

        final int sourceLeafCount = sourceTable.getAllLeaves().size();
        final int resultLeafCount = resultTable.getAllLeaves().size();
        Assert.assertEquals(sourceLeafCount, resultLeafCount);

        for ( final NestedIntegerArray.Leaf<RecalDatum> sourceLeaf : sourceTable.getAllLeaves() ) {
            final RecalDatum resultDatum = resultTable.get(sourceLeaf.keys);
            Assert.assertNotNull(resultDatum);
            // the merged value is expected to be double the fixture value
            Assert.assertEquals(resultDatum.getNumObservations(), sourceLeaf.value.getNumObservations() * 2);
            Assert.assertEquals(resultDatum.getNumMismatches(), sourceLeaf.value.getNumMismatches() * 2);
        }
    }
}
/** * Increments the RecalDatum at the specified position in the specified table, or put a new item there * if there isn't already one. * * Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put() * to return false if another thread inserts a new item at our position in the middle of our put operation. * * @param table the table that holds/will hold our item * @param qual qual for this event * @param isError error value for this event * @param keys location in table of our item */ public static void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table, final byte qual, final double isError, final int... keys ) { final RecalDatum existingDatum = table.get(keys); if ( existingDatum == null ) { // No existing item, try to put a new one if ( ! table.put(createDatumObject(qual, isError), keys) ) { // Failed to put a new item because another thread came along and put an item here first. // Get the newly-put item and increment it (item is guaranteed to exist at this point) table.get(keys).increment(1L, isError); } } else { // Easy case: already an item here, so increment it existingDatum.increment(1L, isError); } }
/**
 * Loads the ReadGroup table from a report table.
 *
 * For every report row, the read group value is translated to a key by the first requested
 * covariate, the event type name is translated to its ordinal, and the row's RecalDatum is
 * stored in rgTable under that (read group key, event ordinal) pair. The key pair is written
 * into the reused tempRGarray buffer.
 *
 * @param reportTable the GATKReport table containing data for this table
 * @param rgTable     the table that receives the parsed data
 */
private void parseReadGroupTable(final GATKReportTable reportTable, final NestedIntegerArray<RecalDatum> rgTable) {
    for ( int row = 0; row < reportTable.getNumRows(); row++ ) {
        final Object readGroup = reportTable.get(row, RecalUtils.READGROUP_COLUMN_NAME);
        tempRGarray[0] = requestedCovariates[0].keyFromValue(readGroup);

        final String eventName = (String)reportTable.get(row, RecalUtils.EVENT_TYPE_COLUMN_NAME);
        final EventType eventType = EventType.eventFrom(eventName);
        tempRGarray[1] = eventType.ordinal();

        final RecalDatum datum = getRecalDatum(reportTable, row, true);
        rgTable.put(datum, tempRGarray);
    }
}
// Fragment: fetches the read group datum at (rgKey, errorModel ordinal) and the quality score datum
// at (keySet[0], keySet[1], errorModel ordinal), then starts collecting data from the tables at
// index 2 and above into empiricalQualCovs. The loop's closing brace is outside this view.
// NOTE(review): the loop body begins with an unconditional `continue;`, so the following
// `empiricalQualCovs.add(...)` can never execute and javac rejects it as an unreachable statement.
// A guard condition in front of `continue` appears to be missing — TODO confirm against the full file.
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.ordinal()); final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(keySet[0], keySet[1], errorModel.ordinal()); final List<RecalDatum> empiricalQualCovs = new ArrayList<RecalDatum>(); for (int i = 2; i < requestedCovariates.length; i++) { continue; empiricalQualCovs.add(recalibrationTables.getTable(i).get(keySet[0], keySet[1], keySet[i], errorModel.ordinal()));
// Fragment: walks every leaf of `table`, taking the leaf's value (cast to RecalDatum) and its key
// array. The loop body continues outside this view.
// NOTE(review): the raw NestedIntegerArray.Leaf type is presumably why the explicit cast is needed.
for (final NestedIntegerArray.Leaf row : table.getAllLeaves()) { final RecalDatum datum = (RecalDatum)row.value; final int[] keys = row.keys;
public RecalibrationTables(final Covariate[] covariates, final int numReadGroups, final PrintStream log) { tables = new ArrayList<NestedIntegerArray<RecalDatum>>(covariates.length); for ( int i = 0; i < covariates.length; i++ ) tables.add(i, null); // initialize so we can set below qualDimension = covariates[TableType.QUALITY_SCORE_TABLE.ordinal()].maximumKeyValue() + 1; this.numReadGroups = numReadGroups; this.log = log; tables.set(TableType.READ_GROUP_TABLE.ordinal(), log == null ? new NestedIntegerArray<RecalDatum>(numReadGroups, eventDimension) : new LoggingNestedIntegerArray<RecalDatum>(log, "READ_GROUP_TABLE", numReadGroups, eventDimension)); tables.set(TableType.QUALITY_SCORE_TABLE.ordinal(), makeQualityScoreTable()); for (int i = TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal(); i < covariates.length; i++) tables.set(i, log == null ? new NestedIntegerArray<RecalDatum>(numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension) : new LoggingNestedIntegerArray<RecalDatum>(log, String.format("OPTIONAL_COVARIATE_TABLE_%d", i - TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + 1), numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension)); }