protected static List<List<KafkaPartition>> getMultiWorkUnitPartitions(MultiWorkUnit mwu) {
  List<List<KafkaPartition>> partitions = Lists.newArrayList();
  for (WorkUnit workUnit : mwu.getWorkUnits()) {
    partitions.add(KafkaUtils.getPartitions(workUnit));
  }
  return partitions;
}
private static List<KafkaPartition> getPartitionsFromMultiWorkUnit(MultiWorkUnit multiWorkUnit) {
  List<KafkaPartition> partitions = Lists.newArrayList();
  for (WorkUnit workUnit : multiWorkUnit.getWorkUnits()) {
    partitions.add(KafkaUtils.getPartition(workUnit));
  }
  return partitions;
}
protected static double getWorkUnitEstLoad(WorkUnit workUnit) {
  if (workUnit instanceof MultiWorkUnit) {
    MultiWorkUnit mwu = (MultiWorkUnit) workUnit;
    return Math.max(getWorkUnitEstSize(workUnit), EPS) * Math.log10(Math.max(mwu.getWorkUnits().size(), 2));
  }
  return Math.max(getWorkUnitEstSize(workUnit), EPS) * Math.log10(2.0);
}
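// Illustrative sketch (not from the source): the estimated-load heuristic above is
//   estLoad = max(estSize, EPS) * log10(max(numWorkUnits, 2)),
// so bundling more work units into a MultiWorkUnit of the same total size raises its load
// only logarithmically. The EPS value and the sizes below are hypothetical, chosen for demonstration.
public class EstLoadSketch {
  private static final double EPS = 0.01; // hypothetical small epsilon to keep the load non-zero

  static double estLoad(double estSize, int numWorkUnits) {
    return Math.max(estSize, EPS) * Math.log10(Math.max(numWorkUnits, 2));
  }

  public static void main(String[] args) {
    System.out.println(estLoad(100, 1));  // single work unit:  100 * log10(2)  ~= 30.1
    System.out.println(estLoad(100, 10)); // 10-unit bundle:    100 * log10(10) = 100.0
  }
}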
/** Seek to the next available work unit, skipping all empty work units */
private void seekNext() {
  if (!needSeek) {
    return;
  }

  // First, check whether the current MultiWorkUnit iterator still has work units left
  if (this.currentIterator != null && this.currentIterator.hasNext()) {
    needSeek = false;
    return;
  }

  // Then, find the next available work unit
  nextWu = null;
  this.currentIterator = null;
  while (nextWu == null && workUnits.hasNext()) {
    nextWu = workUnits.next();
    if (nextWu instanceof MultiWorkUnit) {
      this.currentIterator = ((MultiWorkUnit) nextWu).getWorkUnits().iterator();
      if (!this.currentIterator.hasNext()) {
        nextWu = null;
      }
    }
  }

  needSeek = false;
}
/**
 * Utility method that takes in a {@link Collection} of {@link WorkUnit}s, and flattens them. It builds up
 * the flattened list by checking each element of the given collection, and seeing if it is an instance of
 * {@link MultiWorkUnit}. If it is, then it calls itself on the {@link WorkUnit}s returned by
 * {@link MultiWorkUnit#getWorkUnits()}. If not, then it simply adds the {@link WorkUnit} to the
 * flattened list.
 *
 * @param workUnits is a {@link Collection} containing either {@link WorkUnit}s or {@link MultiWorkUnit}s
 * @return a {@link List} of flattened {@link WorkUnit}s
 */
public static List<WorkUnit> flattenWorkUnits(Collection<WorkUnit> workUnits) {
  List<WorkUnit> flattenedWorkUnits = Lists.newArrayList();
  for (WorkUnit workUnit : workUnits) {
    if (workUnit instanceof MultiWorkUnit) {
      flattenedWorkUnits.addAll(flattenWorkUnits(((MultiWorkUnit) workUnit).getWorkUnits()));
    } else {
      flattenedWorkUnits.add(workUnit);
    }
  }
  return flattenedWorkUnits;
}
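// Hypothetical usage sketch (not from the source) for flattenWorkUnits above: a nested
// MultiWorkUnit is unwrapped so that only plain WorkUnits remain. MultiWorkUnit.createEmpty(),
// WorkUnit.createEmpty() and MultiWorkUnit#addWorkUnit are the Gobblin APIs already used by the
// surrounding snippets; the JobLauncherUtils class name is taken from the snippets below.
MultiWorkUnit multiWorkUnit = MultiWorkUnit.createEmpty();
multiWorkUnit.addWorkUnit(WorkUnit.createEmpty());
multiWorkUnit.addWorkUnit(WorkUnit.createEmpty());

List<WorkUnit> mixed = Lists.newArrayList(WorkUnit.createEmpty(), multiWorkUnit);

// Flattening yields three plain WorkUnits: one standalone plus the two nested ones.
List<WorkUnit> flattened = JobLauncherUtils.flattenWorkUnits(mixed);
Assert.assertEquals(flattened.size(), 3);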
@Nullable
@Override
public WorkUnit apply(WorkUnit input) {
  if (input instanceof MultiWorkUnit) {
    for (WorkUnit wu : ((MultiWorkUnit) input).getWorkUnits()) {
      forWorkUnit(wu);
    }
  } else {
    forWorkUnit(input);
  }
  return input;
}
@Override
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  WorkUnit workUnit = (value.toString().endsWith(MULTI_WORK_UNIT_FILE_EXTENSION)
      ? MultiWorkUnit.createEmpty() : WorkUnit.createEmpty());
  SerializationUtils.deserializeState(this.fs, new Path(value.toString()), workUnit);

  if (workUnit instanceof MultiWorkUnit) {
    List<WorkUnit> flattenedWorkUnits =
        JobLauncherUtils.flattenWorkUnits(((MultiWorkUnit) workUnit).getWorkUnits());
    this.workUnits.addAll(flattenedWorkUnits);
  } else {
    this.workUnits.add(workUnit);
  }
}
private List<WorkUnit> getWorkUnits() throws IOException {
  String fileName = _workUnitFilePath.getName();
  String storeName = _workUnitFilePath.getParent().getName();
  WorkUnit workUnit;

  if (_workUnitFilePath.getName().endsWith(AbstractJobLauncher.MULTI_WORK_UNIT_FILE_EXTENSION)) {
    workUnit = _stateStores.getMwuStateStore().getAll(storeName, fileName).get(0);
  } else {
    workUnit = _stateStores.getWuStateStore().getAll(storeName, fileName).get(0);
  }

  // The list of individual WorkUnits (flattened) to run
  List<WorkUnit> workUnits = Lists.newArrayList();

  if (workUnit instanceof MultiWorkUnit) {
    // Flatten the MultiWorkUnit so the job configuration properties can be added to each individual WorkUnit
    List<WorkUnit> flattenedWorkUnits =
        JobLauncherUtils.flattenWorkUnits(((MultiWorkUnit) workUnit).getWorkUnits());
    workUnits.addAll(flattenedWorkUnits);
  } else {
    workUnits.add(workUnit);
  }
  return workUnits;
}
@SuppressWarnings("deprecation") protected static WatermarkInterval getWatermarkIntervalFromMultiWorkUnit(MultiWorkUnit multiWorkUnit) { List<Long> lowWatermarkValues = Lists.newArrayList(); List<Long> expectedHighWatermarkValues = Lists.newArrayList(); for (WorkUnit workUnit : multiWorkUnit.getWorkUnits()) { lowWatermarkValues.add(workUnit.getLowWaterMark()); expectedHighWatermarkValues.add(workUnit.getHighWaterMark()); } return new WatermarkInterval(new MultiLongWatermark(lowWatermarkValues), new MultiLongWatermark(expectedHighWatermarkValues)); }
@Test
public void testOverflows() throws Exception {
  // Test overflows
  List<WorkUnit> workUnitList = Lists.newArrayList(
      getWorkUnitWithWeight(Long.MAX_VALUE),
      getWorkUnitWithWeight(Long.MAX_VALUE),
      getWorkUnitWithWeight(10));

  List<WorkUnit> multiWorkUnits = new WorstFitDecreasingBinPacking(100).pack(workUnitList, weighter);
  Assert.assertEquals(multiWorkUnits.size(), 3);
  Assert.assertEquals(((MultiWorkUnit) multiWorkUnits.get(0)).getWorkUnits().size(), 1);
  Assert.assertEquals(((MultiWorkUnit) multiWorkUnits.get(1)).getWorkUnits().size(), 1);
  Assert.assertEquals(((MultiWorkUnit) multiWorkUnits.get(2)).getWorkUnits().size(), 1);
}
@Test
public void testLargeWorkUnits() throws Exception {
  // Accept even large work units that don't fit in a single bucket
  List<WorkUnit> workUnitList = Lists.newArrayList(
      getWorkUnitWithWeight(10),
      getWorkUnitWithWeight(30));

  List<WorkUnit> multiWorkUnits = new WorstFitDecreasingBinPacking(20).pack(workUnitList, weighter);
  Assert.assertEquals(multiWorkUnits.size(), 2);
  Assert.assertEquals(((MultiWorkUnit) multiWorkUnits.get(0)).getWorkUnits().size(), 1);
  Assert.assertEquals(((MultiWorkUnit) multiWorkUnits.get(1)).getWorkUnits().size(), 1);
}
private void addTopicSpecificPropsToWorkUnit(WorkUnit workUnit, Map<String, State> topicSpecificStateMap) {
  if (workUnit instanceof MultiWorkUnit) {
    for (WorkUnit wu : ((MultiWorkUnit) workUnit).getWorkUnits()) {
      addTopicSpecificPropsToWorkUnit(wu, topicSpecificStateMap);
    }
  } else if (!workUnit.contains(TOPIC_NAME)) {
    return;
  } else {
    addDatasetUrnOptionally(workUnit);
    if (topicSpecificStateMap == null) {
      return;
    } else if (!topicSpecificStateMap.containsKey(workUnit.getProp(TOPIC_NAME))) {
      return;
    } else {
      workUnit.addAll(topicSpecificStateMap.get(workUnit.getProp(TOPIC_NAME)));
    }
  }
}
@Override
public List<WorkUnit> pack(Map<String, List<WorkUnit>> workUnitsByTopic, int numContainers) {
  setWorkUnitEstSizes(workUnitsByTopic);
  List<WorkUnit> workUnits = Lists.newArrayList();
  for (List<WorkUnit> workUnitsForTopic : workUnitsByTopic.values()) {

    // For each topic, merge all empty workunits into a single workunit, so that a single
    // empty task will be created instead of many.
    MultiWorkUnit zeroSizeWorkUnit = MultiWorkUnit.createEmpty();
    for (WorkUnit workUnit : workUnitsForTopic) {
      if (DoubleMath.fuzzyEquals(getWorkUnitEstSize(workUnit), 0.0, EPS)) {
        addWorkUnitToMultiWorkUnit(workUnit, zeroSizeWorkUnit);
      } else {
        workUnit.setWatermarkInterval(getWatermarkIntervalFromWorkUnit(workUnit));
        workUnits.add(workUnit);
      }
    }
    if (!zeroSizeWorkUnit.getWorkUnits().isEmpty()) {
      workUnits.add(squeezeMultiWorkUnit(zeroSizeWorkUnit));
    }
  }
  return worstFitDecreasingBinPacking(workUnits, numContainers);
}
@Test
public void testOneLargeUnitManySmallUnits() throws Exception {
  // Check that a large work unit doesn't prevent small work units from being packed together
  // (this was an issue in a previous implementation of the algorithm)
  List<WorkUnit> workUnitList = Lists.newArrayList(
      getWorkUnitWithWeight(10),
      getWorkUnitWithWeight(10),
      getWorkUnitWithWeight(10),
      getWorkUnitWithWeight(10000));

  List<WorkUnit> multiWorkUnits = new WorstFitDecreasingBinPacking(50).pack(workUnitList, weighter);
  Assert.assertEquals(multiWorkUnits.size(), 2);
  Assert.assertEquals(((MultiWorkUnit) multiWorkUnits.get(0)).getWorkUnits().size(), 3);
  Assert.assertEquals(((MultiWorkUnit) multiWorkUnits.get(1)).getWorkUnits().size(), 1);
}
@Override
public boolean apply(WorkUnit workUnit) {
  if (workUnit instanceof MultiWorkUnit) {
    Preconditions.checkArgument(!workUnit.contains(ConfigurationKeys.WORK_UNIT_SKIP_KEY),
        "Error: MultiWorkUnit cannot be skipped");
    for (WorkUnit wu : ((MultiWorkUnit) workUnit).getWorkUnits()) {
      Preconditions.checkArgument(!wu.contains(ConfigurationKeys.WORK_UNIT_SKIP_KEY),
          "Error: MultiWorkUnit cannot contain skipped WorkUnit");
    }
  }
  if (workUnit.getPropAsBoolean(ConfigurationKeys.WORK_UNIT_SKIP_KEY, false)) {
    WorkUnitState workUnitState = new WorkUnitState(workUnit, this.jobState);
    workUnitState.setWorkingState(WorkUnitState.WorkingState.SKIPPED);
    this.jobState.addSkippedTaskState(new TaskState(workUnitState));
    return false;
  }
  return true;
}
@Test
public void testBasicPacking() throws Exception {
  List<WorkUnit> workUnitList = Lists.newArrayList(
      getWorkUnitWithWeight(10),
      getWorkUnitWithWeight(10));
  List<WorkUnit> multiWorkUnits = new WorstFitDecreasingBinPacking(20).pack(workUnitList, weighter);
  Assert.assertEquals(multiWorkUnits.size(), 1);
  Assert.assertEquals(((MultiWorkUnit) multiWorkUnits.get(0)).getWorkUnits().size(), 2);

  workUnitList = Lists.newArrayList(
      getWorkUnitWithWeight(10),
      getWorkUnitWithWeight(20));
  multiWorkUnits = new WorstFitDecreasingBinPacking(20).pack(workUnitList, weighter);
  Assert.assertEquals(multiWorkUnits.size(), 2);
  Assert.assertEquals(((MultiWorkUnit) multiWorkUnits.get(0)).getWorkUnits().size(), 1);
  Assert.assertEquals(((MultiWorkUnit) multiWorkUnits.get(1)).getWorkUnits().size(), 1);

  workUnitList = Lists.newArrayList(
      getWorkUnitWithWeight(10),
      getWorkUnitWithWeight(10),
      getWorkUnitWithWeight(20));
  multiWorkUnits = new WorstFitDecreasingBinPacking(20).pack(workUnitList, weighter);
  Assert.assertEquals(multiWorkUnits.size(), 2);
  Assert.assertEquals(((MultiWorkUnit) multiWorkUnits.get(0)).getWorkUnits().size(), 1);
  Assert.assertEquals(((MultiWorkUnit) multiWorkUnits.get(1)).getWorkUnits().size(), 2);
}
/**
 * Test for {@link MultiWorkUnitWeightedQueue#MultiWorkUnitWeightedQueue()}. It adds a series of WorkUnits to an
 * instance of MultiWorkUnitWeightedQueue and checks the size of all the WorkUnits returned by
 * {@link MultiWorkUnitWeightedQueue#getQueueAsList()}.
 */
@Test
public void testDefaultConstructor() {
  int numWorkUnits = 10;
  int weight = 1;

  MultiWorkUnitWeightedQueue multiWorkUnitWeightedQueue = new MultiWorkUnitWeightedQueue();

  WorkUnit workUnit = WorkUnit.createEmpty();
  for (int i = 0; i < numWorkUnits; i++) {
    multiWorkUnitWeightedQueue.addWorkUnit(workUnit, weight);
  }

  List<WorkUnit> multiWorkUnitWeightedQueueList = multiWorkUnitWeightedQueue.getQueueAsList();
  Assert.assertEquals(multiWorkUnitWeightedQueueList.size(), numWorkUnits);

  MultiWorkUnit multiWorkUnit;
  for (WorkUnit workUnitElement : multiWorkUnitWeightedQueueList) {
    multiWorkUnit = (MultiWorkUnit) workUnitElement;
    Assert.assertEquals(multiWorkUnit.getWorkUnits().size(), 1);
  }
}
copy.readFields(dis);

List<WorkUnit> workUnitList = copy.getWorkUnits();
Assert.assertEquals(workUnitList.size(), 2);
/**
 * Test for {@link MultiWorkUnitWeightedQueue#MultiWorkUnitWeightedQueue(int)}. It sets a limit on the maximum number
 * of MultiWorkUnits that can be created, adds a series of WorkUnits to the queue, and checks the results of
 * {@link MultiWorkUnitWeightedQueue#getQueueAsList()} to ensure each MultiWorkUnit created is of proper length.
 */
@Test
public void testWithQueueSizeLimit() {
  int maxMultiWorkUnits = 10;
  int numWorkUnits = 100;
  int weight = 1;

  MultiWorkUnitWeightedQueue multiWorkUnitWeightedQueue = new MultiWorkUnitWeightedQueue(maxMultiWorkUnits);

  WorkUnit workUnit = WorkUnit.createEmpty();
  for (int i = 0; i < numWorkUnits; i++) {
    multiWorkUnitWeightedQueue.addWorkUnit(workUnit, weight);
  }

  MultiWorkUnit multiWorkUnit;
  for (WorkUnit workUnitElement : multiWorkUnitWeightedQueue.getQueueAsList()) {
    multiWorkUnit = (MultiWorkUnit) workUnitElement;
    Assert.assertEquals(multiWorkUnit.getWorkUnits().size(), numWorkUnits / maxMultiWorkUnits);
  }
}
private void verifyWorkUnits(List<WorkUnit> workunits, int expectedSize)
    throws DataRecordException, IOException {
  for (int i = 0; i < expectedSize; i++) {
    WorkUnit workUnit = ((MultiWorkUnit) workunits.get(i)).getWorkUnits().get(0);
    WorkUnitState wuState = new WorkUnitState(workunits.get(i), new State());
    wuState.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, ConfigurationKeys.LOCAL_FS_URI);
    wuState.setProp(ConfigurationKeys.SOURCE_FILEBASED_FILES_TO_PULL,
        workUnit.getProp(ConfigurationKeys.SOURCE_FILEBASED_FILES_TO_PULL));

    try (DatePartitionedAvroFileExtractor extractor = new DatePartitionedAvroFileExtractor(wuState)) {
      GenericRecord record = extractor.readRecord(null);
      Assert.assertEquals(recordTimestamps[i], record.get(PARTITION_COLUMN_NAME));
      Assert.assertEquals(recordTimestamps[i], workUnit.getPropAsLong(ConfigurationKeys.WORK_UNIT_DATE_PARTITION_KEY));
    }
  }
}