@SuppressWarnings("deprecation") @Test @Ignore("not maintaining") public void testMapper2() throws IOException { mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cube_name); mapDriver.addInput(new Text("52010tech"), new Text("35.432")); List<Pair<RowKeyWritable, KeyValue>> result = mapDriver.run(); assertEquals(2, result.size()); byte[] bytes = { 0, 0, 0, 0, 0, 0, 0, 119, 33, 0, 22, 1, 0, 121, 7 }; ImmutableBytesWritable key = new ImmutableBytesWritable(bytes); Pair<RowKeyWritable, KeyValue> p1 = result.get(0); Pair<RowKeyWritable, KeyValue> p2 = result.get(1); assertEquals(key, p1.getFirst()); assertEquals("cf1", new String(p1.getSecond().getFamily(), StandardCharsets.UTF_8)); assertEquals("usd_amt", new String(p1.getSecond().getQualifier(), StandardCharsets.UTF_8)); assertEquals("35.43", new String(p1.getSecond().getValue(), StandardCharsets.UTF_8)); assertEquals(key, p2.getFirst()); assertEquals("cf1", new String(p2.getSecond().getFamily(), StandardCharsets.UTF_8)); assertEquals("item_count", new String(p2.getSecond().getQualifier(), StandardCharsets.UTF_8)); assertEquals("2", new String(p2.getSecond().getValue(), StandardCharsets.UTF_8)); } }
/** Builds a fresh MRUnit driver around a new CubeHFileMapper before each test. */
@Before
public void setUp() {
    mapDriver = MapDriver.newMapDriver(new CubeHFileMapper());
}
@SuppressWarnings("unchecked") @Test public void testMapperWithoutHeader() throws IOException { Text inputKey1 = new Text(new byte[] { 0, 0, 0, 0, 0, 0, 0, 127, 11, 56, -23, 0, 22, 98, 1, 0, 121, 7 }); Text inputKey2 = new Text(new byte[] { 0, 0, 0, 0, 0, 0, 0, 127, 11, 122, 1, 0, 22, 98, 1, 0, 121, 7 }); Text inputKey3 = new Text(new byte[] { 2, 2, 2, 2, 2, 2, 2, 127, 11, 56, -23, 0, 22, 98, 1, 0, 121, 7 }); Text inputKey4 = new Text(new byte[] { 3, 3, 3, 3, 3, 3, 3, 127, 11, 56, -23, 0, 22, 98, 1, 0, 121, 7 }); Text inputKey5 = new Text(new byte[] { 4, 4, 4, 4, 4, 4, 4, 127, 11, 56, -23, 0, 22, 98, 1, 0, 121, 7 }); Text inputKey6 = new Text(new byte[] { 5, 5, 5, 5, 5, 5, 5, 127, 11, 56, -23, 0, 22, 98, 1, 0, 121, 7 }); Text inputKey7 = new Text(new byte[] { 6, 6, 6, 6, 6, 6, 6, 127, 11, 56, -23, 0, 22, 98, 1, 0, 121, 7 }); mapDriver.addInput(inputKey1, new Text("abc")); mapDriver.addInput(inputKey2, new Text("abc")); mapDriver.addInput(inputKey3, new Text("abc")); mapDriver.addInput(inputKey4, new Text("abc")); mapDriver.addInput(inputKey5, new Text("abc")); mapDriver.addInput(inputKey6, new Text("abc")); mapDriver.addInput(inputKey7, new Text("abc")); List<Pair<Text, LongWritable>> result = mapDriver.run(); assertEquals(1, result.size()); byte[] key1 = result.get(0).getFirst().getBytes(); LongWritable value1 = result.get(0).getSecond(); assertArrayEquals(new byte[] { 6, 6, 6, 6, 6, 6, 6, 127, 11, 56, -23, 0, 22, 98, 1, 0, 121, 7 }, key1); assertEquals(147, value1.get()); }
@SuppressWarnings("unchecked") @Test public void testMapperOnComma() throws IOException { mapDriver.clearInput(); LongWritable inputKey1 = new LongWritable(1); LongWritable inputKey2 = new LongWritable(2); LongWritable inputKey3 = new LongWritable(3); LongWritable inputKey4 = new LongWritable(4); LongWritable inputKey5 = new LongWritable(5); LongWritable inputKey6 = new LongWritable(6); LongWritable inputKey7 = new LongWritable(7); mapDriver.addInput(inputKey1, new Text()); mapDriver.addInput(inputKey2, new Text(strArr)); mapDriver.addInput(inputKey3, new Text(strArr)); mapDriver.addInput(inputKey4, new Text(strArr)); mapDriver.addInput(inputKey5, new Text(strArr)); mapDriver.addInput(inputKey6, new Text(strArr)); mapDriver.addInput(inputKey7, new Text(strArr)); List<Pair<IntWritable, BytesWritable>> result = mapDriver.run(); assertEquals(9, result.size()); int key1 = result.get(0).getFirst().get(); BytesWritable value1 = result.get(0).getSecond(); byte[] bytes = value1.getBytes(); HyperLogLogPlusCounter hllc = new HyperLogLogPlusCounter(); hllc.readRegisters(ByteBuffer.wrap(bytes)); System.out.println("ab\177ab".length()); assertTrue(key1 > 0); assertEquals(1, hllc.getCountEstimate()); }
/**
 * Runs the synthetic hypercube data through the streaming k-means mapper and
 * verifies that every true cluster mean has an emitted centroid within
 * squared-Euclidean weight 0.5.
 */
@Test
public void testHypercubeMapper() throws IOException {
    MapDriver<Writable, VectorWritable, IntWritable, CentroidWritable> driver =
            MapDriver.newMapDriver(new StreamingKMeansMapper());
    configure(driver.getConfiguration());
    System.out.printf("%s mapper test\n",
            driver.getConfiguration().get(StreamingKMeansDriver.SEARCHER_CLASS_OPTION));
    // every data point goes to the single map key 0
    for (Centroid point : syntheticData.getFirst()) {
        driver.addInput(new IntWritable(0), new VectorWritable(point));
    }
    List<org.apache.hadoop.mrunit.types.Pair<IntWritable, CentroidWritable>> clustered = driver.run();
    // index the emitted centroids for nearest-neighbour lookup
    BruteSearch searcher = new BruteSearch(new SquaredEuclideanDistanceMeasure());
    for (org.apache.hadoop.mrunit.types.Pair<IntWritable, CentroidWritable> pair : clustered) {
        searcher.add(pair.getSecond().getCentroid());
    }
    System.out.printf("Clustered the data into %d clusters\n", clustered.size());
    for (Vector trueMean : syntheticData.getSecond()) {
        WeightedThing<Vector> nearest = searcher.search(trueMean, 1).get(0);
        assertTrue("Weight " + nearest.getWeight() + " not less than 0.5", nearest.getWeight() < 0.5);
    }
}
/**
 * End-to-end check of the cube build mapper: one flat row in, one encoded
 * rowkey + measures record out. The assertions below slice the rowkey as
 * [8-byte cuboid id][18-byte seller id][remaining 22 bytes of dimensions].
 */
@Test
public void testMapperWithHeader() throws Exception {
    String cubeName = "test_kylin_cube_with_slr_1_new_segment";
    String segmentName = "20130331080000_20131212080000";
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
    // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
    // metadata);
    mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrancesWomenAuction15123456789132.33"));
    List<Pair<Text, Text>> result = mapDriver.run();
    CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
    CubeInstance cube = cubeMgr.getCube(cubeName);
    assertEquals(1, result.size());
    Text rowkey = result.get(0).getFirst();
    byte[] key = rowkey.getBytes();
    // first 26 bytes = cuboid id (8) + seller id (18); the rest are the other dimensions
    byte[] header = Bytes.head(key, 26);
    byte[] sellerId = Bytes.tail(header, 18);
    byte[] cuboidId = Bytes.head(header, 8);
    byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);
    RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
    decoder.decode(key);
    // decoded dimension values, in rowkey order
    assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, Women, Auction, 0, 15]", decoder.getValues().toString());
    assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    // 511 = 0x1FF — NOTE(review): presumably the base cuboid with all 9 dimension bits set; confirm against the cube descriptor
    assertEquals(511, Bytes.toLong(cuboidId));
    assertEquals(22, restKey.length);
    verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "132.33", "132.33", "132.33");
}
public List<Pair<K3, V3>> run() throws IOException { List<Pair<K2, V2>> mapOutputs = new ArrayList<Pair<K2, V2>>(); // run map component for (Pair<K1, V1> input : inputList) { LOG.debug("Mapping input " + input.toString() + ")"); mapOutputs.addAll(new MapDriver<K1, V1, K2, V2>(myMapper).withInput( input).withCounters(getCounters()).withConfiguration(configuration).run()); } List<Pair<K2, List<V2>>> reduceInputs = shuffle(mapOutputs); List<Pair<K3, V3>> reduceOutputs = new ArrayList<Pair<K3, V3>>(); for (Pair<K2, List<V2>> input : reduceInputs) { K2 inputKey = input.getFirst(); List<V2> inputValues = input.getSecond(); StringBuilder sb = new StringBuilder(); formatValueList(inputValues, sb); LOG.debug("Reducing input (" + inputKey.toString() + ", " + sb.toString() + ")"); reduceOutputs.addAll(new ReduceDriver<K2, V2, K3, V3>(myReducer) .withCounters(getCounters()).withConfiguration(configuration) .withInputKey(inputKey).withInputValues(inputValues).run()); } return reduceOutputs; }
// NOTE(review): excerpt of a multi-mapper run loop; `mapper`, `counters`,
// `inputs`, and `outputs` are declared in the surrounding (unseen) method.
// The raw MapDriver type loses generic checking — consider parameterizing
// once the enclosing method's type parameters are visible.
MapDriver mapDriver = MapDriver.newMapDriver(mapper);
mapDriver.setCounters(counters);
mapDriver.setConfiguration(getConfiguration());
// queue every input recorded for this particular mapper
mapDriver.addAll(inputs.get(mapper));
mapDriver.withMapInputPath(getMapInputPath(mapper));
outputs.addAll(mapDriver.run());
/**
 * Runs map, optional combine, and reduce phases in sequence and returns the
 * reducer's output. Distributed-cache setup/teardown brackets the whole run
 * via the try/finally.
 */
@Override
public List<Pair<K3, V3>> run() throws IOException {
    try {
        preRunChecks(myMapper, myReducer);
        initDistributedCache();
        List<Pair<K2, V2>> mapOutputs = new ArrayList<Pair<K2, V2>>();
        // run map component
        LOG.debug("Starting map phase with mapper: " + myMapper);
        mapOutputs.addAll(MapDriver.newMapDriver(myMapper)
            .withCounters(getCounters()).withConfiguration(getConfiguration())
            .withAll(inputList).withMapInputPath(getMapInputPath()).run());
        if (myCombiner != null) {
            // User has specified a combiner. Run this and replace the mapper
            // outputs
            // with the result of the combiner. The combiner is run through the
            // same reduce-phase machinery as the reducer (K2,V2 -> K2,V2).
            LOG.debug("Starting combine phase with combiner: " + myCombiner);
            mapOutputs = new ReducePhaseRunner<K2, V2, K2, V2>(inputFormatClass, getConfiguration(), counters,
                getOutputSerializationConfiguration(), outputFormatClass)
                    .runReduce(sortAndGroup(mapOutputs), myCombiner);
        }
        // Run the reduce phase. sortAndGroup mimics the framework's shuffle.
        LOG.debug("Starting reduce phase with reducer: " + myReducer);
        return new ReducePhaseRunner<K2, V2, K3, V3>(inputFormatClass, getConfiguration(), counters,
            getOutputSerializationConfiguration(), outputFormatClass).runReduce(sortAndGroup(mapOutputs), myReducer);
    } finally {
        // always release cache resources, even if a phase throws
        cleanupDistributedCache();
    }
}
/**
 * Populates the given driver with {@code num} generated tuples: tuple
 * {@code i} is added as input under key {@code i}, and as an expected
 * output under the key derived from the generated value.
 *
 * @param driver
 *            Driver
 * @param num
 *            Number of tuples to generate
 */
protected void generateData(MapDriver<LongWritable, T, NodeWritable, T> driver, int num) {
    for (int index = 0; index < num; index++) {
        T tuple = this.createValue(index);
        NodeWritable expectedKey = this.getOutputKey(tuple);
        driver.addInput(new LongWritable(index), tuple);
        driver.addOutput(expectedKey, tuple);
    }
}
@Test public void shouldHandleNullRawData() throws IOException { // some RecordReaders may null out raw data entirely because they pass data to their // handlers in other ways. Verify that the EventMapper can handle this case. record.setRawData(null); driver.setInput(new LongWritable(1), record); driver.run(); Multimap<BulkIngestKey,Value> written = TestContextWriter.getWritten(); // two fields mutations + LOAD_DATE + ORIG_FILE + RAW_FILE assertEquals(5, written.size()); }
/**
 * Wires an EventMapper into a fresh MRUnit driver, registers the "file"
 * data type and its normalized fields, and builds a baseline raw record
 * stamped with the current time.
 */
@Before
public void setUp() {
    long now = System.currentTimeMillis();

    EventMapper<LongWritable,RawRecordContainer,BulkIngestKey,Value> eventMapper = new EventMapper<>();
    driver = new MapDriver<>(eventMapper);
    conf = driver.getConfiguration();
    // route mapper output into the in-memory TestContextWriter
    conf.setClass(EventMapper.CONTEXT_WRITER_CLASS, TestContextWriter.class, ContextWriter.class);

    Type dataType = new Type("file", null, null, new String[] {SimpleDataTypeHandler.class.getName()}, 10, null);
    TypeRegistry typeRegistry = TypeRegistry.getInstance(conf);
    typeRegistry.put(dataType.typeName(), dataType);

    Multimap<String,NormalizedContentInterface> normalizedFields = HashMultimap.create();
    normalizedFields.put("fileExtension", new BaseNormalizedContent("fileExtension", "gz"));
    normalizedFields.put("lastModified", new BaseNormalizedContent("lastModified", "2016-01-01"));
    SimpleDataTypeHelper.registerFields(normalizedFields);

    record = new SimpleRawRecord();
    record.setRawFileTimestamp(now);
    record.setDataType(dataType);
    record.setDate(now);
    record.setRawFileName("/some/filename");
    record.setRawData("some data".getBytes());
    record.generateId(null);
}
/**
 * Tests triples to quads conversion: a triple input under key 1 should be
 * emitted under the same key as a quad in the generated default graph.
 *
 * @throws IOException
 */
@Test
public void triples_to_quads_mapper_01() throws IOException {
    MapDriver<LongWritable, TripleWritable, LongWritable, QuadWritable> driver = this.getMapDriver();

    Triple t = new Triple(NodeFactory.createURI("http://s"), NodeFactory.createURI("http://p"),
            NodeFactory.createLiteral("test"));
    // wrap the triple in the default graph node to form the expected quad
    Quad q = new Quad(Quad.defaultGraphNodeGenerated, t);
    driver.withInput(new Pair<LongWritable, TripleWritable>(new LongWritable(1), new TripleWritable(t))).withOutput(
            new Pair<LongWritable, QuadWritable>(new LongWritable(1), new QuadWritable(q)));
    driver.runTest();
}
/**
 * Creates a new MapDriver, letting the compiler infer the four generic
 * parameters from the assignment target instead of repeating them on the
 * right-hand side of the object create statement.
 *
 * @return new MapDriver
 */
public static <K1, V1, K2, V2> MapDriver<K1, V1, K2, V2> newMapDriver() {
    final MapDriver<K1, V1, K2, V2> driver = new MapDriver<K1, V1, K2, V2>();
    return driver;
}
/** Lazily builds, caches, and returns the mock map context wrapper. */
private MockMapContextWrapper<K1, V1, K2, V2> getContextWrapper() {
    if (wrapper != null) {
        return wrapper;
    }
    wrapper = new MockMapContextWrapper<K1, V1, K2, V2>(getConfiguration(), inputs, mockOutputCreator, this);
    return wrapper;
}
/**
 * Fluent variant of the counters setter: installs {@code ctrs} and hands
 * the driver back for chaining.
 */
public MapDriver<K1, V1, K2, V2> withCounters(final Counters ctrs) {
    this.setCounters(ctrs);
    return this;
}
/**
 * Fluent variant of addOutput(): records an expected output pair and hands
 * the driver back for chaining.
 *
 * @param outputRecord the expected output pair
 * @return this
 */
public MapDriver<K1, V1, K2, V2> withOutput(Pair<K2, V2> outputRecord) {
    this.addOutput(outputRecord);
    return this;
}
/**
 * Fluent variant of the configuration setter: installs the configuration
 * that will be handed to the mapper under test.
 *
 * @param configuration The configuration object that will given to the mapper
 *        associated with the driver
 * @return this object for fluent coding
 */
public MapDriver<K1, V1, K2, V2> withConfiguration(Configuration configuration) {
    this.setConfiguration(configuration);
    return this;
}
}
// NOTE(review): truncated excerpt of a mapper-vs-batch k-means comparison
// test. The enclosing method header, the data-point loop header, the call
// that the dangling `getInt("estimatedNumMapClusters", -1), DISTANCE_CUTOFF)`
// arguments belong to, and at least one closing brace were cut off, so this
// fragment is not compilable as shown — restore from the original source.
MapDriver.newMapDriver(new StreamingKMeansMapper());
Configuration configuration = mapDriver.getConfiguration();
configure(configuration);
System.out.printf("%s mapper vs local test\n", mapDriver.getConfiguration().get(StreamingKMeansDriver.SEARCHER_CLASS_OPTION));
mapDriver.addInput(new IntWritable(0), new VectorWritable(datapoint));
for (org.apache.hadoop.mrunit.types.Pair<IntWritable, CentroidWritable> pair : mapDriver.run()) {
    mapperCentroids.add(pair.getSecond().getCentroid());
mapDriver.getConfiguration().getInt("estimatedNumMapClusters", -1), DISTANCE_CUTOFF);
batchClusterer.cluster(syntheticData.getFirst());
List<Centroid> batchCentroids = Lists.newArrayList();
// NOTE(review): excerpt — `mapDriver` and the enclosing test method are
// defined outside this view. Feeds one flat row through the cube mapper
// where two columns (the third category level and the final measure) are
// the '\N' null marker, exercising null-dimension handling.
String cubeName = "test_kylin_cube_with_slr_1_new_segment";
String segmentName = "20130331080000_20131212080000";
mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrances\\NAuction15123456789\\N"));
List<Pair<Text, Text>> result = mapDriver.run();