/**
 * Decodes a {@code long} from the beginning of a byte array. This is the
 * inverse of {@link #toBytes(long)}.
 *
 * @param bytes array holding the encoded value; reading starts at index 0
 * @return the decoded long value
 */
public static long toLong(byte[] bytes) {
    final int start = 0;
    return toLong(bytes, start, SIZEOF_LONG);
}
/**
 * Decodes a {@code long} from a byte array starting at the given position.
 * {@link #SIZEOF_LONG} bytes are expected to be available from that position.
 *
 * @param bytes  array holding the encoded value
 * @param offset index of the first byte to read
 * @return the decoded long value
 */
public static long toLong(byte[] bytes, int offset) {
    final int length = SIZEOF_LONG;
    return toLong(bytes, offset, length);
}
/**
 * Reads the cuboid id out of the given bytes.
 *
 * @param bytes bytes to read from; when {@code enableSharding} is set the
 *              first {@code RowConstants.ROWKEY_SHARDID_LEN} bytes are skipped
 * @return the long decoded from the {@code RowConstants.ROWKEY_CUBOIDID_LEN}
 *         bytes at the computed offset
 */
public long parseCuboid(byte[] bytes) {
    int offset;
    if (enableSharding) {
        // skip the shard id that precedes the cuboid id
        offset = RowConstants.ROWKEY_SHARDID_LEN;
    } else {
        offset = 0;
    }
    return Bytes.toLong(bytes, offset, RowConstants.ROWKEY_CUBOIDID_LEN);
}

/**
/**
 * Decodes a {@code double} from a byte array. The {@link #SIZEOF_LONG} bytes
 * at the given offset are read as a long and reinterpreted as IEEE 754 bits.
 *
 * @param bytes  byte array
 * @param offset offset where the double is stored
 * @return the double made from the passed bytes
 */
public static double toDouble(final byte[] bytes, final int offset) {
    final long rawBits = toLong(bytes, offset, SIZEOF_LONG);
    return Double.longBitsToDouble(rawBits);
}
/**
 * Extracts the timestamp stored under {@code B_FAMILY}/{@code B_COLUMN_TS}
 * in the given result.
 *
 * @param r result to read from; may be {@code null}
 * @return the long decoded from the cell value, or {@code 0} when the result
 *         is {@code null} or carries no value for that column
 */
private long getTimestamp(Result r) {
    if (r == null) {
        return 0;
    }
    // Fetch the cell once; the value is needed both for the null test and for decoding.
    final byte[] tsBytes = r.getValue(B_FAMILY, B_COLUMN_TS);
    if (tsBytes == null) {
        return 0;
    }
    return Bytes.toLong(tsBytes);
}
/**
 * Chooses the partition for a record from the cuboid id encoded at the start
 * of the key.
 *
 * @param key            key whose leading bytes hold the cuboid id as a long
 * @param value          unused
 * @param numReduceTasks total number of reduce tasks
 * @return {@code numReduceTasks - shard - 1}, where {@code shard} is the
 *         cuboid id's hash code modulo {@code hllShardBase}, shifted up by
 *         {@code hllShardBase} if the remainder is negative
 */
@Override
public int getPartition(Text key, Text value, int numReduceTasks) {
    final long cuboidId = Bytes.toLong(key.getBytes());
    final int remainder = Long.valueOf(cuboidId).hashCode() % hllShardBase;
    // Java's % keeps the sign of the dividend, so lift negative remainders.
    final int shard = remainder < 0 ? remainder + hllShardBase : remainder;
    return numReduceTasks - shard - 1;
}
/**
 * Looks up a cuboid by its id given in byte form.
 *
 * @param cuboidScheduler scheduler passed through to the long-based lookup
 * @param cuboidID        cuboid id encoded as a long in a byte array
 * @return the result of the long-based {@code findById} lookup
 */
public static Cuboid findById(CuboidScheduler cuboidScheduler, byte[] cuboidID) {
    final long id = Bytes.toLong(cuboidID);
    return findById(cuboidScheduler, id);
}
/**
 * Chooses the partition for a record based on the first byte of its key.
 *
 * @param skey           sortable key wrapping the text key
 * @param value          unused
 * @param numReduceTasks unused
 * @return for keys starting with
 *         {@code FactDistinctColumnsReducerMapping.MARK_FOR_HLL_COUNTER}, the
 *         reducer id mapped to the cuboid id stored in the following
 *         {@code Bytes.SIZEOF_LONG} bytes; otherwise the first byte read as
 *         an unsigned value
 */
@Override
public int getPartition(SelfDefineSortableKey skey, Text value, int numReduceTasks) {
    final Text key = skey.getText();
    final byte[] raw = key.getBytes();
    if (raw[0] != FactDistinctColumnsReducerMapping.MARK_FOR_HLL_COUNTER) {
        // ordinary record: the leading byte is used as the partition number
        return BytesUtil.readUnsigned(raw, 0, 1);
    }
    // HLL counter record: the cuboid id follows the marker byte
    Long cuboidId = Bytes.toLong(raw, 1, Bytes.SIZEOF_LONG);
    return reducerMapping.getReducerIdForCuboidRowCount(cuboidId);
}
private void putRowKeyToHLLNew(String[] row) { //generate hash for each row key column for (int i = 0; i < nRowKey; i++) { Hasher hc = hf.newHasher(); String colValue = row[rowkeyColIndex[i]]; if (colValue == null) colValue = "0"; byte[] bytes = hc.putString(colValue).hash().asBytes(); rowHashCodesLong[i] = (Bytes.toLong(bytes) + i);//add column ordinal to the hash value to distinguish between (a,b) and (b,a) } // user the row key column hash to get a consolidated hash for each cuboid for (int i = 0, n = cuboidsBitSet.length; i < n; i++) { long value = 0; for (int position = 0; position < cuboidsBitSet[i].length; position++) { value += rowHashCodesLong[cuboidsBitSet[i][position]]; } cuboidsHLL[i].addHashDirectly(value); } }
private void putRowKeyToHLLNew(String[] row) { //generate hash for each row key column for (int i = 0; i < nRowKey; i++) { Hasher hc = hf.newHasher(); String colValue = row[rowkeyColIndex[i]]; if (colValue == null) colValue = "0"; byte[] bytes = hc.putString(colValue).hash().asBytes(); rowHashCodesLong[i] = (Bytes.toLong(bytes) + i);//add column ordinal to the hash value to distinguish between (a,b) and (b,a) } // user the row key column hash to get a consolidated hash for each cuboid for (int i = 0, n = cuboidsBitSet.length; i < n; i++) { long value = 0; for (int position = 0; position < cuboidsBitSet[i].length; position++) { value += rowHashCodesLong[cuboidsBitSet[i][position]]; } cuboidsHLL[i].addHashDirectly(value); } }
private void putRowKeyToHLLNew(String[] row) { //generate hash for each row key column for (int i = 0; i < nRowKey; i++) { Hasher hc = hf.newHasher(); String colValue = row[i]; if (colValue == null) colValue = "0"; byte[] bytes = hc.putString(colValue).hash().asBytes(); rowHashCodesLong[i] = (Bytes.toLong(bytes) + i);//add column ordinal to the hash value to distinguish between (a,b) and (b,a) } // user the row key column hash to get a consolidated hash for each cuboid for (int i = 0, n = allCuboidsBitSet.length; i < n; i++) { long value = 0; for (int position = 0; position < allCuboidsBitSet[i].length; position++) { value += rowHashCodesLong[allCuboidsBitSet[i][position]]; } allCuboidsHLL[i].addHashDirectly(value); } }
/**
 * Merges all HLL counters received for one cuboid into {@code cuboidHLLMap}.
 *
 * The cuboid id is decoded from the leading bytes of the key. Each value is
 * deserialized into a fresh {@link HLLCounter}; its estimate is added to
 * {@code totalRowsBeforeMerge} and, for the base cuboid, also recorded in
 * {@code baseCuboidRowCountInMappers}.
 *
 * @param key     key whose leading bytes hold the cuboid id as a long
 * @param values  serialized HLL registers, one entry per incoming counter
 * @param context reducer context (not used here)
 * @throws IOException          if reading the registers fails
 * @throws InterruptedException declared by the reducer contract
 */
@Override
public void doReduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    long cuboidId = Bytes.toLong(key.getBytes());
    logger.info("Cuboid id to be processed: " + cuboidId);

    for (Text value : values) {
        HLLCounter hll = new HLLCounter(cubeConfig.getCubeStatsHLLPrecision());
        // Text's backing array may be longer than its content, so bound the buffer by getLength().
        ByteBuffer bf = ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
        hll.readRegisters(bf);

        // Compute the estimate once; it was previously evaluated twice for the base cuboid.
        final long estimate = hll.getCountEstimate();
        if (cuboidId == baseCuboidId) {
            baseCuboidRowCountInMappers.add(estimate);
        }
        totalRowsBeforeMerge += estimate;

        // Single map lookup instead of get() followed by a second get().
        final HLLCounter existing = cuboidHLLMap.get(cuboidId);
        if (existing != null) {
            existing.merge(hll);
        } else {
            cuboidHLLMap.put(cuboidId, hll);
        }
    }
}
/**
 * Chooses the partition for a {@code SelfDefineSortableKey}.
 *
 * On entry, if {@code initialized} is false, {@code init()} is invoked while
 * holding the {@code SparkFactDistinct.class} monitor (the flag is checked
 * again inside the lock).
 *
 * @param o the key; cast to {@code SelfDefineSortableKey}
 * @return for keys starting with
 *         {@code FactDistinctColumnsReducerMapping.MARK_FOR_HLL_COUNTER}, the
 *         reducer id mapped to the cuboid id stored in the following
 *         {@code Bytes.SIZEOF_LONG} bytes; otherwise the first byte read as
 *         an unsigned value
 */
@Override
public int getPartition(Object o) {
    if (!initialized) {
        synchronized (SparkFactDistinct.class) {
            if (!initialized) {
                init();
            }
        }
    }

    final Text key = ((SelfDefineSortableKey) o).getText();
    final byte[] raw = key.getBytes();
    if (raw[0] != FactDistinctColumnsReducerMapping.MARK_FOR_HLL_COUNTER) {
        // ordinary record: the leading byte is used as the partition number
        return BytesUtil.readUnsigned(raw, 0, 1);
    }
    // HLL counter record: the cuboid id follows the marker byte
    Long cuboidId = Bytes.toLong(raw, 1, Bytes.SIZEOF_LONG);
    return reducerMapping.getReducerIdForCuboidRowCount(cuboidId);
}
}
/**
 * Loads cube statistics from a sequence file.
 *
 * Records are dispatched on their long key: positive keys carry the HLL
 * registers stored in {@code counterMap} under that key; key 0 carries
 * {@code percentage}, -1 {@code mapperOverlapRatio}, -2 {@code mapperNumber}
 * and -3 {@code sourceRecordCount}. Any other key is ignored.
 *
 * @param path      location of the sequence file
 * @param precision precision used to construct each {@code HLLCounter}
 * @throws IOException if the file cannot be opened or read
 */
public CubeStatsResult(Path path, int precision) throws IOException {
    Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
    Option seqInput = SequenceFile.Reader.file(path);
    try (Reader reader = new SequenceFile.Reader(hadoopConf, seqInput)) {
        LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
        BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), hadoopConf);
        while (reader.next(key, value)) {
            final long id = key.get();
            if (id > 0) {
                // positive keys carry the serialized registers of one counter
                HLLCounter hll = new HLLCounter(precision);
                ByteArray byteArray = new ByteArray(value.getBytes());
                hll.readRegisters(byteArray.asBuffer());
                counterMap.put(id, hll);
            } else if (id == 0L) {
                percentage = Bytes.toInt(value.getBytes());
            } else if (id == -1) {
                mapperOverlapRatio = Bytes.toDouble(value.getBytes());
            } else if (id == -2) {
                mapperNumber = Bytes.toInt(value.getBytes());
            } else if (id == -3) {
                sourceRecordCount = Bytes.toLong(value.getBytes());
            }
        }
    }
}
/**
 * Feeds one row into the given per-cuboid counters.
 *
 * Every field is hashed together with its position (the position is
 * prepended to the field text); a cuboid's hash is the sum of the hashes of
 * the columns listed in its {@code allCuboidsBitSet} entry.
 *
 * @param row             field values of the row, in column order
 * @param hashValuesLong  scratch array receiving one hash per field
 * @param cuboidCounters  counters, indexed like {@code allCuboidsBitSet}
 * @param hashFunction    hash function used for the individual fields
 */
private void putRowKeyToHLLNew(List<String> row, long[] hashValuesLong, HLLCounter[] cuboidCounters, HashFunction hashFunction) {
    int column = 0;
    for (String field : row) {
        byte[] digest = hashFunction.newHasher().putString(column + field).hash().asBytes();
        hashValuesLong[column] = Bytes.toLong(digest);
        column++;
    }

    for (int c = 0; c < allCuboidsBitSet.length; c++) {
        long combined = 0;
        for (int colIdx : allCuboidsBitSet[c]) {
            combined += hashValuesLong[colIdx];
        }
        cuboidCounters[c].addHashDirectly(combined);
    }
}
/**
 * Estimates the cardinality of {@code rows} with a single dense HLL counter
 * and prints the estimate together with its error rate against
 * {@code realCardinality}.
 *
 * Every field is hashed together with its position; the row hash is the sum
 * of its field hashes.
 */
@Override
public void run() throws Exception {
    HLLCounter counter = new HLLCounter(14, RegisterType.DENSE);
    HashFunction hf2 = Hashing.murmur3_128();
    long[] valueHashLong = new long[allCuboidsBitSet.length];

    for (List<String> row : rows) {
        int column = 0;
        for (String field : row) {
            byte[] digest = hf2.newHasher().putString(column + field).hash().asBytes();
            valueHashLong[column] = Bytes.toLong(digest);
            column++;
        }

        long rowHash = 0;
        for (int i = 0; i < row.size(); i++) {
            rowHash += valueHashLong[i];
        }
        counter.addHashDirectly(rowHash);
    }

    long estimate = counter.getCountEstimate();
    System.out.println("new method finished. Estimate cardinality : " + estimate + ". Error rate : " + countErrorRate(estimate, realCardinality));
}
});
this.splitBuffers[this.bufferSize++] = cuboidIdSplit; offset += RowConstants.ROWKEY_CUBOIDID_LEN; long lastSplittedCuboidId = Bytes.toLong(cuboidIdSplit.array(), cuboidIdSplit.offset(), RowConstants.ROWKEY_CUBOIDID_LEN); Cuboid cuboid = Cuboid.findForMandatory(cubeDesc, lastSplittedCuboidId);
assertEquals(0, Bytes.toShort(shard)); assertEquals(511, Bytes.toLong(cuboidId)); assertArrayEquals(new byte[] { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, rest);
assertEquals(0, Bytes.toShort(shard)); assertEquals(511, Bytes.toLong(cuboidId)); assertArrayEquals(new byte[] { 11, 55, -13, 49, 49, 56, 52, 56, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 22, 34, 121, 70, 80, 45, 71, 84, 67, 9, 9, 9, 9, 9, 9, 0, 10, 5 }, rest);
@Test public void testEncodeWithoutSlr() throws Exception { CubeInstance cube = CubeManager.getInstance(getTestConfig()).getCube("TEST_KYLIN_CUBE_WITHOUT_SLR_READY"); // CubeSegment seg = cube.getTheOnlySegment(); CubeDesc cubeDesc = cube.getDescriptor(); // String data = // "2013-08-18Abbigliamento e accessoriDonna: AccessoriSciarpFoulard e ScialliAuctionItalyRegular"; String[] data = new String[8]; data[0] = "2012-12-15"; data[1] = "11848"; data[2] = "Health & Beauty"; data[3] = "Fragrances"; data[4] = "Women"; data[5] = "FP-GTC"; data[6] = "0"; data[7] = "15"; long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc); Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId); RowKeyEncoder rowKeyEncoder = new RowKeyEncoder(cube.getFirstSegment(), baseCuboid); byte[] encodedKey = rowKeyEncoder.encode(data); assertEquals(22 + rowKeyEncoder.getHeaderLength(), encodedKey.length); byte[] cuboidId = Arrays.copyOfRange(encodedKey, RowConstants.ROWKEY_SHARDID_LEN, rowKeyEncoder.getHeaderLength()); byte[] rest = Arrays.copyOfRange(encodedKey, rowKeyEncoder.getHeaderLength(), encodedKey.length); assertEquals(255, Bytes.toLong(cuboidId)); assertArrayEquals(new byte[] { 11, 55, -13, 13, 22, 34, 121, 70, 80, 45, 71, 84, 67, 9, 9, 9, 9, 9, 9, 0, 10, 5 }, rest); }