/**
 * Merges the IP addresses of same Status.
 */
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  // Deduplicate the addresses seen for this status key.
  Set<String> uniqueAddresses = new HashSet<>();
  for (Text address : values) {
    uniqueAddresses.add(address.toString());
  }
  // Emit the status with its addresses joined by single spaces.
  context.write(key, new Text(String.join(" ", uniqueAddresses)));
}
}
/**
 * Writes every entry of {@code counterMap} to a SequenceFile at {@code outputPath}
 * as Text key/value pairs.
 *
 * @param conf job configuration used to resolve the working file system
 * @param outputPath destination path of the sequence file
 * @param counterMap entries to append, in the map's iteration order
 * @throws IOException if the writer cannot be created or an append fails
 */
public static void writeToSequenceFile(Configuration conf, String outputPath,
    Map<String, String> counterMap) throws IOException {
  // Option-based factory replaces the deprecated createWriter(FileSystem, ...)
  // overload. Qualifying the path against the working file system preserves
  // which FS the file is written to.
  Path qualifiedPath = getWorkingFileSystem(conf).makeQualified(new Path(outputPath));
  try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(qualifiedPath),
      SequenceFile.Writer.keyClass(Text.class),
      SequenceFile.Writer.valueClass(Text.class))) {
    for (Map.Entry<String, String> counterEntry : counterMap.entrySet()) {
      writer.append(new Text(counterEntry.getKey()), new Text(counterEntry.getValue()));
    }
  }
}
/**
 * Initializes the delegation token manager from the given properties.
 *
 * <p>All properties are copied into a fresh (defaults-free) Hadoop
 * {@link Configuration}; the configuration must define {@code TOKEN_KIND}.
 *
 * @param config properties to copy into the token manager's configuration
 * @throws IllegalArgumentException if the token kind property is missing
 */
@VisibleForTesting
public void initTokenManager(Properties config) {
  Configuration conf = new Configuration(false);
  // Typed Map.Entry<Object, Object> iteration replaces the raw Map.Entry loop,
  // removing the need for @SuppressWarnings("unchecked").
  for (Map.Entry<Object, Object> entry : config.entrySet()) {
    conf.set((String) entry.getKey(), (String) entry.getValue());
  }
  String tokenKind = conf.get(TOKEN_KIND);
  if (tokenKind == null) {
    throw new IllegalArgumentException(
        "The configuration does not define the token kind");
  }
  tokenKind = tokenKind.trim();
  tokenManager = new DelegationTokenManager(conf, new Text(tokenKind));
  tokenManager.init();
}
/** Verifies that a Hive list of Text values serializes to a JSON string array. */
@Test
public void testList() {
  // Arrays.asList is varargs; the explicit Object[] allocation is unnecessary.
  // The <Object> witness keeps the element type the original List<Object> had.
  assertEquals("[\"one\",\"two\"]",
      hiveTypeToJson(new MyHiveType(
          Arrays.<Object>asList(new Text("one"), new Text("two")),
          getListTypeInfo(stringTypeInfo))));
}
@Test public void wholeTextFiles() throws Exception { byte[] content1 = "spark is easy to use.\n".getBytes(StandardCharsets.UTF_8); byte[] content2 = "spark is also easy to use.\n".getBytes(StandardCharsets.UTF_8); String tempDirName = tempDir.getAbsolutePath(); String path1 = new Path(tempDirName, "part-00000").toUri().getPath(); String path2 = new Path(tempDirName, "part-00001").toUri().getPath(); Files.write(content1, new File(path1)); Files.write(content2, new File(path2)); Map<String, String> container = new HashMap<>(); container.put(path1, new Text(content1).toString()); container.put(path2, new Text(content2).toString()); JavaPairRDD<String, String> readRDD = sc.wholeTextFiles(tempDirName, 3); List<Tuple2<String, String>> result = readRDD.collect(); for (Tuple2<String, String> res : result) { // Note that the paths from `wholeTextFiles` are in URI format on Windows, // for example, file:/C:/a/b/c. assertEquals(res._2(), container.get(new Path(res._1()).toUri().getPath())); } }
/**
 * Runs aes_encrypt over {@code str} with key {@code keyWr} and asserts the
 * Base64-encoded ciphertext matches {@code expResultBase64} (null maps to null).
 */
private void runAndVerifyStr(String str, Text keyWr, String expResultBase64,
    GenericUDFAesEncrypt udf) throws HiveException {
  // A null input string becomes a null deferred argument.
  DeferredObject inputArg = new DeferredJavaObject(str == null ? null : new Text(str));
  DeferredObject keyArg = new DeferredJavaObject(keyWr);
  BytesWritable encrypted = (BytesWritable) udf.evaluate(new DeferredObject[] { inputArg, keyArg });
  String actual = encrypted == null ? null : copyBytesAndBase64(encrypted);
  assertEquals("aes_encrypt() test ", expResultBase64, actual);
}
/**
 * Get token from the token sequence file.
 * @param authPath path of the local sequence file holding user-name → token entries
 * @param proxyUserName user whose token is being looked up
 * @return Token for proxyUserName if it exists.
 * @throws IOException if the sequence file cannot be opened or read
 */
private static Optional<Token<?>> getTokenFromSeqFile(String authPath, String proxyUserName)
    throws IOException {
  try (Closer closer = Closer.create()) {
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    // Option-based constructor replaces the deprecated
    // Reader(FileSystem, Path, Configuration) overload; qualifying the path
    // against localFs keeps the file opened via the local file system.
    SequenceFile.Reader tokenReader = closer.register(new SequenceFile.Reader(
        localFs.getConf(), SequenceFile.Reader.file(localFs.makeQualified(new Path(authPath)))));
    Text key = new Text();
    Token<?> value = new Token<>();
    while (tokenReader.next(key, value)) {
      LOG.info("Found token for " + key);
      if (key.toString().equals(proxyUserName)) {
        return Optional.<Token<?>> of(value);
      }
    }
  }
  return Optional.absent();
}
}
// NOTE(review): this region looks like an extraction artifact — statements from
// at least two distinct lookup paths appear interleaved, and several names
// (scanner, iter, key, entry, location) are declared outside this view. Kept
// verbatim; needs re-assembly against the original method(s) — TODO confirm.
String tableId = connector.tableOperations().tableIdMap().get(table);
// Restrict the scan to the "loc" column family (tablet location entries).
scanner.fetchColumnFamily(new Text("loc"));
location = Optional.of(iter.next().getValue().toString());
// Copy the current key's row into a Text for comparison against scanned rows.
Text splitCompareKey = new Text();
key.getRow(splitCompareKey);
Text scannedCompareKey = new Text();
byte[] keyBytes = entry.getKey().getRow().copyBytes();
location = Optional.of(entry.getValue().toString());
break;
// Pick this entry's location when the split key sorts at or before the scanned key.
int compareTo = splitCompareKey.compareTo(scannedCompareKey);
if (compareTo <= 0) { location = Optional.of(entry.getValue().toString());
/** Verifies that es.mapping.include keeps only keys matching the "a*" pattern. */
@Test
public void testMapWithFilterInclude() {
  TestSettings cfg = new TestSettings();
  cfg.setProperty("es.mapping.include", "a*");
  // Parameterized map replaces the raw Map/LinkedHashMap types; LinkedHashMap
  // keeps insertion order, so the expected JSON key order is deterministic.
  Map<Text, Text> map = new LinkedHashMap<>();
  map.put(new Text("aaa"), new Text("bbb"));
  map.put(new Text("ccc"), new Text("ddd"));
  map.put(new Text("axx"), new Text("zzz"));
  HiveType type = new MyHiveType(map, getMapTypeInfo(stringTypeInfo, stringTypeInfo));
  assertEquals("{\"aaa\":\"bbb\",\"axx\":\"zzz\"}", hiveTypeToJson(type, cfg));
}
@Test public void wholeTextFiles() throws Exception { byte[] content1 = "spark is easy to use.\n".getBytes(StandardCharsets.UTF_8); byte[] content2 = "spark is also easy to use.\n".getBytes(StandardCharsets.UTF_8); String tempDirName = tempDir.getAbsolutePath(); String path1 = new Path(tempDirName, "part-00000").toUri().getPath(); String path2 = new Path(tempDirName, "part-00001").toUri().getPath(); Files.write(content1, new File(path1)); Files.write(content2, new File(path2)); Map<String, String> container = new HashMap<>(); container.put(path1, new Text(content1).toString()); container.put(path2, new Text(content2).toString()); JavaPairRDD<String, String> readRDD = sc.wholeTextFiles(tempDirName, 3); List<Tuple2<String, String>> result = readRDD.collect(); for (Tuple2<String, String> res : result) { // Note that the paths from `wholeTextFiles` are in URI format on Windows, // for example, file:/C:/a/b/c. assertEquals(res._2(), container.get(new Path(res._1()).toUri().getPath())); } }
/**
 * Round-trips a MapJoinEagerRowContainer through serde and checks equality,
 * row count, and the accumulated alias filter.
 */
@Test
public void testSerialization() throws Exception {
  MapJoinRowContainer container1 = new MapJoinEagerRowContainer();
  container1.addRow(new Object[]{ new Text("f0"), null, new ShortWritable((short) 0xf) });
  // Arrays.asList is varargs; no explicit Object[] allocation is needed. The
  // <Object> witness preserves the List<Object> element type of the original.
  container1.addRow(Arrays.<Object>asList(null, new Text("f1"), new ShortWritable((short) 0xf)));
  container1.addRow(new Object[]{ null, null, new ShortWritable((short) 0xf) });
  container1.addRow(Arrays.<Object>asList(new Text("f0"), new Text("f1"),
      new ShortWritable((short) 0x1)));
  MapJoinRowContainer container2 =
      Utilities.serde(container1, "f0,f1,filter", "string,string,smallint");
  Utilities.testEquality(container1, container2);
  Assert.assertEquals(4, container1.rowCount());
  // Alias filter is the AND of all filter shorts: 0xf & 0xf & 0xf & 0x1 == 1.
  Assert.assertEquals(1, container2.getAliasFilter());
}
/** * Gets a set of locality groups that should be added to the index table (not the metrics table). * * @param table Table for the locality groups, see AccumuloClient#getTable * @return Mapping of locality group to column families in the locality group, 1:1 mapping in * this case */ public static Map<String, Set<Text>> getLocalityGroups(AccumuloTable table) { Map<String, Set<Text>> groups = new HashMap<>(); // For each indexed column for (AccumuloColumnHandle columnHandle : table.getColumns().stream().filter(AccumuloColumnHandle::isIndexed).collect(Collectors.toList())) { // Create a Text version of the index column family Text indexColumnFamily = new Text(getIndexColumnFamily(columnHandle.getFamily().get().getBytes(UTF_8), columnHandle.getQualifier().get().getBytes(UTF_8)).array()); // Add this to the locality groups, // it is a 1:1 mapping of locality group to column families groups.put(indexColumnFamily.toString(), ImmutableSet.of(indexColumnFamily)); } return groups; }
@Override public void map(LongWritable k, Text v, OutputCollector<Text, LongWritable> out, Reporter rep) throws IOException { // normalize and split the line String line = v.toString(); String[] tokens = line.toLowerCase().split("\\W+"); // emit the pairs for (String token : tokens) { if (token.length() > 0) { out.collect(new Text(token), new LongWritable(1L)); } } }
@Override public RecordReader<BytesWritable, BytesWritable> getRecordReader(InputSplit split, JobConf conf, Reporter reporter) throws IOException { String inputPathString = ((FileSplit) split).getPath().toUri().getPath(); log.info("Input file path:" + inputPathString); Path inputPath = new Path(inputPathString); SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf), inputPath, conf); SequenceFile.Metadata meta = reader.getMetadata(); try { Text keySchema = meta.get(new Text("key.schema")); Text valueSchema = meta.get(new Text("value.schema")); if(0 == keySchema.getLength() || 0 == valueSchema.getLength()) { throw new Exception(); } // update Joboconf with schemas conf.set("mapper.input.key.schema", keySchema.toString()); conf.set("mapper.input.value.schema", valueSchema.toString()); } catch(Exception e) { throw new IOException("Failed to Load Schema from file:" + inputPathString + "\n"); } return super.getRecordReader(split, conf, reporter); }
/**
 * Scans the table's metrics table for the first and last row IDs.
 *
 * @param connector Accumulo connector used to create the scanner
 * @param table table whose metrics table is scanned
 * @param auths authorizations for the scan
 * @return pair of (first row, last row); either element is null if the
 *         corresponding metrics entry is absent
 * @throws TableNotFoundException if the metrics table does not exist
 */
public static Pair<byte[], byte[]> getMinMaxRowIds(Connector connector, AccumuloTable table,
    Authorizations auths) throws TableNotFoundException {
  Scanner scanner = connector.createScanner(table.getMetricsTableName(), auths);
  // try/finally guarantees the scanner is closed even if iteration throws
  // (previously close() was skipped on error).
  try {
    scanner.setRange(new Range(new Text(Indexer.METRICS_TABLE_ROW_ID.array())));
    Text family = new Text(Indexer.METRICS_TABLE_ROWS_CF.array());
    Text firstRowQualifier = new Text(Indexer.METRICS_TABLE_FIRST_ROW_CQ.array());
    Text lastRowQualifier = new Text(Indexer.METRICS_TABLE_LAST_ROW_CQ.array());
    scanner.fetchColumn(family, firstRowQualifier);
    scanner.fetchColumn(family, lastRowQualifier);
    byte[] firstRow = null;
    byte[] lastRow = null;
    for (Entry<Key, Value> entry : scanner) {
      if (entry.getKey().compareColumnQualifier(firstRowQualifier) == 0) {
        firstRow = entry.getValue().get();
      }
      if (entry.getKey().compareColumnQualifier(lastRowQualifier) == 0) {
        lastRow = entry.getValue().get();
      }
    }
    return Pair.of(firstRow, lastRow);
  } finally {
    scanner.close();
  }
}
/** Verifies that es.mapping.exclude drops the "xxx" key from the serialized map. */
@Test
public void testMapWithFilterExclude() {
  TestSettings cfg = new TestSettings();
  cfg.setProperty("es.mapping.exclude", "xxx");
  // Parameterized map replaces the raw Map/LinkedHashMap types; LinkedHashMap
  // keeps insertion order, so the expected JSON key order is deterministic.
  Map<Text, Text> map = new LinkedHashMap<>();
  map.put(new Text("aaa"), new Text("bbb"));
  map.put(new Text("ccc"), new Text("ddd"));
  map.put(new Text("xxx"), new Text("zzz"));
  HiveType type = new MyHiveType(map, getMapTypeInfo(stringTypeInfo, stringTypeInfo));
  assertEquals("{\"aaa\":\"bbb\",\"ccc\":\"ddd\"}", hiveTypeToJson(type, cfg));
}
@Test public void wholeTextFiles() throws Exception { byte[] content1 = "spark is easy to use.\n".getBytes(StandardCharsets.UTF_8); byte[] content2 = "spark is also easy to use.\n".getBytes(StandardCharsets.UTF_8); String tempDirName = tempDir.getAbsolutePath(); String path1 = new Path(tempDirName, "part-00000").toUri().getPath(); String path2 = new Path(tempDirName, "part-00001").toUri().getPath(); Files.write(content1, new File(path1)); Files.write(content2, new File(path2)); Map<String, String> container = new HashMap<>(); container.put(path1, new Text(content1).toString()); container.put(path2, new Text(content2).toString()); JavaPairRDD<String, String> readRDD = sc.wholeTextFiles(tempDirName, 3); List<Tuple2<String, String>> result = readRDD.collect(); for (Tuple2<String, String> res : result) { // Note that the paths from `wholeTextFiles` are in URI format on Windows, // for example, file:/C:/a/b/c. assertEquals(res._2(), container.get(new Path(res._1()).toUri().getPath())); } }