public String getPublicConfig() throws IOException {
    final String whiteListProperties = KylinConfig.getInstanceFromEnv().getPropertiesWhiteList();
    Collection<String> propertyKeys = Lists.newArrayList();
    if (StringUtils.isNotEmpty(whiteListProperties)) {
        propertyKeys.addAll(Arrays.asList(StringUtil.splitByComma(whiteListProperties)));
    }
    return KylinConfig.getInstanceFromEnv().exportToString(propertyKeys);
}
@Override
public String copyToAnotherMeta(KylinConfig srcConfig, KylinConfig dstConfig) throws IOException {
    if (baseDir.contains("resources/SegmentDict")) {
        logger.info("SegmentAppendTrieDict needn't be copied");
        return baseDir;
    }

    // Guava's checkArgument uses %s-style templates, not SLF4J-style {} placeholders
    checkArgument(baseDir.startsWith(srcConfig.getHdfsWorkingDirectory()),
            "Please check why current directory %s doesn't belong to source working directory %s",
            baseDir, srcConfig.getHdfsWorkingDirectory());

    final String dstBaseDir = baseDir.replaceFirst(srcConfig.getHdfsWorkingDirectory(),
            dstConfig.getHdfsWorkingDirectory());

    Long[] versions = listAllVersions();
    if (versions.length == 0) { // empty dict, nothing to copy
        return dstBaseDir;
    }

    // copy only the latest version dir into the destination working directory
    Path srcVersionDir = getVersionDir(versions[versions.length - 1]);
    Path dstVersionDir = new Path(srcVersionDir.toString().replaceFirst(srcConfig.getHdfsWorkingDirectory(),
            dstConfig.getHdfsWorkingDirectory()));
    FileSystem dstFS = dstVersionDir.getFileSystem(conf);
    if (dstFS.exists(dstVersionDir)) {
        dstFS.delete(dstVersionDir, true);
    }
    FileUtil.copy(fileSystem, srcVersionDir, dstFS, dstVersionDir, false, true, conf);

    return dstBaseDir;
}
private boolean checkTableExist(String tableName) throws IOException {
    StorageURL metadataUrl = KylinConfig.getInstanceFromEnv().getMetadataUrl();
    try (Admin admin = HBaseConnection.get(metadataUrl).getAdmin()) {
        return admin.tableExists(TableName.valueOf(tableName));
    }
}
public BaseBatchCubingInputSide(IJoinedFlatTableDesc flatDesc) {
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    this.flatDesc = flatDesc;
    this.flatTableDatabase = config.getHiveDatabaseForIntermediateTable();
    this.hdfsWorkingDir = config.getHdfsWorkingDirectory();
}
public ClientEnvExtractor() throws IOException {
    super();
    packageType = "client";
    kylinConfig = KylinConfig.getInstanceFromEnv();
    cmdExecutor = kylinConfig.getCliCommandExecutor();
}
private void init() {
    this.config = KylinConfig.getInstanceFromEnv();
    this.storageUrl = config.getStorageUrl();
    this.starSchemaUrl = config.getHiveUrl();
    this.starSchemaUser = config.getHiveUser();
    this.starSchemaPassword = config.getHivePassword();
}
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OPTION_JOB_NAME);
    options.addOption(OPTION_CUBING_JOB_ID);
    options.addOption(OPTION_OUTPUT_PATH);
    options.addOption(OPTION_CUBE_NAME);
    parseOptions(options, args);

    job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
    String job_id = getOptionValue(OPTION_CUBING_JOB_ID);
    job.getConfiguration().set(BatchConstants.ARG_CUBING_JOB_ID, job_id);

    CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
    CubeInstance cube = cubeMgr.getCube(cubeName);
    CubeSegment segment = cube.getSegmentById(segmentID);

    job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
    logger.info("Starting: " + job.getJobName());

    Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
    FileOutputFormat.setOutputPath(job, output);
    job.setOutputKeyClass(Text.class);
    // ...
@Override
public int run(String[] args) throws Exception {
    try {
        Options options = new Options();
        options.addOption(OPTION_PROJECT);
        options.addOption(OPTION_TABLE);
        options.addOption(OPTION_OUTPUT_PATH);

        Configuration conf = getConf();
        KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        JobEngineConfig jobEngineConfig = new JobEngineConfig(kylinConfig);
        conf.addResource(new Path(jobEngineConfig.getHadoopJobConfFilePath(null)));

        job = Job.getInstance(conf, jobName);
        job.getConfiguration().set(BatchConstants.CFG_PROJECT_NAME, project);
        job.getConfiguration().set(BatchConstants.CFG_TABLE_NAME, table);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);
        job.getConfiguration().set("dfs.blocksize", "67108864");
        // ...
private static List<String> filterByCubes(List<String> allTableNames, List<String> cubeNames) {
    CubeManager cubeManager = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
    List<String> result = Lists.newArrayList();
    for (String c : cubeNames) {
        c = c.trim();
        if (c.endsWith(","))
            c = c.substring(0, c.length() - 1);

        CubeInstance cubeInstance = cubeManager.getCube(c);
        for (CubeSegment segment : cubeInstance.getSegments()) {
            String tableName = segment.getStorageLocationIdentifier();
            if (allTableNames.contains(tableName)) {
                result.add(tableName);
            }
        }
    }
    return result;
}
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OPTION_JOB_NAME);
    options.addOption(OPTION_CUBE_NAME);
    options.addOption(OPTION_PARTITION_FILE_PATH);
    options.addOption(OPTION_INPUT_PATH);
    parseOptions(options, args);

    Path partitionFilePath = new Path(getOptionValue(OPTION_PARTITION_FILE_PATH));
    Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
    String cubeName = getOptionValue(OPTION_CUBE_NAME);

    CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());

    Configuration configuration = new Configuration(HBaseConnection.getCurrentHBaseConfiguration());
    merge(configuration, getConf());

    job = Job.getInstance(configuration, getOptionValue(OPTION_JOB_NAME));
    job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    attachCubeMetadata(cube, job.getConfiguration());
    configuration.set(DFSConfigKeys.DFS_REPLICATION_KEY, "3");
    this.deletePath(job.getConfiguration(), output);
    // ...
@Test
public void test() throws Exception {
    FileSystem mockFs = mock(FileSystem.class);
    prepareUnusedIntermediateHiveTable(mockFs);
    prepareUnusedHDFSFiles(mockFs);

    MockStorageCleanupJob job = new MockStorageCleanupJob(KylinConfig.getInstanceFromEnv(), mockFs, mockFs);
    job.execute(new String[] { "--delete", "true" });

    ArgumentCaptor<Path> pathCaptor = ArgumentCaptor.forClass(Path.class);
    verify(mockFs, times(2)).delete(pathCaptor.capture(), eq(true));
    ArrayList<Path> expected = Lists.newArrayList(
            // verifyCleanUnusedIntermediateHiveTable
            new Path("file:///tmp/examples/test_metadata/kylin-f8edd777-8756-40d5-be19-3159120e4f7b/kylin_intermediate_2838c7fc-722a-48fa-9d1a-8ab37837a952"),
            // verifyCleanUnusedHdfsFiles
            new Path("file:///tmp/examples/test_metadata/kylin-to-be-delete"));
    assertEquals(expected, pathCaptor.getAllValues());
}
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OPTION_JOB_NAME);
    options.addOption(OPTION_CUBE_NAME);
    options.addOption(OPTION_CUBING_JOB_ID);
    options.addOption(OPTION_OUTPUT_PATH);
    parseOptions(options, args);

    CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
    CubeInstance cube = cubeMgr.getCube(cubeName);
    attachCubeMetadata(cube, job.getConfiguration());

    Path path = new Path(input.toString() + "/" + tblColRef.getIdentity());
    if (HadoopUtil.getFileSystem(path).exists(path)) {
        FileInputFormat.addInputPath(job, path);
        hasUHCValue = true;
    }

    setupReducer(output, reducerCount);

    job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    job.getConfiguration().set(BatchConstants.ARG_CUBING_JOB_ID, job_id);
    job.getConfiguration().set(BatchConstants.CFG_GLOBAL_DICT_BASE_DIR,
            KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory());
    job.getConfiguration().set(BatchConstants.CFG_MAPRED_OUTPUT_COMPRESS, "false");

    for (Map.Entry<String, String> entry : cube.getConfig().getUHCMRConfigOverride().entrySet()) {
        job.getConfiguration().set(entry.getKey(), entry.getValue());
        // ...
public static void prepareTestDataForStreamingCube(long startTime, long endTime, int numberOfRecords,
        String cubeName, StreamDataLoader streamDataLoader) throws IOException {
    CubeInstance cubeInstance = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName);
    List<String> data = StreamingTableDataGenerator.generate(numberOfRecords, startTime, endTime,
            cubeInstance.getRootFactTable(), cubeInstance.getProject());

    // load into kafka
    streamDataLoader.loadIntoKafka(data);
    logger.info("Write {} messages into {}", data.size(), streamDataLoader.toString());

    // csv data for H2 use
    TableRef factTable = cubeInstance.getModel().getRootFactTable();
    List<TblColRef> tableColumns = Lists.newArrayList(factTable.getColumns());
    TimedJsonStreamParser timedJsonStreamParser = new TimedJsonStreamParser(tableColumns, null);
    StringBuilder sb = new StringBuilder();
    for (String json : data) {
        List<String> rowColumns = timedJsonStreamParser
                .parse(ByteBuffer.wrap(json.getBytes(StandardCharsets.UTF_8))).get(0).getData();
        sb.append(StringUtils.join(rowColumns, ","));
        sb.append(System.getProperty("line.separator"));
    }
    appendFactTableData(sb.toString(), cubeInstance.getRootFactTable());
}
private static void initQueryTransformers() {
    List<IQueryTransformer> transformers = Lists.newArrayList();

    String[] classes = KylinConfig.getInstanceFromEnv().getQueryTransformers();
    for (String clz : classes) {
        try {
            IQueryTransformer t = (IQueryTransformer) ClassUtil.newInstance(clz);
            transformers.add(t);
        } catch (Exception e) {
            throw new IllegalStateException("Failed to init query transformer", e);
        }
    }

    queryTransformers = transformers;
}
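// --- Illustrative sketch (not Kylin source) ---
// The method above loads every configured transformer class reflectively via
// ClassUtil.newInstance(clz), so each implementation needs a public no-arg constructor.
// The stand-alone example below mimics that loading pattern with a locally declared
// IQueryTransformer stand-in; the interface shape and the TrimSemicolonTransformer name
// are assumptions made for the demo and may differ from the actual Kylin API.
import java.util.ArrayList;
import java.util.List;

public class QueryTransformerSketch {

    /** Stand-in for Kylin's IQueryTransformer; the real signature may differ. */
    public interface IQueryTransformer {
        String transform(String sql);
    }

    /** Hypothetical transformer: strips a trailing semicolon before parsing. */
    public static class TrimSemicolonTransformer implements IQueryTransformer {
        public TrimSemicolonTransformer() {
            // public no-arg constructor, required for reflective instantiation
        }

        @Override
        public String transform(String sql) {
            String s = sql.trim();
            return s.endsWith(";") ? s.substring(0, s.length() - 1) : s;
        }
    }

    public static void main(String[] args) throws Exception {
        // mimic initQueryTransformers(): instantiate by class name, then chain the transformers
        String[] classes = { TrimSemicolonTransformer.class.getName() };
        List<IQueryTransformer> transformers = new ArrayList<>();
        for (String clz : classes) {
            transformers.add((IQueryTransformer) Class.forName(clz).getDeclaredConstructor().newInstance());
        }

        String sql = "select count(*) from kylin_sales;";
        for (IQueryTransformer t : transformers) {
            sql = t.transform(sql);
        }
        System.out.println(sql); // -> select count(*) from kylin_sales
    }
}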
public int run(String[] args) throws Exception {
    Options options = new Options();
    parseOptions(options, args);

    Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
    String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
    String tableName = getOptionValue(OPTION_TABLE_NAME);
    String jobId = getOptionValue(OPTION_CUBING_JOB_ID);

    KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    CubeManager cubeMgr = CubeManager.getInstance(kylinConfig);
    CubeInstance cube = cubeMgr.getCube(cubeName);
    // ...
    logger.info("created snapshot information at:{}", snapshot.getResourcePath());

    job = Job.getInstance(HBaseConfiguration.create(getConf()), getOptionValue(OPTION_JOB_NAME));
    HBaseConnection.addHBaseClusterNNHAConfiguration(job.getConfiguration());

    job.setMapperClass(LookupTableToHFileMapper.class);
    job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    job.getConfiguration().set(BatchConstants.CFG_TABLE_NAME, tableName);
    job.getConfiguration().set(DFSConfigKeys.DFS_REPLICATION_KEY, "3");
    job.getConfiguration().set(BatchConstants.CFG_SHARD_NUM, String.valueOf(hTableNameAndShard.getSecond()));
    // ...
public CubeMigrationCheckCLI(KylinConfig kylinConfig, Boolean isFix) throws IOException {
    this.dstCfg = kylinConfig;
    this.ifFix = isFix;

    Connection conn = HBaseConnection.get(kylinConfig.getStorageUrl());
    hbaseAdmin = conn.getAdmin();

    issueExistHTables = Lists.newArrayList();
    inconsistentHTables = Lists.newArrayList();
}
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OPTION_JOB_NAME);
    options.addOption(OPTION_CUBE_NAME);
    options.addOption(OPTION_SEGMENT_ID);
    options.addOption(OPTION_INPUT_PATH);
    parseOptions(options, args);

    job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
    String cubeName = getOptionValue(OPTION_CUBE_NAME);
    String segmentID = getOptionValue(OPTION_SEGMENT_ID);
    Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
    Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
    String statistics_sampling_percent = getOptionValue(OPTION_STATISTICS_SAMPLING_PERCENT);
    String cuboidMode = getOptionValue(OPTION_CUBOID_MODE);

    CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
    CubeInstance cube = cubeMgr.getCube(cubeName);
    CubeSegment cubeSegment = cube.getSegmentById(segmentID);

    job.getConfiguration().set(BatchConstants.CFG_CUBOID_MODE, cuboidMode);
    job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
    job.getConfiguration().set(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT, statistics_sampling_percent);
    logger.info("Starting: " + job.getJobName());
    // ...
oldJarPaths = new HashSet<String>();
Path coprocessorDir = getCoprocessorHDFSDir(fileSystem, KylinConfig.getInstanceFromEnv());
for (FileStatus fileStatus : fileSystem.listStatus(coprocessorDir)) {
    if (isSame(localCoprocessorFile, fileStatus)) {
        uploadPath = fileStatus.getPath();
        break;
    }
    String filename = fileStatus.getPath().toString();
    if (filename.endsWith(".jar")) {
        oldJarPaths.add(filename);
        oldJarNames.add(new Path(filename).getName());
    }
}
// ...
try {
    in = new FileInputStream(localCoprocessorFile);
    out = fileSystem.create(uploadPath);
    IOUtils.copy(in, out);
} finally {
    IOUtils.closeQuietly(in);
    IOUtils.closeQuietly(out);
}

fileSystem.setTimes(uploadPath, localCoprocessorFile.lastModified(), -1);
public SCCreator() {
    config = KylinConfig.getInstanceFromEnv();

    options = new Options();
    options.addOption(OPTION_OWNER);
    options.addOption(OPTION_OUTPUT);
    options.addOption(OPTION_INPUT_CONFIG);
}
public static void addHBaseClusterNNHAConfiguration(Configuration conf) {
    String hdfsConfigFile = KylinConfig.getInstanceFromEnv().getHBaseClusterHDFSConfigFile();
    if (hdfsConfigFile == null || hdfsConfigFile.isEmpty()) {
        return;
    }
    Configuration hdfsConf = new Configuration(false);
    hdfsConf.addResource(hdfsConfigFile);
    Collection<String> nameServices = hdfsConf.getTrimmedStringCollection(DFSConfigKeys.DFS_NAMESERVICES);
    Collection<String> mainNameServices = conf.getTrimmedStringCollection(DFSConfigKeys.DFS_NAMESERVICES);
    for (String serviceId : nameServices) {
        mainNameServices.add(serviceId);

        String serviceConfKey = DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX + "." + serviceId;
        String proxyConfKey = DFSConfigKeys.DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + serviceId;
        conf.set(serviceConfKey, hdfsConf.get(serviceConfKey, ""));
        conf.set(proxyConfKey, hdfsConf.get(proxyConfKey, ""));

        Collection<String> nameNodes = hdfsConf.getTrimmedStringCollection(serviceConfKey);
        for (String nameNode : nameNodes) {
            String rpcConfKey = DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY + "." + serviceId + "." + nameNode;
            conf.set(rpcConfKey, hdfsConf.get(rpcConfKey, ""));
        }
    }
    conf.setStrings(DFSConfigKeys.DFS_NAMESERVICES, mainNameServices.toArray(new String[0]));
    // See YARN-3021, instruct RM skip renew token of hbase cluster name services
    conf.setStrings(JOB_NAMENODES_TOKEN_RENEWAL_EXCLUDE, nameServices.toArray(new String[0]));
}
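// --- Illustrative sketch (not Kylin source) ---
// Self-contained demo of the name-service merge performed above, using the literal HDFS
// property names instead of the DFSConfigKeys constants. The "hbasecluster" name service,
// the "nn1"/"nn2" NameNode ids and the host names are invented example values, not anything
// mandated by Kylin or HBase.
import java.util.Collection;

import org.apache.hadoop.conf.Configuration;

public class NNHAConfigMergeSketch {
    public static void main(String[] args) {
        // stands in for the hdfs-site.xml of the remote HBase cluster
        Configuration hbaseHdfsConf = new Configuration(false);
        hbaseHdfsConf.set("dfs.nameservices", "hbasecluster");
        hbaseHdfsConf.set("dfs.ha.namenodes.hbasecluster", "nn1,nn2");
        hbaseHdfsConf.set("dfs.namenode.rpc-address.hbasecluster.nn1", "hbase-nn1.example.com:8020");
        hbaseHdfsConf.set("dfs.namenode.rpc-address.hbasecluster.nn2", "hbase-nn2.example.com:8020");

        // stands in for the job configuration of the main (computing) cluster
        Configuration jobConf = new Configuration(false);
        jobConf.set("dfs.nameservices", "maincluster");

        // merge the same way addHBaseClusterNNHAConfiguration does: append the HBase cluster's
        // name service and copy its HA-related keys into the job configuration
        Collection<String> merged = jobConf.getTrimmedStringCollection("dfs.nameservices");
        for (String serviceId : hbaseHdfsConf.getTrimmedStringCollection("dfs.nameservices")) {
            merged.add(serviceId);
            String haKey = "dfs.ha.namenodes." + serviceId;
            jobConf.set(haKey, hbaseHdfsConf.get(haKey, ""));
            for (String nameNode : hbaseHdfsConf.getTrimmedStringCollection(haKey)) {
                String rpcKey = "dfs.namenode.rpc-address." + serviceId + "." + nameNode;
                jobConf.set(rpcKey, hbaseHdfsConf.get(rpcKey, ""));
            }
        }
        jobConf.setStrings("dfs.nameservices", merged.toArray(new String[0]));

        System.out.println(jobConf.get("dfs.nameservices"));
        // expected: maincluster,hbasecluster
        System.out.println(jobConf.get("dfs.namenode.rpc-address.hbasecluster.nn1"));
        // expected: hbase-nn1.example.com:8020
    }
}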