public String getFactTableQuoted(String quot) {
    String database = quot + config.getHiveDatabaseForIntermediateTable() + quot;
    String table = quot + this.getName() + "_fact" + quot;
    return database + "." + table;
}
public BaseBatchCubingInputSide(IJoinedFlatTableDesc flatDesc) {
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    this.flatDesc = flatDesc;
    this.flatTableDatabase = config.getHiveDatabaseForIntermediateTable();
    this.hdfsWorkingDir = config.getHdfsWorkingDirectory();
}
@Override
public ColumnDesc[] evalQueryMetadata(String query) {
    if (StringUtils.isEmpty(query)) {
        throw new RuntimeException("Query to evaluate must not be empty.");
    }
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    String tmpDatabase = config.getHiveDatabaseForIntermediateTable();
    // Derive a deterministic view name from the query text, so repeated
    // evaluations of the same query reuse one name.
    String tmpView = "kylin_eval_query_"
            + UUID.nameUUIDFromBytes(query.getBytes(StandardCharsets.UTF_8)).toString().replace("-", "");
    String dropViewSql = "DROP VIEW IF EXISTS " + tmpDatabase + "." + tmpView;
    String evalViewSql = "CREATE VIEW " + tmpDatabase + "." + tmpView + " AS " + query;
    try {
        logger.debug("Removing duplicate view {}", tmpView);
        hiveClient.executeHQL(dropViewSql);
        logger.debug("Creating view {} for query: {}", tmpView, query);
        hiveClient.executeHQL(evalViewSql);
        logger.debug("Evaluating query columns' metadata");
        HiveTableMeta hiveTableMeta = hiveClient.getHiveTableMeta(tmpDatabase, tmpView);
        return extractColumnFromMeta(hiveTableMeta);
    } catch (Exception e) {
        throw new RuntimeException("Cannot evaluate metadata of query: " + query, e);
    } finally {
        try {
            logger.debug("Cleaning up.");
            hiveClient.executeHQL(dropViewSql);
        } catch (Exception e) {
            logger.warn("Cannot drop temp view of query: {}", query, e);
        }
    }
}
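A minimal usage sketch for the method above, assuming a caller holding the metadata-explorer instance; the explorer variable and the sample query are hypothetical, not from the source:

// Hedged sketch: evaluate the column metadata of an ad-hoc query through the
// temporary-view mechanism shown above. "explorer" and the query text are
// assumptions.
ColumnDesc[] columns = explorer.evalQueryMetadata("SELECT trans_id, price FROM kylin_sales");
for (ColumnDesc column : columns) {
    logger.info("column {} has type {}", column.getName(), column.getDatatype());
}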
protected List<String> getHiveTables() throws Exception {
    ISourceMetadataExplorer explr = SourceManager.getDefaultSource().getSourceMetadataExplorer();
    return explr.listTables(config.getHiveDatabaseForIntermediateTable());
}
String tmpDatabase = config.getHiveDatabaseForIntermediateTable();
String tmpView = tmpDatabase + ".kylin_eval_query_"
        + UUID.nameUUIDFromBytes(query.getBytes(StandardCharsets.UTF_8)).toString().replace("-", "");
protected static String getTableNameForHCat(TableDesc table, String uuid) {
    String tableName = table.isView() ? table.getMaterializedName(uuid) : table.getName();
    String database = table.isView()
            ? KylinConfig.getInstanceFromEnv().getHiveDatabaseForIntermediateTable()
            : table.getDatabase();
    return String.format(Locale.ROOT, "%s.%s", database, tableName).toUpperCase(Locale.ROOT);
}
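For illustration, a hedged call to the helper above; the table descriptor and uuid are assumptions:

// Hedged sketch: "lookupTableDesc" and "jobUuid" are assumed to be in scope.
// For a view, this resolves to the materialized copy in the intermediate
// database (e.g. "KYLIN_INTERMEDIATE.V_ACCOUNT_<UUID>"); for an ordinary
// table it is simply "<DATABASE>.<TABLE>" upper-cased.
String hcatTable = getTableNameForHCat(lookupTableDesc, jobUuid);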
final int uuidLength = 36;
final String useDatabaseHql = "USE " + config.getHiveDatabaseForIntermediateTable() + ";";
final HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder();
hiveCmdBuilder.addStatement(useDatabaseHql);
final String uuidPattern = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}";
final String useDatabaseHql = "USE " + config.getHiveDatabaseForIntermediateTable() + ";";
final HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder();
hiveCmdBuilder.addStatement(useDatabaseHql);
private String cleanUpIntermediateFlatTable(KylinConfig config) throws IOException {
    StringBuffer output = new StringBuffer();
    final HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder();
    final List<String> hiveTables = this.getIntermediateTables();
    // Only drop the flat tables and their HDFS data when the config does not
    // ask to keep them; otherwise nothing is executed or reported.
    if (!config.isHiveKeepFlatTable()) {
        for (String hiveTable : hiveTables) {
            if (StringUtils.isNotEmpty(hiveTable)) {
                hiveCmdBuilder.addStatement("USE " + config.getHiveDatabaseForIntermediateTable() + ";");
                hiveCmdBuilder.addStatement("DROP TABLE IF EXISTS `" + hiveTable + "`;");
                output.append("Hive table " + hiveTable + " is dropped. \n");
            }
        }
        config.getCliCommandExecutor().execute(hiveCmdBuilder.build());
        rmdirOnHDFS(getExternalDataPaths());
        output.append("Path " + getExternalDataPaths() + " is deleted. \n");
    }
    return output.toString();
}
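For illustration, assuming the intermediate database is kylin_intermediate and the list holds one flat table (both names are made up), the command built above amounts to:

// Assumed example values, not from the source:
//   USE kylin_intermediate;
//   DROP TABLE IF EXISTS `kylin_intermediate_my_cube_abc123`;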
public BaseBatchCubingInputSide(CubeSegment seg, IJoinedFlatTableDesc flatDesc) {
    this.conf = new JobEngineConfig(KylinConfig.getInstanceFromEnv());
    this.config = seg.getConfig();
    this.flatDesc = flatDesc;
    this.hiveTableDatabase = config.getHiveDatabaseForIntermediateTable();
    this.seg = seg;
    this.cubeDesc = seg.getCubeDesc();
    this.cubeName = seg.getCubeInstance().getName();
}
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);
    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE);
}
public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = new SparkExecutable();
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setClassName(SparkFactDistinct.class.getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(),
            String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));
    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES,
            getCounterOuputPath(jobId));
    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
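A hedged wiring sketch for the two Spark steps above, assuming they are chained into a cubing job via Kylin's DefaultChainedExecutable; the job object and the in-scope variables are assumptions, not from the source:

// Hedged sketch: chain the fact-distinct step before the layer-cubing step.
// "jobId", "seg" and "cuboidRootPath" are assumed to be in scope.
DefaultChainedExecutable cubingJob = new DefaultChainedExecutable();
cubingJob.addTask(createFactDistinctColumnsSparkStep(jobId));

SparkExecutable layerCubingStep = new SparkExecutable();
layerCubingStep.setClassName(SparkCubingByLayer.class.getName());
configureSparkJob(seg, layerCubingStep, jobId, cuboidRootPath);
cubingJob.addTask(layerCubingStep);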
@Override
public IReadableTable createReadableTable(TableDesc tableDesc, String uuid) {
    // A Hive view must have been materialized already;
    // see HiveMRInput.createLookupHiveViewMaterializationStep().
    if (tableDesc.isView()) {
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        String tableName = tableDesc.getMaterializedName(uuid);
        tableDesc = new TableDesc();
        tableDesc.setDatabase(config.getHiveDatabaseForIntermediateTable());
        tableDesc.setName(tableName);
    }
    return new HiveTable(tableDesc);
}
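A minimal read-loop sketch using the factory method above, with IReadableTable.TableReader as the row iterator; the lookupDesc and uuid variables are assumptions, not from the source:

// Hedged sketch: read rows from a (possibly view-backed) Hive table.
// "lookupDesc" and "uuid" are assumed to be in scope.
IReadableTable table = createReadableTable(lookupDesc, uuid);
IReadableTable.TableReader reader = table.getReader();
try {
    while (reader.next()) {
        String[] row = reader.getRow();
        // process the row...
    }
} finally {
    reader.close();
}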
// If the intermediate table name is database-qualified, split it; otherwise
// fall back to the configured intermediate database.
if (intermediateTable.indexOf(".") > 0) {
    database = intermediateTable.substring(0, intermediateTable.indexOf("."));
    tableName = intermediateTable.substring(intermediateTable.indexOf(".") + 1);
} else {
    database = config.getHiveDatabaseForIntermediateTable();
    tableName = intermediateTable;
}
protected String getIntermediateHiveTableName(IJoinedFlatTableDesc intermediateTableDesc, String jobUuid) {
    return engineConfig.getConfig().getHiveDatabaseForIntermediateTable() + "."
            + intermediateTableDesc.getTableName(jobUuid);
}