/**
 * Specify the table object since sometimes no connections
 * to the metastore will be opened.
 * @param table table object.
 * @return builder
 */
public Builder withTableObject(Table table) {
  this.tableObject = table;
  this.isPartitioned = tableObject.getPartitionKeys() != null
      && !tableObject.getPartitionKeys().isEmpty();
  return this;
}
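// A minimal usage sketch, assuming the enclosing Builder can be instantiated
// directly and that a Table was fetched elsewhere (both assumptions are
// illustrative, not from the original): supplying the Table up front lets the
// builder derive isPartitioned without opening its own metastore connection.
static Builder exampleWithTableObject(Table alreadyFetchedTable) {
  return new Builder().withTableObject(alreadyFetchedTable);
}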
static Map<String, String> createPtnKeyValueMap(Table table, Partition ptn)
    throws IOException {
  List<String> values = ptn.getValues();
  if (values.size() != table.getPartitionKeys().size()) {
    throw new IOException(
        "Partition values in partition inconsistent with table definition, table "
            + table.getTableName() + " has "
            + table.getPartitionKeys().size() + " partition keys, partition has "
            + values.size() + " partition values");
  }

  Map<String, String> ptnKeyValues = new HashMap<String, String>();

  int i = 0;
  for (FieldSchema schema : table.getPartitionKeys()) {
    // CONCERN : the way this mapping goes, the order *needs* to be
    // preserved for table.getPartitionKeys() and ptn.getValues()
    ptnKeyValues.put(schema.getName().toLowerCase(), values.get(i));
    i++;
  }

  return ptnKeyValues;
}
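// A usage sketch for createPtnKeyValueMap, assuming an IMetaStoreClient is at
// hand; the database, table, and partition values are illustrative, not from
// the original.
static Map<String, String> examplePtnKeyValueMap(IMetaStoreClient client) throws Exception {
  Table table = client.getTable("db", "employee");
  Partition ptn = client.getPartition("db", "employee", Arrays.asList("IN", "KA"));
  // For partition keys (emp_country, emp_state) this returns
  // {emp_country=IN, emp_state=KA}, keyed by lower-cased key names.
  return createPtnKeyValueMap(table, ptn);
}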
/**
 * Return the partition columns from a table instance.
 *
 * @param table the instance to extract partition columns from
 * @return HCatSchema instance which contains the partition columns
 * @throws IOException
 */
public static HCatSchema getPartitionColumns(Table table) throws IOException {
  HCatSchema cols = new HCatSchema(new LinkedList<HCatFieldSchema>());
  if (table.getPartitionKeys().size() != 0) {
    for (FieldSchema fs : table.getPartitionKeys()) {
      cols.append(HCatSchemaUtils.getHCatFieldSchema(fs));
    }
  }
  return cols;
}
private static String getColTypeOf(Table tbl, String partKey) throws SemanticException {
  for (FieldSchema fs : tbl.getPartitionKeys()) {
    if (partKey.equalsIgnoreCase(fs.getName())) {
      return fs.getType().toLowerCase();
    }
  }
  throw new SemanticException("Unknown partition key : " + partKey);
}
public static HCatSchema getTableSchemaWithPtnCols(Table table) throws IOException {
  HCatSchema tableSchema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols()));

  if (table.getPartitionKeys().size() != 0) {
    // add partition keys to table schema
    // NOTE : this assumes that we do not ever have ptn keys as columns
    // inside the table schema as well!
    for (FieldSchema fs : table.getPartitionKeys()) {
      tableSchema.append(HCatSchemaUtils.getHCatFieldSchema(fs));
    }
  }
  return tableSchema;
}
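// Illustrative only: for a table with data columns (name string, salary int)
// partitioned by (emp_country string, emp_state string), the returned schema
// lists name, salary, emp_country, emp_state -- data columns first, partition
// keys appended last, per the NOTE above.
static int examplePtnColPosition(Table table) throws IOException {
  HCatSchema schema = getTableSchemaWithPtnCols(table);
  // Partition columns are then addressable like any other field.
  return schema.getPosition("emp_country"); // 2 in this example
}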
private String getColTypeOf(String partKey) throws SemanticException {
  for (FieldSchema fs : tbl.getPartitionKeys()) {
    if (partKey.equalsIgnoreCase(fs.getName())) {
      return fs.getType().toLowerCase();
    }
  }
  throw new SemanticException("Unknown partition key : " + partKey);
}
/**
 * Convert the partition value map to a value list in the partition key order.
 * @param table the table being written to
 * @param valueMap the partition value map
 * @return the partition value list
 * @throws java.io.IOException
 */
static List<String> getPartitionValueList(Table table, Map<String, String> valueMap)
    throws IOException {
  if (valueMap.size() != table.getPartitionKeys().size()) {
    throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES,
        "Table " + table.getTableName() + " has "
            + table.getPartitionKeys().size() + " partition keys, got "
            + valueMap.size());
  }

  List<String> values = new ArrayList<String>();
  for (FieldSchema schema : table.getPartitionKeys()) {
    String value = valueMap.get(schema.getName().toLowerCase());
    if (value == null) {
      throw new HCatException(ErrorType.ERROR_MISSING_PARTITION_KEY,
          "Key " + schema.getName() + " of table " + table.getTableName());
    }
    values.add(value);
  }

  return values;
}
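// A minimal usage sketch, assuming lower-cased keys to match the
// schema.getName().toLowerCase() lookup above; the key names and values are
// illustrative, not from the original.
static List<String> exampleGetPartitionValueList(Table table) throws IOException {
  Map<String, String> valueMap = new HashMap<String, String>();
  valueMap.put("emp_country", "IN");
  valueMap.put("emp_state", "KA");
  // For partition keys declared as (emp_country, emp_state) this returns the
  // ordered list ["IN", "KA"]; a missing key raises ERROR_MISSING_PARTITION_KEY.
  return getPartitionValueList(table, valueMap);
}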
protected void preparePartitioningFields() {
  final int numPartitions = table.getPartitionKeys().size();
  this.partitionFieldData = new Object[numPartitions];
  this.partitionObjInspectors = new ObjectInspector[numPartitions];

  // Partition columns are assumed to be the trailing fields of the input row,
  // so their inspectors come from the last numPartitions struct fields.
  int startIdx = inputRowObjectInspector.getAllStructFieldRefs().size() - numPartitions;
  int endIdx = inputRowObjectInspector.getAllStructFieldRefs().size();
  int j = 0;
  for (int i = startIdx; i < endIdx; i++) {
    StructField structField = inputRowObjectInspector.getAllStructFieldRefs().get(i);
    partitionObjInspectors[j++] = structField.getFieldObjectInspector();
  }

  // Also resolve each partition column to its struct field by name.
  this.partitionStructFields = new StructField[partitionColumns.size()];
  for (int i = 0; i < partitionColumns.size(); i++) {
    String partCol = partitionColumns.get(i);
    partitionStructFields[i] = inputRowObjectInspector.getStructFieldRef(partCol);
  }
}
@Override
public String[] getPartitionKeys(String location, Job job) throws IOException {
  Table table = phutil.getTable(location,
      hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job),
      PigHCatUtil.getHCatServerPrincipal(job),
      job); // Pass job to initialize metastore conf overrides
  List<FieldSchema> tablePartitionKeys = table.getPartitionKeys();
  String[] partitionKeys = new String[tablePartitionKeys.size()];
  for (int i = 0; i < tablePartitionKeys.size(); i++) {
    partitionKeys[i] = tablePartitionKeys.get(i).getName();
  }
  return partitionKeys;
}
private void checkPartitionedTableCompatibility(Table desiredTargetTable, Table existingTargetTable)
    throws IOException {
  if (!desiredTargetTable.getDataLocation().equals(existingTargetTable.getDataLocation())) {
    throw new HiveTableLocationNotMatchException(desiredTargetTable.getDataLocation(),
        existingTargetTable.getDataLocation());
  }

  if (HiveUtils.isPartitioned(desiredTargetTable) != HiveUtils.isPartitioned(existingTargetTable)) {
    throw new IOException(String.format(
        "%s: Desired target table %s partitioned, existing target table %s partitioned. "
            + "Tables are incompatible.",
        this.dataset.tableIdentifier,
        HiveUtils.isPartitioned(desiredTargetTable) ? "is" : "is not",
        HiveUtils.isPartitioned(existingTargetTable) ? "is" : "is not"));
  }

  if (desiredTargetTable.isPartitioned()
      && !desiredTargetTable.getPartitionKeys().equals(existingTargetTable.getPartitionKeys())) {
    throw new IOException(String.format(
        "%s: Desired target table has partition keys %s, existing target table has partition keys %s. "
            + "Tables are incompatible.",
        this.dataset.tableIdentifier,
        gson.toJson(desiredTargetTable.getPartitionKeys()),
        gson.toJson(existingTargetTable.getPartitionKeys())));
  }
}
private String getFinalDynamicPartitionDestination(Table table, Map<String, String> partKVs,
    OutputJobInfo jobInfo) {
  Path partPath = new Path(table.getTTable().getSd().getLocation());
  if (!customDynamicLocationUsed) {
    // file:///tmp/hcat_junit_warehouse/employee/_DYN0.7770480401313761/emp_country=IN/emp_state=KA ->
    // file:///tmp/hcat_junit_warehouse/employee/emp_country=IN/emp_state=KA
    for (FieldSchema partKey : table.getPartitionKeys()) {
      partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs);
    }
    return partPath.toString();
  } else {
    // if custom root specified, update the parent path
    if (jobInfo.getCustomDynamicRoot() != null
        && jobInfo.getCustomDynamicRoot().length() > 0) {
      partPath = new Path(partPath, jobInfo.getCustomDynamicRoot());
    }
    return new Path(partPath, HCatFileUtil.resolveCustomPath(jobInfo, partKVs, false)).toString();
  }
}
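// A self-contained sketch of the non-custom branch above, assuming partition
// keys are iterated in table declaration order; the paths and values are
// illustrative, not from the original.
static Path exampleFinalPartPath(Table table, Map<String, String> partKVs) {
  Path partPath = new Path("/warehouse/employee");
  for (FieldSchema partKey : table.getPartitionKeys()) {
    String name = partKey.getName().toLowerCase();
    // Each partition key contributes one "key=value" path segment, yielding
    // e.g. /warehouse/employee/emp_country=IN/emp_state=KA.
    partPath = new Path(partPath, name + "=" + partKVs.get(name));
  }
  return partPath;
}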
private void validateTable() throws InvalidTable, ConnectionError {
  try {
    tableObject = new Table(getMSC().getTable(database, table));
  } catch (Exception e) {
    LOG.warn("Unable to validate the table for connection: " + toConnectionInfoString(), e);
    throw new InvalidTable(database, table, e);
  }

  // 1 - check that the table is Acid
  if (!AcidUtils.isFullAcidTable(tableObject)) {
    LOG.error("HiveEndPoint " + this + " must use an acid table");
    throw new InvalidTable(database, table, "is not an Acid table");
  }

  setPartitionedTable(tableObject.getPartitionKeys() != null
      && !tableObject.getPartitionKeys().isEmpty());

  // partition values are specified on non-partitioned table
  if (!isPartitionedTable()
      && (staticPartitionValues != null && !staticPartitionValues.isEmpty())) {
    // Invalid if table is not partitioned, but endPoint's partitionVals is not empty
    String errMsg = this.toString() + " specifies partitions for un-partitioned table";
    LOG.error(errMsg);
    throw new ConnectionError(errMsg);
  }
}
/**
 * Generate a statement of the form SELECT compute_stats(col1), compute_stats(col2), ...,
 * similar to the one generated from ANALYZE TABLE t1 COMPUTE STATISTICS FOR COLUMNS,
 * but with t1 replaced by a TABLE(VALUES(cast(null as int), cast(null as string))) AS t1(col1, col2).
 *
 * We use the TABLE-VALUES statement for computing stats for a CTAS statement because in that case
 * the table has not been created yet. Once the plan for the SELECT statement is generated,
 * we connect it to the existing CTAS plan as we do for INSERT or INSERT OVERWRITE.
 */
public void insertTableValuesAnalyzePipeline() throws SemanticException {
  // Instead of starting from an analyze statement, we just generate the Select plan
  boolean isPartitionStats = conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS)
      && tbl.isPartitioned();
  if (isPartitionStats) {
    partSpec = new HashMap<>();
    List<String> partKeys = Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys());
    for (String partKey : partKeys) {
      partSpec.put(partKey, null);
    }
  }
  String command = ColumnStatsSemanticAnalyzer.genRewrittenQuery(
      tbl, Utilities.getColumnNamesFromFieldSchema(tbl.getCols()), conf, partSpec,
      isPartitionStats, true);
  insertAnalyzePipeline(command, true);
}
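// Shape of the rewritten query (illustrative, for a CTAS target with columns
// (col1 int, col2 string)); the exact text is produced by
// ColumnStatsSemanticAnalyzer.genRewrittenQuery:
//
//   SELECT compute_stats(col1), compute_stats(col2)
//   FROM TABLE(VALUES(cast(null as int), cast(null as string))) AS t1(col1, col2)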
public HiveWorkUnit(HiveDataset hiveDataset, Partition partition) {
  this(hiveDataset);
  setPartitionName(partition.getName());
  setPartitionLocation(partition.getLocation());
  setPartitionKeys(partition.getTable().getPartitionKeys());
}
public Properties getSchemaFromTableSchema(Properties tblSchema) {
  return MetaStoreUtils.getPartSchemaFromTableSchema(tPartition.getSd(),
      table.getTTable().getSd(), tPartition.getParameters(), table.getDbName(),
      table.getTableName(), table.getPartitionKeys(), tblSchema);
}
@Test
public void testDroppedPartitions() throws Exception {
  WorkUnitState previousWus = new WorkUnitState();
  previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "db@test_dataset_urn");
  previousWus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  previousWus.setActualHighWatermark(
      new MultiKeyValueLongWatermark(ImmutableMap.of("2015-01", 100L, "2015-02", 101L)));

  SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus));
  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(state);

  Table table = mockTable("test_dataset_urn");
  Mockito.when(table.getPartitionKeys())
      .thenReturn(ImmutableList.of(new FieldSchema("year", "string", "")));

  // partition 2015 replaces 2015-01 and 2015-02
  Partition partition2015 = mockPartition(table, ImmutableList.of("2015"));
  Mockito.when(partition2015.getParameters()).thenReturn(
      ImmutableMap.of(AbstractAvroToOrcConverter.REPLACED_PARTITIONS_HIVE_METASTORE_KEY,
          "2015-01|2015-02"));
  watermarker.onPartitionProcessBegin(partition2015, 0L, 0L);

  Assert.assertEquals(watermarker.getExpectedHighWatermarks().get("db@test_dataset_urn"),
      ImmutableMap.of("2015", 0L));
}
private boolean createDynPartSpec(ASTNode ast) {
  // The parent null check guards all parent-type tests below, so the
  // TOK_DESTINATION / TOK_ANALYZE comparisons cannot NPE when the node
  // has no parent.
  return ast.getToken().getType() != HiveParser.TOK_CREATETABLE
      && ast.getToken().getType() != HiveParser.TOK_CREATE_MATERIALIZED_VIEW
      && ast.getToken().getType() != HiveParser.TOK_ALTER_MATERIALIZED_VIEW
      && tableHandle.getPartitionKeys().size() > 0
      && ast.getParent() != null
      && (ast.getParent().getType() == HiveParser.TOK_INSERT_INTO
          || ast.getParent().getType() == HiveParser.TOK_INSERT
          || ast.getParent().getType() == HiveParser.TOK_DESTINATION
          || ast.getParent().getType() == HiveParser.TOK_ANALYZE);
}

public TableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartitionsSpec,
public PartitionDesc(final Partition part, final TableDesc tableDesc) throws HiveException {
  PartitionDescConstructorHelper(part, tableDesc, true);
  if (Utilities.isInputFileFormatSelfDescribing(this)) {
    // if the input format is self-describing, there is no need to send column
    // info per partition, since it is not used anyway
    Table tbl = part.getTable();
    setProperties(MetaStoreUtils.getSchemaWithoutCols(part.getTPartition().getSd(),
        part.getParameters(), tbl.getDbName(), tbl.getTableName(), tbl.getPartitionKeys()));
  } else {
    setProperties(part.getMetadataFromPartitionSchema());
  }
}
private DynamicPartitionCtx checkDynPart(QB qb, QBMetaData qbm, Table dest_tab,
    Map<String, String> partSpec, String dest) throws SemanticException {
  List<FieldSchema> parts = dest_tab.getPartitionKeys();
  if (parts == null || parts.isEmpty()) {
    return null; // table is not partitioned
  }
  if (partSpec == null || partSpec.size() == 0) { // user did NOT specify partition
    throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest),
        ErrorMsg.NEED_PARTITION_ERROR.getMsg()));
  }
  DynamicPartitionCtx dpCtx = qbm.getDPCtx(dest);
  if (dpCtx == null) {
    dest_tab.validatePartColumnNames(partSpec, false);
    dpCtx = new DynamicPartitionCtx(partSpec,
        conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME),
        conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
    qbm.setDPCtx(dest, dpCtx);
  }
  if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) { // allow DP
    throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest),
        ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg()));
  }
  if (dest_tab.getNumBuckets() > 0) {
    dpCtx.setNumBuckets(dest_tab.getNumBuckets());
  }
  return dpCtx;
}
public PartitionDesc(final Partition part) throws HiveException {
  PartitionDescConstructorHelper(part, getTableDesc(part.getTable()), true);
  if (Utilities.isInputFileFormatSelfDescribing(this)) {
    // if the input format is self-describing, there is no need to send column
    // info per partition, since it is not used anyway
    Table tbl = part.getTable();
    setProperties(MetaStoreUtils.getSchemaWithoutCols(part.getTPartition().getSd(),
        part.getTPartition().getSd(), part.getParameters(), tbl.getDbName(),
        tbl.getTableName(), tbl.getPartitionKeys()));
  } else {
    setProperties(part.getMetadataFromPartitionSchema());
  }
}