/**
 * Returns the null order in the key columns.
 *
 * @return null, which means default for all key columns, or a String
 *         of the same length as key columns, that consists of only "a"
 *         (null first) and "z" (null last).
 */
@Explain(displayName = "null sort order", explainLevels = { Level.EXTENDED })
public String getNullOrder() {
  return keySerializeInfo.getProperties().getProperty(
      org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_SORT_ORDER);
}
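A consumer of this property interprets the string positionally, one character per key column. The helper below is a hypothetical illustration (it is not part of TableDesc or ReduceSinkDesc), assuming 'a' and 'z' carry the meanings documented above:

// Hypothetical helper, for illustration only: decodes a non-null null-sort-order string
// such as "az" into per-key-column flags ('a' = nulls first, 'z' = nulls last).
// A null result from getNullOrder() means the engine default applies to every key column.
static boolean[] decodeNullOrder(String nullOrder) {
  boolean[] nullsFirst = new boolean[nullOrder.length()];
  for (int i = 0; i < nullOrder.length(); i++) {
    nullsFirst[i] = nullOrder.charAt(i) == 'a';
  }
  return nullsFirst;
}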
@Override
public Object clone() {
  TableDesc ret = new TableDesc();
  ret.setInputFileFormatClass(inputFileFormatClass);
  ret.setOutputFileFormatClass(outputFileFormatClass);
  Properties newProp = new Properties();
  Enumeration<Object> keysProp = properties.keys();
  while (keysProp.hasMoreElements()) {
    Object key = keysProp.nextElement();
    newProp.put(key, properties.get(key));
  }
  ret.setProperties(newProp);
  if (jobProperties != null) {
    ret.jobProperties = new LinkedHashMap<String, String>(jobProperties);
  }
  return ret;
}
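Because clone() copies the Properties entries and the job-properties map into fresh containers, mutating the copy should not leak back into the original. A minimal usage sketch, assuming only the no-arg constructor and accessors already shown above:

// Minimal sketch: the clone owns its own Properties instance, so edits to the copy
// leave the original descriptor untouched.
TableDesc original = new TableDesc();
Properties props = new Properties();
props.setProperty("serialization.lib", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
original.setProperties(props);

TableDesc copy = (TableDesc) original.clone();
copy.getProperties().setProperty("serialization.lib", "some.other.SerDe");

// Still the original value.
assert "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"
    .equals(original.getProperties().getProperty("serialization.lib"));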
/**
 * Copies the storage handler properties configured for a table descriptor to a runtime job
 * configuration. This differs from {@link #copyTableJobPropertiesToConf(org.apache.hadoop.hive.ql.plan.TableDesc, org.apache.hadoop.mapred.JobConf)}
 * in that it does not allow parameters already set in the job to override the values from the
 * table. This is important for setting the config up for reading,
 * as the job may already have values in it from another table.
 * @param tbl table descriptor whose properties are copied
 * @param job job configuration to receive the properties
 */
public static void copyTablePropertiesToConf(TableDesc tbl, JobConf job) throws HiveException {
  Properties tblProperties = tbl.getProperties();
  for (String name : tblProperties.stringPropertyNames()) {
    String val = (String) tblProperties.get(name);
    if (val != null) {
      job.set(name, StringEscapeUtils.escapeJava(val));
    }
  }
  Map<String, String> jobProperties = tbl.getJobProperties();
  if (jobProperties != null) {
    for (Map.Entry<String, String> entry : jobProperties.entrySet()) {
      job.set(entry.getKey(), entry.getValue());
    }
  }
}
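A short usage sketch follows, assuming this method lives on org.apache.hadoop.hive.ql.exec.Utilities as in Hive; the property name is made up for illustration. The point is that table-level values win over whatever a previously configured table left behind in the JobConf:

// Sketch: push the descriptor's properties into the JobConf before reading this table.
Properties props = new Properties();
props.setProperty("my.table.prop", "value-for-this-table"); // hypothetical property name
TableDesc tblDesc = new TableDesc(TextInputFormat.class,
    HiveIgnoreKeyTextOutputFormat.class, props);

JobConf job = new JobConf();
job.set("my.table.prop", "stale-value-from-previous-table");

Utilities.copyTablePropertiesToConf(tblDesc, job); // may throw HiveException
// The table value overrides the stale job value left by an earlier read.
assert "value-for-this-table".equals(job.get("my.table.prop"));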
public LoadTableDesc(final Path sourcePath, final TableDesc table,
    final Map<String, String> partitionSpec, final LoadFileType loadFileType,
    final AcidUtils.Operation writeType, Long currentWriteId) {
  super(sourcePath, writeType);
  if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
    Utilities.FILE_OP_LOGGER.trace("creating part LTD from " + sourcePath + " to "
        + ((table.getProperties() == null) ? "null" : table.getTableName()));
  }
  init(table, partitionSpec, loadFileType, currentWriteId);
}
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  TableDesc tbl = this.getConf().getTbl();
  try {
    Deserializer serde = tbl.getDeserializerClass().newInstance();
    SerDeUtils.initializeSerDe(serde, hconf, tbl.getProperties(), null);
    this.outputObjInspector = serde.getObjectInspector();
  } catch (Exception e) {
    // The exception is attached to the log call, so no separate stack-trace dump is needed.
    LOG.error("Error generating output object inspector from the dummy object", e);
  }
}
HiveStorageHandler storageHandler =
    HiveUtils.getStorageHandler(
        Hive.get().getConf(),
        tableDesc.getProperties().getProperty(
            org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE));
if (storageHandler != null) {
  Map<String, String> jobProperties = new LinkedHashMap<String, String>();
  Map<String, String> jobSecrets = new LinkedHashMap<String, String>();
  if (input) {
    try {
      // Once the storage handler has contributed its input-side settings, the collected
      // maps are attached to the table descriptor.
      tableDesc.setJobProperties(jobProperties);
      tableDesc.setJobSecrets(jobSecrets);
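To make the hook concrete, here is a hedged sketch of a storage handler contributing to that jobProperties map; it assumes Hive's DefaultStorageHandler base class, and the class and property names are made up:

// Illustrative storage handler (hypothetical class and property name): whatever it puts
// into jobProperties ends up on the TableDesc via setJobProperties() for the read-side job.
public class ExampleStorageHandler extends DefaultStorageHandler {
  @Override
  public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
    jobProperties.put("example.connection.url",
        tableDesc.getProperties().getProperty("example.connection.url", "jdbc:example:default"));
  }
}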
Configuration conf = new Configuration();

Path nonExistentPath1 = new Path(UUID.randomUUID().toString());
Path nonExistentPath2 = new Path(UUID.randomUUID().toString());

when(mockTableDesc.isNonNative()).thenReturn(false);
when(mockTableDesc.getProperties()).thenReturn(new Properties());

when(mockPartitionDesc.getProperties()).thenReturn(new Properties());
when(mockPartitionDesc.getTableDesc()).thenReturn(mockTableDesc);
doReturn(HiveSequenceFileOutputFormat.class).when(
    mockPartitionDesc).getOutputFileFormatClass();

Path scratchDir = new Path(HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR));

List<Path> inputPaths1 = Utilities.getInputPaths(jobConf, mapWork1, scratchDir,
    mock(Context.class), false);
inputPaths.addAll(inputPaths1);
assertFalse(nonExistentPath1.getFileSystem(conf).exists(nonExistentPath1));

List<Path> inputPaths2 = Utilities.getInputPaths(jobConf, mapWork2, scratchDir,
    mock(Context.class), false);
inputPaths.addAll(inputPaths2);
props.put(serdeConstants.SERIALIZATION_LIB, storageHandler.getSerDeClass().getName());
TableDesc tableDesc = new TableDesc(storageHandler.getInputFormatClass(),
    IgnoreKeyTextOutputFormat.class, props);
if (tableDesc.getJobProperties() == null) {
  tableDesc.setJobProperties(new HashMap<String, String>());
}
for (Map.Entry<String, String> el : conf) {
  tableDesc.getJobProperties().put(el.getKey(), el.getValue());
}

Properties mytableProperties = tableDesc.getProperties();
mytableProperties.setProperty(
    org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME,
    outputJobInfo.getDatabaseName() + "." + outputJobInfo.getTableName());

tableDesc.getJobProperties().put(
    HCatConstants.HCAT_KEY_OUTPUT_INFO,
    HCatUtil.serialize(outputJobInfo));

storageHandler.configureOutputJobProperties(tableDesc,
    jobProperties);

Map<String, String> tableJobProperties = tableDesc.getJobProperties();
if (tableJobProperties != null) {
  if (tableJobProperties.containsKey(HCatConstants.HCAT_KEY_OUTPUT_INFO)) {
@BeforeClass
public static void classSetup() {
  Properties properties = new Properties();
  properties.setProperty(serdeConstants.SERIALIZATION_LIB, TFSOSerDe.class.getName());
  properties.setProperty(hive_metastoreConstants.META_TABLE_NAME, "tfs");
  nonAcidTableDescriptor = new TableDesc(TFSOInputFormat.class, TFSOOutputFormat.class, properties);

  properties.setProperty(serdeConstants.LIST_COLUMNS, "data");
  properties = new Properties(properties);
  properties.setProperty(hive_metastoreConstants.BUCKET_COUNT, "1");
  acidTableDescriptor = new TableDesc(TFSOInputFormat.class, TFSOOutputFormat.class, properties);

  tmpdir = new File(System.getProperty("java.io.tmpdir") + System.getProperty("file.separator")
      + "testFileSinkOperator");
  tmpdir.mkdir();
  tmpdir.deleteOnExit();
}
    Runtime.getRuntime().availableProcessors() * 2);

MapWork mapWork = new MapWork();
Path testTablePath = new Path("testTable");
Path[] testPartitionsPaths = new Path[numPartitions];

TableDesc mockTableDesc = mock(TableDesc.class);
when(mockTableDesc.isNonNative()).thenReturn(false);
when(mockTableDesc.getProperties()).thenReturn(new Properties());

when(mockPartitionDesc.getProperties()).thenReturn(new Properties());
when(mockPartitionDesc.getTableDesc()).thenReturn(mockTableDesc);
doReturn(HiveSequenceFileOutputFormat.class).when(
    mockPartitionDesc).getOutputFileFormatClass();

for (int i = 0; i < numPartitions; i++) {
  testPartitionsPaths[i] = new Path(testTablePath, "p=" + i);
  mapWork.getPathToAliases().put(testPartitionsPaths[i], Lists.newArrayList(testPartitionName));
  mapWork.getAliasToWork().put(testPartitionName, (Operator<?>) mock(Operator.class));
  mapWork.getPathToPartitionInfo().put(testPartitionsPaths[i], mockPartitionDesc);
}

List<Path> inputPaths = Utilities.getInputPaths(jobConf, mapWork,
    new Path(HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR)),
    mock(Context.class), false);
assertEquals(inputPaths.size(), numPartitions);
private boolean checkVectorizerSupportedTypes(boolean hasLlap) {
  for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : aliasToWork.entrySet()) {
    final String alias = entry.getKey();
    Operator<? extends OperatorDesc> op = entry.getValue();
    PartitionDesc partitionDesc = aliasToPartnInfo.get(alias);
    if (op instanceof TableScanOperator && partitionDesc != null
        && partitionDesc.getTableDesc() != null) {
      final TableScanOperator tsOp = (TableScanOperator) op;
      final List<String> readColumnNames = tsOp.getNeededColumns();
      final Properties props = partitionDesc.getTableDesc().getProperties();
      final List<TypeInfo> typeInfos = TypeInfoUtils.getTypeInfosFromTypeString(
          props.getProperty(serdeConstants.LIST_COLUMN_TYPES));
      final List<String> allColumnTypes = TypeInfoUtils.getTypeStringsFromTypeInfo(typeInfos);
      final List<String> allColumnNames = Utilities.getColumnNames(props);
      hasLlap = Utilities.checkVectorizerSupportedTypes(readColumnNames, allColumnNames,
          allColumnTypes);
    }
  }
  return hasLlap;
}
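For reference, the column metadata parsed above is a pair of flat table properties. A small sketch of that parsing step, using the same serde constants and TypeInfoUtils calls; the column names and types here are made up:

// LIST_COLUMNS is comma-separated and LIST_COLUMN_TYPES is colon-separated;
// TypeInfoUtils expands the type string into one TypeInfo per column.
Properties props = new Properties();
props.setProperty(serdeConstants.LIST_COLUMNS, "id,name,tags");
props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:string:array<string>");

List<TypeInfo> typeInfos = TypeInfoUtils.getTypeInfosFromTypeString(
    props.getProperty(serdeConstants.LIST_COLUMN_TYPES));
// typeInfos now holds [int, string, array<string>]; a type the vectorizer does not
// support here would make the check above return false for the scan.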
@SuppressWarnings({"unchecked", "rawtypes"})
protected void configureJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  Properties tableProperties = tableDesc.getProperties();

  // The input format class name is read from a table property and resolved into
  // inputFormatClass (falling back to the handler's default) before being registered.
  String inputFormatClassName =
      tableProperties.getProperty(PhoenixStorageHandlerConstants.HBASE_INPUT_FORMAT_CLASS);
  tableDesc.setInputFileFormatClass((Class<? extends InputFormat>) inputFormatClass);

  String tableName = tableProperties.getProperty(
      PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME);
  if (tableName == null) {
    tableName = tableDesc.getTableName();
    tableProperties.setProperty(PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME, tableName);
  }
private ContentSummary runTestGetInputSummary(JobConf jobConf, Properties properties,
    int numOfPartitions, int bytesPerFile,
    Class<? extends InputFormat> inputFormatClass) throws IOException {
  SessionState.start(new HiveConf());

  LinkedHashMap<Path, PartitionDesc> pathToPartitionInfo = new LinkedHashMap<>();
  LinkedHashMap<Path, ArrayList<String>> pathToAliasTable = new LinkedHashMap<>();
  TableScanOperator scanOp = new TableScanOperator();
  PartitionDesc partitionDesc = new PartitionDesc(
      new TableDesc(inputFormatClass, null, properties), null);

  Path testTablePath = new Path(testTableName);
  Path[] testPartitionsPaths = new Path[numOfPartitions];
  for (int i = 0; i < numOfPartitions; i++) {
    String testPartitionName = "p=" + i;
    testPartitionsPaths[i] = new Path(testTablePath, "p=" + i);
    pathToPartitionInfo.put(testPartitionsPaths[i], partitionDesc);
    pathToAliasTable.put(testPartitionsPaths[i], Lists.newArrayList(testPartitionName));

    FSDataOutputStream out = fs.create(new Path(testPartitionsPaths[i], "test1.txt"));
    out.write(data);
    out.close();
  }

  return Utilities.getInputSummary(context, mapWork, null);
} finally {
  if (fs.exists(testTablePath)) {
Properties properties = ret.getProperties();

properties.setProperty(serdeConstants.FIELD_DELIM, directoryDesc.getFieldDelim());
properties.setProperty(serdeConstants.SERIALIZATION_FORMAT, directoryDesc.getFieldDelim());
properties.setProperty(serdeConstants.LINE_DELIM, directoryDesc.getLineDelim());

ret.setInputFileFormatClass(JavaUtils.loadClass(directoryDesc.getInputFormat()));
ret.setOutputFileFormatClass(JavaUtils.loadClass(directoryDesc.getOutputFormat()));
public void testAvoidSplitCombination() throws Exception {
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf);

  TableDesc tblDesc = Utilities.defaultTd;
  tblDesc.setInputFileFormatClass(TestSkipCombineInputFormat.class);
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder1"), partDesc);
  pt.put(new Path("/tmp/testfolder2"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Path mapWorkPath = new Path("/tmp/" + System.getProperty("user.name"), "hive");
  Utilities.setMapRedWork(conf, mrwork, mapWorkPath);

  try {
    Path[] paths = new Path[2];
    paths[0] = new Path("/tmp/testfolder1");
    paths[1] = new Path("/tmp/testfolder2");
    CombineHiveInputFormat combineInputFormat =
        ReflectionUtils.newInstance(CombineHiveInputFormat.class, conf);
    combineInputFormat.pathToPartitionInfo = Utilities.getMapWork(conf).getPathToPartitionInfo();
    Set results = combineInputFormat.getNonCombinablePathIndices(job, paths, 2);
    assertEquals("Should have both path indices in the results set", 2, results.size());
  } finally {
    // Cleanup the mapwork path
    FileSystem.get(conf).delete(mapWorkPath, true);
  }
}
if (rowLength == null) {
  LOG.debug("No table property in JobConf. Try to recover the table directly");
  Map<String, PartitionDesc> partitionDescMap =
      Utilities.getMapRedWork(job).getMapWork().getAliasToPartnInfo();
  for (String alias : Utilities.getMapRedWork(job).getMapWork().getAliasToPartnInfo().keySet()) {
    LOG.debug(format("the current alias: %s", alias));
    rowLength = partitionDescMap.get(alias).getTableDesc().getProperties()
        .getProperty(TD_ROW_LENGTH);
    if (rowLength != null) {
      break;
    }
  }
}

compressionCodecs = new CompressionCodecFactory(job);
codec = compressionCodecs.getCodec(file);
FileSystem fs = file.getFileSystem(job);
FSDataInputStream fileIn = fs.open(fileSplit.getPath());
private FileSinkOperator createFileSinkOperator(Path finalDirName) {
  FileSinkOperator fileSinkOperator = mock(FileSinkOperator.class);

  TableDesc tableDesc = new TableDesc(HiveInputFormat.class, HiveOutputFormat.class,
      new Properties());
  FileSinkDesc fileSinkDesc = new FileSinkDesc(finalDirName, tableDesc, false);
  fileSinkDesc.setDirName(finalDirName);

  when(fileSinkOperator.getConf()).thenReturn(fileSinkDesc);
  when(fileSinkOperator.getSchema()).thenReturn(mock(RowSchema.class));
  fileSinkDesc.setTableInfo(tableDesc);
  when(fileSinkOperator.getCompilationOpContext()).thenReturn(mock(CompilationOpContext.class));

  return fileSinkOperator;
}
/**
 * Get footer line count for a table.
 *
 * @param table
 *          Table description for target table.
 *
 * @param job
 *          Job configuration for current job.
 *
 * @return Footer line count of the table.
 * @throws IOException if the footer count is not a valid integer or exceeds
 *           hive.file.max.footer.
 */
public static int getFooterCount(TableDesc table, JobConf job) throws IOException {
  int footerCount;
  try {
    footerCount = Integer.parseInt(
        table.getProperties().getProperty(serdeConstants.FOOTER_COUNT, "0"));
    if (footerCount > HiveConf.getIntVar(job, HiveConf.ConfVars.HIVE_FILE_MAX_FOOTER)) {
      throw new IOException("footer number exceeds the limit defined in hive.file.max.footer");
    }
  } catch (NumberFormatException nfe) {
    // Footer line number must be set as an integer.
    throw new IOException(nfe);
  }
  return footerCount;
}
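A brief usage sketch, assuming this helper sits on org.apache.hadoop.hive.ql.exec.Utilities as in Hive and that serdeConstants.FOOTER_COUNT names the table's footer-count property:

// Sketch: a table that declares two trailing footer lines to skip.
Properties props = new Properties();
props.setProperty(serdeConstants.FOOTER_COUNT, "2");
TableDesc tblDesc = new TableDesc(TextInputFormat.class,
    HiveIgnoreKeyTextOutputFormat.class, props);

JobConf job = new JobConf();
// Returns 2; throws IOException instead if the value exceeded hive.file.max.footer
// or was not parseable as an integer.
int footerCount = Utilities.getFooterCount(tblDesc, job);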
private VectorPartitionContext(PartitionDesc partDesc) {
  this.partDesc = partDesc;
  TableDesc td = partDesc.getTableDesc();

  // Use table properties in case of unpartitioned tables,
  // and the union of table properties and partition properties, with partition
  // taking precedence, in the case of partitioned tables
  Properties overlayedProps =
      SerDeUtils.createOverlayedProperties(td.getProperties(), partDesc.getProperties());

  Map<String, String> partSpec = partDesc.getPartSpec();
  tableName = String.valueOf(overlayedProps.getProperty("name"));
  partName = String.valueOf(partSpec);
}
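The precedence described in the comment can be illustrated with plain java.util.Properties defaults; this is only a sketch of the overlay idea, not the actual SerDeUtils helper:

// Sketch of the overlay semantics: partition-level values shadow table-level values,
// while keys defined only at the table level stay visible.
Properties tableProps = new Properties();
tableProps.setProperty("serialization.format", ",");
tableProps.setProperty("name", "db.tbl");

Properties overlayed = new Properties(tableProps);     // table props act as defaults
overlayed.setProperty("serialization.format", "\t");   // partition overrides the delimiter

assert "\t".equals(overlayed.getProperty("serialization.format"));
assert "db.tbl".equals(overlayed.getProperty("name")); // inherited from the table level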
    boolean[] cacheIncludes, int allocSize) throws IOException {
  if (!HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED)
      || !HiveConf.getBoolVar(jobConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED)
      || !(sourceIf instanceof TextInputFormat) || !(serDe instanceof LazySimpleSerDe)) {
    return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
  }
  Path path = splitPath.getFileSystem(daemonConf).makeQualified(splitPath);
  PartitionDesc partDesc = HiveFileFormatUtils.getFromPathRecursively(parts, path, null);
  if (partDesc == null) {
    return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
  }
  Properties tblProps = partDesc.getTableDesc().getProperties();
  if ("true".equalsIgnoreCase(tblProps.getProperty(
      serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST))) {
    LlapIoImpl.LOG.info("Not using VectorDeserializeOrcWriter due to "
        + serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST);