@Override
protected GroupType build(String name) {
  Preconditions.checkState(!fields.isEmpty(), "Cannot build an empty group");
  return new GroupType(repetition, name, originalType, fields, id);
}
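If this is Parquet's Types.GroupBuilder (the GroupType constructor and checkState message suggest it is), the guard fires when a group is finalized with no fields. A minimal sketch of triggering it through the public fluent API, assuming the standard org.apache.parquet.schema.Types entry points:

import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Types;

public class GroupBuilderDemo {
  public static void main(String[] args) {
    // A group with at least one field builds fine.
    GroupType point = Types.requiredGroup()
        .required(PrimitiveTypeName.INT64).named("x")
        .required(PrimitiveTypeName.INT64).named("y")
        .named("point");
    System.out.println(point);

    // An empty group trips the guard above: checkState throws
    // IllegalStateException("Cannot build an empty group").
    try {
      Types.requiredGroup().named("empty");
    } catch (IllegalStateException e) {
      System.out.println("Rejected: " + e.getMessage());
    }
  }
}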
// The condition of this ternary was truncated in the source; the lookup below is a
// hypothetical reconstruction (conf, hasProperty, getIntProperty, and the key are assumptions):
final int numFilesToRetain = conf.hasProperty(NUM_FILES_TO_RETAIN_KEY)
    ? conf.getIntProperty(NUM_FILES_TO_RETAIN_KEY)
    : HDFSMetadataManager.DEFAULT_NUM_METADATA_FILES_TO_RETAIN;
Preconditions.checkState(numFilesToRetain > 0,
    "Number of files to retain cannot be <= 0");
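The same fall-back-then-validate pattern can be shown self-contained with plain Guava; the default value and the resolveFilesToRetain helper below are illustrative stand-ins, not Marmaray's actual code:

import com.google.common.base.Preconditions;

public class RetentionConfig {
  // Illustrative default; Marmaray's actual DEFAULT_NUM_METADATA_FILES_TO_RETAIN may differ.
  static final int DEFAULT_NUM_METADATA_FILES_TO_RETAIN = 5;

  static int resolveFilesToRetain(final Integer configured) {
    final int numFilesToRetain = configured != null
        ? configured
        : DEFAULT_NUM_METADATA_FILES_TO_RETAIN;
    // checkState throws IllegalStateException with this message when the condition is false.
    Preconditions.checkState(numFilesToRetain > 0, "Number of files to retain cannot be <= 0");
    return numFilesToRetain;
  }

  public static void main(String[] args) {
    System.out.println(resolveFilesToRetain(null)); // 5 (falls back to the default)
    System.out.println(resolveFilesToRetain(10));   // 10
    resolveFilesToRetain(0);                        // throws IllegalStateException
  }
}

Arguably checkArgument fits a caller-supplied value better, but checkState matches the snippet it illustrates.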
case JSON:
case BSON:
  Preconditions.checkState(primitiveType == PrimitiveTypeName.BINARY,
      originalType.toString() + " can only annotate binary fields");
  break;
case DECIMAL:
  // The disjunction was cut off in the source; the BINARY and FIXED_LEN_BYTE_ARRAY
  // cases and the message are reconstructed from the branches below.
  Preconditions.checkState(
      (primitiveType == PrimitiveTypeName.INT32) ||
      (primitiveType == PrimitiveTypeName.INT64) ||
      (primitiveType == PrimitiveTypeName.BINARY) ||
      (primitiveType == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY),
      "DECIMAL can only annotate INT32, INT64, FIXED, and BINARY");
  if (primitiveType == PrimitiveTypeName.INT32) {
    Preconditions.checkState(
        meta.getPrecision() <= MAX_PRECISION_INT32,
        "INT32 cannot store " + meta.getPrecision() + " digits "
            + "(max " + MAX_PRECISION_INT32 + ")");
  } else if (primitiveType == PrimitiveTypeName.INT64) {
    Preconditions.checkState(
        meta.getPrecision() <= MAX_PRECISION_INT64,
        "INT64 cannot store " + meta.getPrecision() + " digits "
            + "(max " + MAX_PRECISION_INT64 + ")");
  } else if (primitiveType == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
    // The tail of this message was truncated; completed to match the two branches above.
    Preconditions.checkState(
        meta.getPrecision() <= maxPrecision(length),
        "FIXED(" + length + ") cannot store " + meta.getPrecision() + " digits "
            + "(max " + maxPrecision(length) + ")");
  }
  break;
case INT_16:
case INT_32:
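The FIXED branch delegates to maxPrecision(length), which the snippet does not show. A sketch of how it can be computed, assuming the unscaled decimal value is stored as a two's-complement integer in length bytes: the largest magnitude is 2^(8*length - 1) - 1, so the maximum precision is floor(log10(2^(8*length - 1) - 1)).

import com.google.common.base.Preconditions;

public class DecimalPrecision {
  // Max base-10 digits representable in numBytes of two's-complement storage:
  // floor(log10(2^(8 * numBytes - 1) - 1)).
  static long maxPrecision(final int numBytes) {
    Preconditions.checkState(numBytes > 0, "numBytes must be positive");
    return Math.round(Math.floor(Math.log10(Math.pow(2, 8 * numBytes - 1) - 1)));
  }

  public static void main(String[] args) {
    System.out.println(maxPrecision(4));  // 9  -> matches MAX_PRECISION_INT32
    System.out.println(maxPrecision(8));  // 18 -> matches MAX_PRECISION_INT64
    System.out.println(maxPrecision(16)); // 38 (FIXED(16), the common DECIMAL(38) layout)
  }
}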
@Override
public JavaRDD<AvroPayload> getData(@NonNull final ParquetWorkUnitCalculatorResult workUnitCalcResult) {
  Preconditions.checkState(workUnitCalcResult.hasWorkUnits(),
      "No work to process for: " + hiveConf.getDataPath());
  /*
   * The current implementation of HiveSource assumes that only a single work unit exists,
   * corresponding to the single partition that is processed per job.
   */
  final List<String> workUnits = workUnitCalcResult.getWorkUnits();
  final String hdfsPath = new Path(this.hiveConf.getDataPath(), workUnits.get(0)).toString();
  log.info("Reading data from path: {}", hdfsPath);
  final Dataset<Row> data = this.sqlContext.read().parquet(hdfsPath);
  final int numPartitions = calculateHiveNumPartitions(data);
  log.info("Using {} partitions", numPartitions);
  final JavaRDD<AvroPayload> hiveRawData = data
      .coalesce(numPartitions)
      .javaRDD()
      .flatMap(row -> {
        final List<AvroPayload> payloads = new ArrayList<>();
        this.converter.convert(row)
            .forEach(d -> payloads.add(d.getSuccessData().get().getData()));
        return payloads.iterator();
      });
  return hiveRawData;
}
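The core shape here is coalesce-then-flatMap: shrink the partition count without a full shuffle, then expand each input row into zero or more payloads. A self-contained sketch of that shape with stand-in types (strings instead of Row/AvroPayload; the real converter and calculateHiveNumPartitions are not reproduced):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

public class CoalesceFlatMapDemo {
  public static void main(String[] args) {
    final SparkSession spark = SparkSession.builder()
        .master("local[2]").appName("coalesce-flatmap-demo").getOrCreate();
    final JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

    // Stand-in for the parquet Dataset<Row>: each input record may expand to
    // zero or more output payloads, mirroring converter.convert(row) above.
    final JavaRDD<String> rows = jsc.parallelize(Arrays.asList("a b", "c", ""), 4);

    final JavaRDD<String> payloads = rows
        .coalesce(2)                // merge partitions without a shuffle
        .flatMap(row -> {
          final List<String> out = new ArrayList<>();
          for (String token : row.split(" ")) {
            if (!token.isEmpty()) {
              out.add(token);       // only "successful" conversions are emitted
            }
          }
          return out.iterator();
        });

    System.out.println(payloads.collect()); // [a, b, c]
    spark.stop();
  }
}

coalesce merges existing partitions rather than redistributing records, which is why it is preferred over repartition when only reducing the partition count. Note also that the original body calls getSuccessData().get() unconditionally, which assumes every row converts successfully; a defensive variant would filter on isPresent() first.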