private StructType lazyType() {
  if (type == null) {
    // Convert the lazily-loaded Iceberg schema to a Spark StructType once and cache it.
    this.type = convert(lazySchema());
  }
  return type;
}
@Override
public ValueWriter<?> record(Schema record, List<String> names, List<ValueWriter<?>> fields) {
  // Look up the Spark type for each Avro field by its Iceberg field ID so the
  // struct writer knows how to read each value out of the InternalRow.
  List<DataType> types = Lists.newArrayList();
  for (Schema.Field field : record.getFields()) {
    types.add(convert(schema.findType(getFieldId(field))));
  }
  return SparkValueWriters.struct(fields, types);
}
@Override
public ValueWriter<?> map(Schema map, ValueWriter<?> valueWriter) {
  // Avro map keys are always strings, so only the value writer is passed in.
  Type keyType = schema.findType(AvroSchemaUtil.getKeyId(map));
  Type valueType = schema.findType(AvroSchemaUtil.getValueId(map));
  return SparkValueWriters.map(
      SparkValueWriters.strings(), convert(keyType),
      valueWriter, convert(valueType));
}
@Override
public Optional<DataSourceWriter> createWriter(
    String jobId, StructType dfStruct, SaveMode mode, DataSourceOptions options) {
  Preconditions.checkArgument(mode == SaveMode.Append, "Save mode %s is not supported", mode);

  Table table = findTable(options);

  // Convert the incoming Spark struct using the table schema so field IDs line up,
  // then validate that the dataframe can be written to the table.
  Schema dfSchema = SparkSchemaUtil.convert(table.schema(), dfStruct);
  List<String> errors = CheckCompatibility.writeCompatibilityErrors(table.schema(), dfSchema);
  if (!errors.isEmpty()) {
    StringBuilder sb = new StringBuilder();
    sb.append("Cannot write incompatible dataframe to table with schema:\n")
        .append(table.schema()).append("\nProblems:");
    for (String error : errors) {
      sb.append("\n* ").append(error);
    }
    throw new IllegalArgumentException(sb.toString());
  }

  // The write option takes precedence over the table's default file format property.
  Optional<String> formatOption = options.get("iceberg.write.format");
  FileFormat format;
  if (formatOption.isPresent()) {
    format = FileFormat.valueOf(formatOption.get().toUpperCase(Locale.ENGLISH));
  } else {
    format = FileFormat.valueOf(table.properties()
        .getOrDefault(DEFAULT_FILE_FORMAT, DEFAULT_FILE_FORMAT_DEFAULT)
        .toUpperCase(Locale.ENGLISH));
  }

  return Optional.of(new Writer(table, lazyConf(), format));
}
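// For context, a hedged sketch of how this writer is reached from the DataFrame
// API. "iceberg.write.format" is the option checked above; the format value and
// the save path are illustrative, not taken from this codebase.
df.write()
    .format("iceberg")
    .option("iceberg.write.format", "avro")  // overrides the table's default format
    .mode(SaveMode.Append)                   // the only mode accepted above
    .save("hdfs://nn:8020/warehouse/db/table");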
@Override
public ValueWriter<?> array(Schema array, ValueWriter<?> elementWriter) {
  LogicalType logical = array.getLogicalType();
  if (logical != null && "map".equals(logical.getName())) {
    // A map with non-string keys is stored in Avro as an array of key/value
    // records; reuse the struct writer's key and value writers directly.
    Type keyType = schema.findType(getFieldId(array.getElementType().getField("key")));
    Type valueType = schema.findType(getFieldId(array.getElementType().getField("value")));
    ValueWriter<?>[] writers = ((SparkValueWriters.StructWriter) elementWriter).writers;
    return SparkValueWriters.arrayMap(
        writers[0], convert(keyType),
        writers[1], convert(valueType));
  }

  Type elementType = schema.findType(AvroSchemaUtil.getElementId(array));
  return SparkValueWriters.array(elementWriter, convert(elementType));
}
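// A brief note on the array-map branch: Iceberg allows map keys of any type,
// but Avro's native map encoding supports only string keys, so non-string-key
// maps are represented as an array of {key, value} records tagged with the
// "map" logical type that the branch above detects. A minimal sketch of such a
// type using the Iceberg type API; the field IDs are arbitrary.
Types.MapType intKeyMap = Types.MapType.ofRequired(
    1, 2,                     // key and value field IDs
    Types.IntegerType.get(),  // non-string key type forces the array encoding
    Types.StringType.get());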
private static void assertEqualsUnsafe(Types.ListType list, Collection<?> expected, ArrayData actual) {
  Type elementType = list.elementType();
  List<?> expectedElements = Lists.newArrayList(expected);
  for (int i = 0; i < expectedElements.size(); i += 1) {
    Object expectedValue = expectedElements.get(i);
    Object actualValue = actual.get(i, convert(elementType));
    assertEqualsUnsafe(elementType, expectedValue, actualValue);
  }
}
PartitionRowConverter(Schema partitionSchema, PartitionSpec spec) {
  StructType partitionType = SparkSchemaUtil.convert(partitionSchema);
  StructField[] fields = partitionType.fields();

  this.types = new DataType[fields.length];
  this.positions = new int[types.length];
  this.javaTypes = new Class<?>[types.length];
  this.reusedRow = new GenericInternalRow(types.length);

  List<PartitionField> partitionFields = spec.fields();
  for (int rowIndex = 0; rowIndex < fields.length; rowIndex += 1) {
    this.types[rowIndex] = fields[rowIndex].dataType();

    int sourceId = partitionSchema.columns().get(rowIndex).fieldId();
    for (int specIndex = 0; specIndex < partitionFields.size(); specIndex += 1) {
      PartitionField field = partitionFields.get(specIndex);
      // Only identity-transformed partition fields can be read back as column values.
      if (field.sourceId() == sourceId && "identity".equals(field.transform().toString())) {
        positions[rowIndex] = specIndex;
        javaTypes[rowIndex] = spec.javaClasses()[specIndex];
        break;
      }
    }
  }
}
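// To illustrate the identity check in the inner loop, a hedged sketch of a spec
// this converter can fully map. Both fields use the identity transform, so each
// partition value is surfaced directly as a column; a bucket or truncate
// transform would be skipped by the check above. Column names are hypothetical.
PartitionSpec spec = PartitionSpec.builderFor(tableSchema)
    .identity("category")
    .identity("event_date")
    .build();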
private static void assertEqualsUnsafe(Types.MapType map, Map<?, ?> expected, MapData actual) {
  Type keyType = map.keyType();
  Type valueType = map.valueType();

  List<Map.Entry<?, ?>> expectedElements = Lists.newArrayList(expected.entrySet());
  ArrayData actualKeys = actual.keyArray();
  ArrayData actualValues = actual.valueArray();

  for (int i = 0; i < expectedElements.size(); i += 1) {
    Map.Entry<?, ?> expectedPair = expectedElements.get(i);
    Object actualKey = actualKeys.get(i, convert(keyType));
    // Values must be read with the value type, not the key type.
    Object actualValue = actualValues.get(i, convert(valueType));
    assertEqualsUnsafe(keyType, expectedPair.getKey(), actualKey);
    assertEqualsUnsafe(valueType, expectedPair.getValue(), actualValue);
  }
}
private static UnsafeProjection projection(Schema finalSchema, Schema readSchema) {
  StructType struct = convert(readSchema);

  List<AttributeReference> refs = seqAsJavaListConverter(struct.toAttributes()).asJava();
  List<Attribute> attrs = Lists.newArrayListWithExpectedSize(struct.fields().length);
  List<org.apache.spark.sql.catalyst.expressions.Expression> exprs =
      Lists.newArrayListWithExpectedSize(struct.fields().length);

  for (AttributeReference ref : refs) {
    attrs.add(ref.toAttribute());
  }

  // Select the read schema's attributes in the column order of the final schema.
  for (Types.NestedField field : finalSchema.columns()) {
    int indexInReadSchema = struct.fieldIndex(field.name());
    exprs.add(refs.get(indexInReadSchema));
  }

  return UnsafeProjection.create(
      asScalaBufferConverter(exprs).asScala().toSeq(),
      asScalaBufferConverter(attrs).asScala().toSeq());
}
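// A short usage sketch: the projection reorders a row read in readSchema column
// order into finalSchema order. Variable names here are illustrative. Note that
// UnsafeProjection.apply returns a row backed by a buffer that is reused across
// calls, so the result must be copied if it is retained.
UnsafeProjection reorder = projection(finalSchema, readSchema);
InternalRow projected = reorder.apply(rowInReadOrder);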
public static void assertEqualsUnsafe(Types.StructType struct, Record rec, InternalRow row) {
  List<Types.NestedField> fields = struct.fields();
  for (int i = 0; i < fields.size(); i += 1) {
    Type fieldType = fields.get(i).type();
    Object expectedValue = rec.get(i);
    Object actualValue = row.get(i, convert(fieldType));
    assertEqualsUnsafe(fieldType, expectedValue, actualValue);
  }
}
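// A minimal sketch of how these helpers are used in a test, assuming "expected"
// is a generic Avro Record and "actual" is the InternalRow read back through the
// Spark reader for the same schema; both names are hypothetical.
assertEqualsUnsafe(schema.asStruct(), expected, actual);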
// The pruned schema keeps the requested columns plus any extra columns
// referenced by the residual filter.
Schema requiredSchema = prune(tableSchema, convert(finalSchema), task.residual());
boolean hasJoinedPartitionColumns = !idColumns.isEmpty();
boolean hasExtraFilterColumns = requiredSchema.columns().size() != finalSchema.columns().size();
return spark.internalCreateDataFrame(JavaRDD.toRDD(rdd), convert(schema), false);
switch (format) {
  case PARQUET:
    String jsonSchema = convert(schema).json();
    return Parquet.write(file)
        .writeSupport(new ParquetWriteSupport())