/**
 * Builds a {@code Field} from its {@code TableFieldSchema} counterpart.
 *
 * <p>The name and type are always copied. Mode and description are copied only when the schema
 * supplies them. Nested fields, when present, are converted through {@code FROM_PB_FUNCTION} and
 * handed over together with the type; otherwise {@code null} is passed as the sub-field list.
 *
 * @param fieldSchemaPb the schema entry to convert
 * @return the field built from {@code fieldSchemaPb}
 */
static Field fromPb(TableFieldSchema fieldSchemaPb) {
  Builder result = new Builder();
  result.setName(fieldSchemaPb.getName());

  // Optional attributes: leave the builder untouched when the schema omits them.
  if (fieldSchemaPb.getMode() != null) {
    result.setMode(Mode.valueOf(fieldSchemaPb.getMode()));
  }
  if (fieldSchemaPb.getDescription() != null) {
    result.setDescription(fieldSchemaPb.getDescription());
  }

  FieldList nestedFields = null;
  if (fieldSchemaPb.getFields() != null) {
    nestedFields = FieldList.of(Lists.transform(fieldSchemaPb.getFields(), FROM_PB_FUNCTION));
  }

  result.setType(LegacySQLTypeName.valueOf(fieldSchemaPb.getType()), nestedFields);
  return result.build();
}
}
/**
 * Returns the name reported by {@code fieldSchema}.
 *
 * @return the result of {@code fieldSchema.getName()}
 * @see TableFieldSchema#getName()
 */
public String getName() {
  return fieldSchema.getName();
}
/**
 * Appends the head of the insert statement to {@code builder}: the fixed "insert into" prefix
 * with its destination-table placeholder, the field names separated by commas, and the closing
 * parenthesis followed by a newline.
 *
 * @param builder the buffer that receives the statement text
 * @param fieldsSchema the fields whose names become the insert column list
 */
private void appendInsert(StringBuilder builder, List<TableFieldSchema> fieldsSchema) {
  builder.append("insert into ${destinationTable}(");

  // Count down so a comma is written after every name except the last one.
  int remaining = fieldsSchema.size();
  for (TableFieldSchema column : fieldsSchema) {
    builder.append(column.getName());
    remaining--;
    if (remaining > 0) {
      builder.append(",");
    }
  }

  builder.append(" ) \n");
}
/**
 * Appends the head of the select statement to {@code builder}: the "Select " keyword, every
 * field name prefixed with "output." and separated by commas, and the opening " from( " clause
 * followed by a newline.
 *
 * @param builder the buffer that receives the statement text
 * @param fieldsSchema the fields whose names become the projected columns
 */
private void appendSelect(StringBuilder builder, List<TableFieldSchema> fieldsSchema) {
  builder.append("Select ");

  // Count down so a comma is written after every column except the last one.
  int remaining = fieldsSchema.size();
  for (TableFieldSchema column : fieldsSchema) {
    String columnName = column.getName();
    builder.append("output.").append(columnName);
    remaining--;
    if (remaining > 0) {
      builder.append(",");
    }
  }

  builder.append(" from( \n");
}
/** * Validates that a BigQuery column in Avro format can be used as a valid Datastore Entity key * name. */ public static void validateKeyColumn(TableFieldSchema column, Object columnValue) throws IllegalArgumentException { // Entity key name must be different than null if (columnValue == null) { throw new IllegalArgumentException( String.format( "Column [%s] with NULL value cannot be set as Entity name.", column.getName())); } // Entity key names cannot exceed 1500 bytes, the maximum size of an idex-able Datastore // string property: https://cloud.google.com/datastore/docs/concepts/limits if (column.getType().equals("STRING") && columnValue.toString().getBytes().length > MAX_STRING_SIZE_BYTES) { throw new IllegalArgumentException( String.format( "Column [%s] exceeding %d bytes cannot be set as Entity name.", column.getName(), MAX_STRING_SIZE_BYTES)); } // BigQuery column type must be among the supported ones (ex: cannot be RECORD) if (!SUPPORTED_KEY_NAME_TYPES.contains(column.getType())) { throw new IllegalArgumentException( String.format( "Column [%s] of type %s cannot be set as Entity name.", column.getName(), column.getType())); } }
/**
 * Appends the declaration of a single field to {@code builder}.
 *
 * <p>The emitted text depends on the field's mode and type:
 *
 * <ul>
 *   <li>REPEATED RECORD: {@code name ARRAY<STRUCT<...>>}
 *   <li>REPEATED non-RECORD: {@code name ARRAY<type>}
 *   <li>non-REPEATED RECORD: {@code name STRUCT<...>}
 *   <li>anything else: {@code name type}
 * </ul>
 *
 * where the nested part of a RECORD is produced by {@code writeOutputSchema} from the field's
 * sub-fields. A field without a mode is treated as not repeated.
 *
 * @param builder the buffer that receives the declaration
 * @param fieldSchema the field to describe
 */
public void writeField(StringBuilder builder, TableFieldSchema fieldSchema) {
  // The mode is optional in a BigQuery schema, so compare from the literal side: a missing mode
  // must fall through to the non-repeated branches instead of raising a NullPointerException.
  boolean repeated = "REPEATED".equals(fieldSchema.getMode());
  boolean record = fieldSchema.getType().equals("RECORD");

  if (repeated && record) {
    builder.append(fieldSchema.getName() + " " + "ARRAY<STRUCT<");
    writeOutputSchema(builder, fieldSchema.getFields());
    builder.append(">>");
  } else if (repeated) {
    builder.append(fieldSchema.getName() + " " + "ARRAY<");
    builder.append(fieldSchema.getType());
    builder.append(">");
  } else if (record) {
    builder.append(fieldSchema.getName() + " " + "STRUCT<");
    writeOutputSchema(builder, fieldSchema.getFields());
    builder.append(">");
  } else {
    builder.append(fieldSchema.getName() + " " + fieldSchema.getType());
  }
}
@Nullable private static Object convertNullableField( Schema avroSchema, TableFieldSchema fieldSchema, Object v) { // NULLABLE fields are represented as an Avro Union of the corresponding type and "null". verify( avroSchema.getType() == Type.UNION, "Expected Avro schema type UNION, not %s, for BigQuery NULLABLE field %s", avroSchema.getType(), fieldSchema.getName()); List<Schema> unionTypes = avroSchema.getTypes(); verify( unionTypes.size() == 2, "BigQuery NULLABLE field %s should be an Avro UNION of NULL and another type, not %s", fieldSchema.getName(), unionTypes); if (v == null) { return null; } Type firstType = unionTypes.get(0).getType(); if (!firstType.equals(Type.NULL)) { return convertRequiredField(firstType, unionTypes.get(0).getLogicalType(), fieldSchema, v); } return convertRequiredField( unionTypes.get(1).getType(), unionTypes.get(1).getLogicalType(), fieldSchema, v); }
private static TableRow convertGenericRecordToTableRow( GenericRecord record, List<TableFieldSchema> fields) { TableRow row = new TableRow(); for (TableFieldSchema subSchema : fields) { // Per https://cloud.google.com/bigquery/docs/reference/v2/tables#schema, the name field // is required, so it may not be null. Field field = record.getSchema().getField(subSchema.getName()); Object convertedValue = getTypedCellValue(field.schema(), subSchema, record.get(field.name())); if (convertedValue != null) { // To match the JSON files exported by BigQuery, do not include null values in the output. row.set(field.name(), convertedValue); } } return row; }
/**
 * Tries to parse the JSON {@link TableRow} from BigQuery.
 *
 * <p>Only supports basic types and arrays. Doesn't support date types.
 *
 * <p>Every field of {@code rowSchema} is matched by name against {@code bqSchema} to find the
 * position of its cell in {@code jsonBqRow}; the cell value is then converted with
 * {@code toBeamValue} according to the Beam field type.
 *
 * @param rowSchema the Beam schema of the row to produce
 * @param bqSchema the BigQuery schema describing the cell order of {@code jsonBqRow}
 * @param jsonBqRow the BigQuery row whose cells are read
 * @return the row assembled from the converted cell values
 */
public static Row toBeamRow(Schema rowSchema, TableSchema bqSchema, TableRow jsonBqRow) {
  List<TableFieldSchema> bqFields = bqSchema.getFields();

  // Position of every BigQuery column, keyed by column name.
  Map<String, Integer> positionByName =
      IntStream.range(0, bqFields.size())
          .boxed()
          .collect(toMap(position -> bqFields.get(position).getName(), position -> position));

  // Raw cell values, reordered to follow the Beam schema's field order.
  List<Object> cellValues =
      rowSchema.getFields().stream()
          .map(
              beamField -> {
                Integer position = positionByName.get(beamField.getName());
                return jsonBqRow.getF().get(position).getV();
              })
          .collect(toList());

  return IntStream.range(0, rowSchema.getFieldCount())
      .mapToObj(
          position ->
              toBeamValue(rowSchema.getField(position).getType(), cellValues.get(position)))
      .collect(toRow(rowSchema));
}
String fieldName = fieldSchema.getName(); checkArgument( !RESERVED_FIELD_NAMES.contains(fieldName),
private static List<Object> convertRepeatedField( Schema schema, TableFieldSchema fieldSchema, Object v) { Type arrayType = schema.getType(); verify( arrayType == Type.ARRAY, "BigQuery REPEATED field %s should be Avro ARRAY, not %s", fieldSchema.getName(), arrayType); // REPEATED fields are represented as Avro arrays. if (v == null) { // Handle the case of an empty repeated field. return new ArrayList<>(); } @SuppressWarnings("unchecked") List<Object> elements = (List<Object>) v; ArrayList<Object> values = new ArrayList<>(); Type elementType = schema.getElementType().getType(); LogicalType elementLogicalType = schema.getElementType().getLogicalType(); for (Object element : elements) { values.add(convertRequiredField(elementType, elementLogicalType, fieldSchema, element)); } return values; }
/**
 * Converts a BigQuery field definition into the corresponding Avro record field.
 *
 * <p>The element schema is a nested record (built by {@code toGenericAvroSchema}) when the
 * BigQuery type maps to an Avro RECORD, and a primitive schema otherwise. The field mode then
 * decides how the element schema is wrapped:
 *
 * <ul>
 *   <li>absent or NULLABLE: a union of NULL and the element schema
 *   <li>REQUIRED: the element schema itself
 *   <li>REPEATED: an array of the element schema
 * </ul>
 *
 * @param bigQueryField the BigQuery field to convert
 * @return the Avro field carrying the BigQuery field's name and description
 * @throws IllegalArgumentException if the field's type has no entry in
 *     {@code BIG_QUERY_TO_AVRO_TYPES}, or its mode is none of the values listed above
 */
private static Field convertField(TableFieldSchema bigQueryField) {
  Type avroType = BIG_QUERY_TO_AVRO_TYPES.get(bigQueryField.getType());
  if (avroType == null) {
    // Fail with the offending type name rather than letting a null reach Schema.create below.
    throw new IllegalArgumentException(
        String.format(
            "Unable to map BigQuery field type %s to an Avro type", bigQueryField.getType()));
  }

  Schema elementSchema;
  if (avroType == Type.RECORD) {
    elementSchema = toGenericAvroSchema(bigQueryField.getName(), bigQueryField.getFields());
  } else {
    elementSchema = Schema.create(avroType);
  }

  Schema fieldSchema;
  if (bigQueryField.getMode() == null || "NULLABLE".equals(bigQueryField.getMode())) {
    fieldSchema = Schema.createUnion(Schema.create(Type.NULL), elementSchema);
  } else if ("REQUIRED".equals(bigQueryField.getMode())) {
    fieldSchema = elementSchema;
  } else if ("REPEATED".equals(bigQueryField.getMode())) {
    fieldSchema = Schema.createArray(elementSchema);
  } else {
    throw new IllegalArgumentException(
        String.format("Unknown BigQuery Field Mode: %s", bigQueryField.getMode()));
  }

  return new Field(
      bigQueryField.getName(),
      fieldSchema,
      bigQueryField.getDescription(),
      (Object) null /* Cast to avoid deprecated JsonNode constructor. */);
}
}
/**
 * Builds a {@code Field} from a {@code TableFieldSchema}, taking the name from the schema and
 * the type and mode from the {@code fieldType} and {@code mode} helpers.
 *
 * @param field the schema entry to convert
 * @return the built field
 */
private static Field field(TableFieldSchema field) {
  String fieldName = field.getName();
  return Field.newBuilder(fieldName, fieldType(field))
      .setMode(mode(field))
      .build();
}
/** Checks that {@code ROW_TYPE} converts to a single STRUCT column named "row" with no mode. */
@Test
public void testToTableSchema_row() {
  TableSchema tableSchema = toTableSchema(ROW_TYPE);

  // Exactly one top-level column is expected.
  assertThat(tableSchema.getFields().size(), equalTo(1));

  TableFieldSchema rowColumn = tableSchema.getFields().get(0);
  assertThat(rowColumn.getName(), equalTo("row"));
  assertThat(rowColumn.getType(), equalTo(StandardSQLTypeName.STRUCT.toString()));
  assertThat(rowColumn.getMode(), nullValue());

  // The nested columns may come back in any order.
  assertThat(rowColumn.getFields(), containsInAnyOrder(ID, VALUE, NAME, TIMESTAMP, VALID));
}
/**
 * Checks that {@code ARRAY_ROW_TYPE} converts to a single REPEATED STRUCT column named "rows".
 */
@Test
public void testToTableSchema_array_row() {
  TableSchema tableSchema = toTableSchema(ARRAY_ROW_TYPE);

  // Exactly one top-level column is expected.
  assertThat(tableSchema.getFields().size(), equalTo(1));

  TableFieldSchema rowsColumn = tableSchema.getFields().get(0);
  assertThat(rowsColumn.getName(), equalTo("rows"));
  assertThat(rowsColumn.getType(), equalTo(StandardSQLTypeName.STRUCT.toString()));
  assertThat(rowsColumn.getMode(), equalTo(Mode.REPEATED.toString()));

  // The nested columns may come back in any order.
  assertThat(rowsColumn.getFields(), containsInAnyOrder(ID, VALUE, NAME, TIMESTAMP, VALID));
}
/**
 * Creates a {@code Field} mirroring the given {@code TableFieldSchema}.
 *
 * <p>Name and type are always transferred. Mode and description are transferred only when
 * they are set on the schema. When the schema has nested fields they are converted with
 * {@code FROM_PB_FUNCTION} and supplied alongside the type; otherwise the sub-field list passed
 * to the builder is {@code null}.
 *
 * @param fieldSchemaPb the schema entry to convert
 * @return the field built from {@code fieldSchemaPb}
 */
static Field fromPb(TableFieldSchema fieldSchemaPb) {
  Builder converted = new Builder();
  converted.setName(fieldSchemaPb.getName());

  // Mode and description are optional on the schema side.
  if (fieldSchemaPb.getMode() != null) {
    converted.setMode(Mode.valueOf(fieldSchemaPb.getMode()));
  }
  if (fieldSchemaPb.getDescription() != null) {
    converted.setDescription(fieldSchemaPb.getDescription());
  }

  FieldList children;
  if (fieldSchemaPb.getFields() == null) {
    children = null;
  } else {
    children = FieldList.of(Lists.transform(fieldSchemaPb.getFields(), FROM_PB_FUNCTION));
  }

  converted.setType(LegacySQLTypeName.valueOf(fieldSchemaPb.getType()), children);
  return converted.build();
}
}
String name = tableFieldSchema.getName(); String bigQueryType = tableFieldSchema.getType();
/**
 * Builds a {@code Field} from its {@code TableFieldSchema} counterpart.
 *
 * <p>The name is always copied, and the schema's type string is always resolved to a
 * {@code Type.Value}. Mode and description are copied only when present. When the schema has
 * nested fields the resulting type is a record of those fields (converted with
 * {@code FROM_PB_FUNCTION}); otherwise it is a plain type of the resolved value.
 *
 * @param fieldSchemaPb the schema entry to convert
 * @return the field built from {@code fieldSchemaPb}
 */
static Field fromPb(TableFieldSchema fieldSchemaPb) {
  Builder result = new Builder();
  result.name(fieldSchemaPb.getName());

  // Resolved before the optional attributes are handled, even though only the non-record branch
  // below ends up using it.
  Type.Value typeValue = Type.Value.valueOf(fieldSchemaPb.getType());

  if (fieldSchemaPb.getMode() != null) {
    result.mode(Mode.valueOf(fieldSchemaPb.getMode()));
  }
  if (fieldSchemaPb.getDescription() != null) {
    result.description(fieldSchemaPb.getDescription());
  }

  if (fieldSchemaPb.getFields() == null) {
    result.type(new Type(typeValue));
  } else {
    result.type(Type.record(Lists.transform(fieldSchemaPb.getFields(), FROM_PB_FUNCTION)));
  }
  return result.build();
}
}
/**
 * Creates a {@code Field} mirroring the given {@code TableFieldSchema}.
 *
 * <p>The name is always transferred and the schema's type string is always resolved to a
 * {@code Type.Value}. Mode and description are transferred only when set. A schema with nested
 * fields produces a record type built from those fields (converted with
 * {@code FROM_PB_FUNCTION}); a schema without them produces a plain type of the resolved value.
 *
 * @param fieldSchemaPb the schema entry to convert
 * @return the field built from {@code fieldSchemaPb}
 */
static Field fromPb(TableFieldSchema fieldSchemaPb) {
  Builder converted = new Builder();
  converted.name(fieldSchemaPb.getName());

  // The type string is resolved first; the result is only consumed by the non-record branch.
  Type.Value resolvedValue = Type.Value.valueOf(fieldSchemaPb.getType());

  if (fieldSchemaPb.getMode() != null) {
    converted.mode(Mode.valueOf(fieldSchemaPb.getMode()));
  }
  if (fieldSchemaPb.getDescription() != null) {
    converted.description(fieldSchemaPb.getDescription());
  }

  boolean hasNestedFields = fieldSchemaPb.getFields() != null;
  if (hasNestedFields) {
    converted.type(Type.record(Lists.transform(fieldSchemaPb.getFields(), FROM_PB_FUNCTION)));
  } else {
    converted.type(new Type(resolvedValue));
  }
  return converted.build();
}
}