// Fragment: JSON-input schema validation — the tuple schema must contain exactly
// one field (presumably the whole JSON document maps to a single value).
// NOTE(review): this line is a snippet without its enclosing method; `tupleSchema`
// and `Assert` are defined outside this view — confirm against the full source.
ResourceFieldSchema[] fields = tupleSchema.getFields(); Assert.isTrue(fields.length == 1, "When using JSON input, only one field is expected");
// Fragment: when the conversion target is a PigTuple, fetch the field schemas
// from the tuple's nested schema (schema-of-schema).
// NOTE(review): the enclosing method and the remainder of this if-block are not
// visible here; the block is intentionally left syntactically open.
if (target instanceof PigTuple) { PigTuple pt = (PigTuple) target; ResourceFieldSchema[] fields = pt.getSchema().getSchema().getFields();
/**
 * Reads the next record from the underlying reader and converts it into a Pig Tuple.
 *
 * @return the next Tuple, or null when input is exhausted or the current record is absent
 * @throws IOException if reading the input fails
 */
@Override
public Tuple getNext() throws IOException {
    try {
        if (!reader.nextKeyValue()) {
            return null;
        }
        final PhoenixRecordWritable currentRecord = reader.getCurrentValue();
        return (currentRecord == null) ? null : TypeUtil.transformToTuple(currentRecord, schema.getFields());
    } catch (InterruptedException e) {
        // Restore the interrupt flag before surfacing the failure to Pig.
        Thread.currentThread().interrupt();
        final int errCode = 6018;
        throw new ExecException("Error while reading input", errCode, PigException.REMOTE_ENVIRONMENT, e);
    }
}
/**
 * Writes a single Pig tuple via the underlying writer. Each field of the tuple
 * is copied into a PhoenixRecord built against the store schema (or a null
 * schema when none was supplied).
 *
 * @param t the tuple to persist
 * @throws IOException if the underlying writer fails
 */
@Override
public void putNext(Tuple t) throws IOException {
    ResourceFieldSchema[] fieldSchemas = (schema == null) ? null : schema.getFields();
    PhoenixRecord record = new PhoenixRecord(fieldSchemas);
    for (int i = 0; i < t.size(); i++) {
        record.add(t.get(i));
    }
    try {
        writer.write(null, record);
    } catch (InterruptedException e) {
        // Restore the interrupt status before rethrowing, so callers up the
        // stack can still observe the interruption (matches the sibling
        // putNext implementations in this codebase).
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    }
}
/**
 * Converts the given Pig tuple into a PhoenixRecordWritable and hands it to the
 * writer. Field types come from the store schema when one is available,
 * otherwise they are inferred from the runtime value; each non-null value is
 * then cast to the matching Phoenix column type.
 */
@Override
public void putNext(Tuple t) throws IOException {
    final ResourceFieldSchema[] declaredFields = (schema == null) ? null : schema.getFields();
    final PhoenixRecordWritable record = new PhoenixRecordWritable(this.columnInfo);
    try {
        final int fieldCount = t.size();
        for (int idx = 0; idx < fieldCount; idx++) {
            final Object value = t.get(idx);
            if (value == null) {
                record.add(null);
                continue;
            }
            final ColumnInfo column = this.columnInfo.get(idx);
            final byte pigType = (declaredFields == null)
                    ? DataType.findType(value)
                    : declaredFields[idx].getType();
            final PDataType phoenixType = PDataType.fromTypeId(column.getSqlType());
            record.add(TypeUtil.castPigTypeToPhoenix(value, pigType, phoenixType));
        }
        this.writer.write(null, record);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (SQLException e) {
        LOG.error("Error on tuple {} .", t);
        throw new IOException(e);
    }
}
/**
 * Builds a comma-separated list of the column names selected by the
 * pushed-down projection.
 *
 * @param schema the full resource schema of the relation
 * @param requiredColumns flags, parallel to the schema fields, marking required columns
 * @return the required column names joined by commas; empty string when none are required
 */
private String getReqiredColumnNamesString(ResourceSchema schema, boolean[] requiredColumns) {
    StringBuilder sb = new StringBuilder();
    ResourceFieldSchema[] fields = schema.getFields();
    for (int i = 0; i < requiredColumns.length; i++) {
        if (requiredColumns[i]) {
            // Append the column NAME — appending the field object itself would
            // emit its toString() form (name plus type), as the sibling
            // single-argument overload's use of getName() confirms.
            sb.append(fields[i].getName()).append(",");
        }
    }
    // Guard against an empty projection: charAt(-1) on an empty builder throws.
    if (sb.length() > 0 && sb.charAt(sb.length() - 1) == ',') {
        sb.deleteCharAt(sb.length() - 1);
    }
    return sb.toString();
}
/**
 * Builds a comma-separated list of all column names in the schema.
 *
 * @param schema the resource schema whose field names are joined
 * @return the column names joined by commas; empty string for a schema with no fields
 */
private String getReqiredColumnNamesString(ResourceSchema schema) {
    StringBuilder sb = new StringBuilder();
    for (ResourceFieldSchema field : schema.getFields()) {
        sb.append(field.getName()).append(",");
    }
    // A schema with zero fields leaves the builder empty; the original
    // unconditional charAt(length - 1) would throw StringIndexOutOfBounds.
    if (sb.length() > 0 && sb.charAt(sb.length() - 1) == ',') {
        sb.deleteCharAt(sb.length() - 1);
    }
    return sb.toString();
}
/**
 * Validates that the store schema consists of exactly two fields and that each
 * field is acceptable to the configured key and value converters.
 */
@Override
public void checkSchema(ResourceSchema schema) throws IOException {
    Preconditions.checkNotNull(schema, "Schema is null");
    final ResourceFieldSchema[] fieldSchemas = schema.getFields();
    Preconditions.checkNotNull(fieldSchemas, "Schema fields are undefined");
    Preconditions.checkArgument(2 == fieldSchemas.length,
            "Expecting 2 schema fields but found %s", fieldSchemas.length);
    // Field 0 is the key, field 1 the value; delegate the detailed checks.
    config.keyConverter.checkStoreSchema(fieldSchemas[0]);
    config.valueConverter.checkStoreSchema(fieldSchemas[1]);
}
/**
 * Captures the record reader for this split and materializes the field schemas
 * parsed from the schema string configured on this loader.
 */
@Override
@SuppressWarnings("rawtypes")
public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
    this.reader = (OmnitureDataFileRecordReader) reader;
    final ResourceSchema parsedSchema = new ResourceSchema(Utils.getSchemaFromString(this.schema));
    fields = parsedSchema.getFields();
}
/**
 * Replaces NULL-typed fields with BYTEARRAY, mutating the schema in place so
 * downstream consumers always see a concrete type.
 *
 * @param s the schema to patch
 * @return the same (mutated) schema instance
 */
public ResourceSchema fixSchema(ResourceSchema s) {
    for (ResourceFieldSchema field : s.getFields()) {
        if (field.getType() == DataType.NULL) {
            field.setType(DataType.BYTEARRAY);
        }
    }
    return s;
}
/**
 * Ensures a BAG's declared inner schema wraps exactly one TUPLE field; any
 * other shape is reported via throwInvalidSchemaException(). Non-bag types and
 * absent schemas are not checked.
 */
private void validateSchema(ResourceSchema schema) throws IOException {
    if (type != DataType.BAG || schema == null) {
        return;
    }
    final ResourceFieldSchema[] innerFields = schema.getFields();
    if (innerFields.length != 1 || innerFields[0].type != DataType.TUPLE) {
        throwInvalidSchemaException();
    }
}
/**
 * Determines the field schema of a ResourceFieldSchema.
 *
 * @param rcFieldSchema the ResourceFieldSchema to translate
 * @return the field schema corresponding to the object
 * @throws ExecException on execution-level translation failures
 * @throws FrontendException on schema translation failures
 * @throws SchemaMergeException when inner schemas cannot be merged
 */
public static Schema.FieldSchema determineFieldSchema(ResourceSchema.ResourceFieldSchema rcFieldSchema)
        throws ExecException, FrontendException, SchemaMergeException {
    final byte dt = rcFieldSchema.getType();
    Iterator<ResourceSchema.ResourceFieldSchema> fieldIter = null;
    long fieldNum = 0;
    if (dt == TUPLE || dt == BAG) {
        // Fetch the inner fields once instead of calling getFields() twice.
        final ResourceSchema.ResourceFieldSchema[] innerFields = rcFieldSchema.getSchema().getFields();
        fieldIter = Arrays.asList(innerFields).iterator();
        fieldNum = innerFields.length;
    }
    return determineFieldSchema(dt, fieldIter, fieldNum, ResourceSchema.ResourceFieldSchema.class);
}
/**
 * Set the schema for data to be stored. This will be called on the front end
 * during planning if the store is associated with a schema.
 *
 * The expected layout is exactly two fields: a non-bytearray key (e.g. a
 * chararray label) followed by a bytearray holding the serialized model.
 *
 * @param s the schema to be checked
 * @throws java.io.IOException if this schema is not acceptable. The message
 *         indicates what is wrong with the schema.
 */
@Override
public void checkSchema(ResourceSchema s) throws IOException {
    ResourceFieldSchema[] fields = s.getFields();
    // Guard against schemas with a different field count before indexing
    // fields[0]/fields[1] — the original threw ArrayIndexOutOfBoundsException
    // instead of a descriptive schema error for single-field schemas.
    if (fields.length != 2) {
        throw new InvalidOutputSchema(String.format(
                "Expected a schema with exactly 2 fields (key, model) but got %d", fields.length));
    }
    if (fields[1].getType() != DataType.BYTEARRAY || fields[0].getType() == DataType.BYTEARRAY) {
        throw new InvalidOutputSchema(String.format(
                "Want a key with a string format and binary model for model output but got %s and %s",
                DataType.findTypeName(fields[0].getType()),
                DataType.findTypeName(fields[1].getType())));
    }
}
/**
 * Fetches the next input record and maps it to a Pig Tuple. Returns null at
 * end of input or when no current record is available.
 */
@Override
public Tuple getNext() throws IOException {
    try {
        if (!reader.nextKeyValue()) {
            return null;
        }
        final PhoenixRecordWritable writable = reader.getCurrentValue();
        if (writable == null) {
            return null;
        }
        return TypeUtil.transformToTuple(writable, schema.getFields());
    } catch (InterruptedException e) {
        // Preserve the interrupt status before translating to a Pig exception.
        Thread.currentThread().interrupt();
        throw new ExecException("Error while reading input", 6018, PigException.REMOTE_ENVIRONMENT, e);
    }
}
// Path filter: accepts only ORC files whose inferred Pig schema has at least
// one field. The ORC object inspector is translated into a ResourceFieldSchema
// via HiveUtils/TypeInfoUtils and its nested field list is inspected.
// NOTE(review): the Reader opened here is never explicitly closed — verify this
// ORC API version does not leak file handles. IOExceptions (e.g. non-ORC files)
// are rethrown as RuntimeException rather than skipping the path — confirm that
// failing fast is the intended behavior for mixed-content directories.
@Override public boolean accept(Path path) { Reader reader; try { reader = OrcFile.createReader(fs, path); ObjectInspector oip = (ObjectInspector)reader.getObjectInspector(); ResourceFieldSchema rs = HiveUtils.getResourceFieldSchema(TypeInfoUtils.getTypeInfoFromObjectInspector(oip)); if (rs.getSchema().getFields().length!=0) { return true; } } catch (IOException e) { throw new RuntimeException(e); } return false; } }
/**
 * This method adds a FieldSchema for the 'input source tag/path' as the first
 * field. It is called only when PigStorage is invoked with the '-tagFile' or
 * '-tagPath' option and the schema file is present to be loaded.
 *
 * @param schema the schema loaded for the input
 * @param fieldName name of the source-tag field to prepend
 * @return the schema with the source-tag field prepended as field 0
 */
public static ResourceSchema getSchemaWithInputSourceTag(ResourceSchema schema, String fieldName) {
    ResourceFieldSchema[] fieldSchemas = schema.getFields();
    ResourceFieldSchema sourceTagSchema =
            new ResourceFieldSchema(new FieldSchema(fieldName, DataType.CHARARRAY));
    ResourceFieldSchema[] fieldSchemasWithSourceTag = new ResourceFieldSchema[fieldSchemas.length + 1];
    fieldSchemasWithSourceTag[0] = sourceTagSchema;
    // Bulk-copy the original fields after the tag instead of a manual loop.
    System.arraycopy(fieldSchemas, 0, fieldSchemasWithSourceTag, 1, fieldSchemas.length);
    return schema.setFields(fieldSchemasWithSourceTag);
}
/**
 * Serializes the tuple into a PhoenixRecordWritable and writes it out. Null
 * tuple fields are added as-is; non-null fields are cast from their Pig type
 * (declared in the schema, or inferred from the value) to the Phoenix column
 * type before being added.
 */
@Override
public void putNext(Tuple t) throws IOException {
    final ResourceFieldSchema[] storeFields = (schema == null) ? null : schema.getFields();
    final PhoenixRecordWritable out = new PhoenixRecordWritable(this.columnInfo);
    try {
        for (int pos = 0; pos < t.size(); pos++) {
            final Object raw = t.get(pos);
            if (raw == null) {
                out.add(null);
                continue;
            }
            final ColumnInfo info = this.columnInfo.get(pos);
            final byte pigType = (storeFields == null)
                    ? DataType.findType(raw)
                    : storeFields[pos].getType();
            out.add(TypeUtil.castPigTypeToPhoenix(raw, pigType, PDataType.fromTypeId(info.getSqlType())));
        }
        this.writer.write(null, out);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (SQLException e) {
        LOG.error("Error on tuple {} .", t);
        throw new IOException(e);
    }
}
public static Schema getPigSchema(ResourceSchema rSchema) throws FrontendException { if(rSchema == null) { return null; } List<FieldSchema> fsList = new ArrayList<FieldSchema>(); for(ResourceFieldSchema rfs : rSchema.getFields()) { FieldSchema fs = new FieldSchema(rfs.getName(), rfs.getSchema() == null ? null : getPigSchema(rfs.getSchema()), rfs.getType()); if(rfs.getType() == DataType.BAG) { if (fs.schema != null) { // allow partial schema if (fs.schema.size() == 1) { FieldSchema innerFs = fs.schema.getField(0); if (innerFs.type != DataType.TUPLE) { ResourceFieldSchema.throwInvalidSchemaException(); } } else { ResourceFieldSchema.throwInvalidSchemaException(); } } } fsList.add(fs); } return new Schema(fsList); }