bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName)
        .setDescription("The tuple in the bag")
        .setType(DataType.TUPLE);
HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
if (arrayElementFieldSchema.getType() == Type.STRUCT) {
    bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema));
} else if (arrayElementFieldSchema.getType() == Type.ARRAY) {
    ResourceSchema s = new ResourceSchema();
    List<ResourceFieldSchema> lrfs = Arrays.asList(
            getResourceSchemaFromFieldSchema(arrayElementFieldSchema));
    s.setFields(lrfs.toArray(new ResourceFieldSchema[lrfs.size()]));
    bagSubFieldSchemas[0].setSchema(s);
} else {
    ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
    innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName)
            .setDescription("The inner field in the tuple in the bag")
            .setType(getPigType(arrayElementFieldSchema))
            .setSchema(null); // the element type is not a tuple - so no subschema
    bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
}
return new ResourceSchema().setFields(bagSubFieldSchemas);
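For reference, the bag layout this snippet produces (a bag whose single child is a tuple wrapping the element fields) can be built standalone with the same ResourceSchema API. A minimal sketch, with illustrative names (my_bag, t, val) that are not from the snippet:

import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.data.DataType;

public class BagSchemaSketch {
    public static void main(String[] args) throws Exception {
        // innermost field: a single chararray value
        ResourceFieldSchema value = new ResourceFieldSchema()
                .setName("val")
                .setType(DataType.CHARARRAY);

        // Pig requires bag elements to be wrapped in a tuple
        ResourceFieldSchema tuple = new ResourceFieldSchema()
                .setName("t")
                .setType(DataType.TUPLE)
                .setSchema(new ResourceSchema().setFields(new ResourceFieldSchema[] { value }));

        // the bag field itself; its schema must contain exactly one tuple field
        ResourceFieldSchema bag = new ResourceFieldSchema()
                .setName("my_bag")
                .setType(DataType.BAG)
                .setSchema(new ResourceSchema().setFields(new ResourceFieldSchema[] { tuple }));

        // prints a description along the lines of my_bag:bag{t:tuple(val:chararray)}
        System.out.println(bag);
    }
}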
@Override
public void putNext(Tuple t) throws IOException {
    ResourceFieldSchema[] fieldSchemas = (schema == null) ? null : schema.getFields();
    PhoenixRecordWritable record = new PhoenixRecordWritable(this.columnInfo);
    try {
        for (int i = 0; i < t.size(); i++) {
            Object value = t.get(i);
            if (value == null) {
                record.add(null);
                continue;
            }
            ColumnInfo cinfo = this.columnInfo.get(i);
            byte type = (fieldSchemas == null) ? DataType.findType(value) : fieldSchemas[i].getType();
            PDataType pDataType = PDataType.fromTypeId(cinfo.getSqlType());
            Object v = TypeUtil.castPigTypeToPhoenix(value, type, pDataType);
            record.add(v);
        }
        this.writer.write(null, record);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (SQLException e) {
        LOG.error("Error on tuple {}.", t);
        throw new IOException(e);
    }
}
if (target instanceof PigTuple) {
    PigTuple pt = (PigTuple) target;
    ResourceFieldSchema[] fields = pt.getSchema().getSchema().getFields();
    // ...
    if (fieldName.equals(field.getName())) {
        foundField = true;
        byte type = field.getType();
        try {
            Object object = pt.getTuple().get(i);
            if (DataType.isAtomic(type)) {
                target = object.toString();
            }
            // ...
            PigTuple rpt = new PigTuple(field.getSchema());
            if (object instanceof PigTuple) {
                rpt.setTuple(((PigTuple) object).getTuple());
            }
            // ...
            Assert.isTrue(false, String.format(
                    "Unsupported data type [%s] for field [%s]; use only 'primitives' or 'tuples'",
                    DataType.findTypeName(type), fieldName));
if (serializedSchema == null) {
    return tup;
}
try {
    schema = new ResourceSchema(Utils.getSchemaFromString(serializedSchema));
} catch (ParserException e) {
    mLog.error("Unable to parse serialized schema " + serializedSchema, e);
}
// ...
ResourceFieldSchema[] fieldSchemas = schema.getFields();
int tupleIdx = 0;
// ...
if (tupleIdx >= tup.size()) {
    tup.append(null);
}
// ...
if (tup.get(tupleIdx) != null) {
    byte[] bytes = ((DataByteArray) tup.get(tupleIdx)).get();
    val = CastUtils.convertToType(caster, bytes, fieldSchemas[i], fieldSchemas[i].getType());
    tup.set(tupleIdx, val);
}
when(record.getResultMap()).thenReturn(values);
ResourceFieldSchema field = new ResourceFieldSchema().setType(DataType.CHARARRAY);
ResourceFieldSchema field1 = new ResourceFieldSchema().setType(DataType.INTEGER);
ResourceFieldSchema field2 = new ResourceFieldSchema().setType(DataType.LONG);
ResourceFieldSchema field3 = new ResourceFieldSchema().setType(DataType.BYTEARRAY);
ResourceFieldSchema field4 = new ResourceFieldSchema().setType(DataType.TUPLE);
ResourceFieldSchema[] projectedColumns = { field, field1, field2, field3, field4 };
// ...
assertEquals(DataType.LONG, DataType.findType(t.get(2)));
assertEquals(DataType.TUPLE, DataType.findType(t.get(4)));
Tuple doubleArrayTuple = (Tuple) t.get(4);
assertEquals(2, doubleArrayTuple.size());
// ...
field = new ResourceFieldSchema().setType(DataType.BIGDECIMAL);
field1 = new ResourceFieldSchema().setType(DataType.BIGINTEGER);
values.clear();
values.put("first", new BigDecimal(123123123.123213));
// ...
assertEquals(DataType.BIGDECIMAL, DataType.findType(t.get(0)));
assertEquals(DataType.BIGINTEGER, DataType.findType(t.get(1)));
ResourceSchema tupleSchema = schema.getSchema();
ResourceFieldSchema[] fields = tupleSchema.getFields();
Assert.isTrue(fields.length == 1, "When using JSON input, only one field is expected");
// ...
object = pt.getTuple().get(0);
type = pt.getTuple().getType(0);
// ...
} catch (Exception ex) {
    throw new EsHadoopIllegalStateException("Encountered exception while processing tuple", ex);
}
Object object = entry.getValue();
if (object == null) {
    tuple.set(i++, null);
    continue;
}
switch (fieldSchema.getType()) {
case DataType.BYTEARRAY:
    byte[] bytes = PDataType.fromTypeId(PBinary.INSTANCE.getSqlType()).toBytes(object);
    tuple.set(i, new DataByteArray(bytes, 0, bytes.length));
    break;
case DataType.CHARARRAY:
    tuple.set(i, DataType.toString(object));
    break;
case DataType.DOUBLE:
    tuple.set(i, DataType.toDouble(object));
    break;
case DataType.FLOAT:
    tuple.set(i, DataType.toFloat(object));
    break;
case DataType.INTEGER:
    // ...
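The DataType.toString/toDouble/toFloat helpers used in the switch above coerce an arbitrary Java object to the requested Pig type. A minimal sketch of the same calls in isolation (nothing here beyond Pig's DataType class):

import org.apache.pig.data.DataType;

public class DataTypeCoercionSketch {
    public static void main(String[] args) throws Exception {
        Object object = Integer.valueOf(42);
        // the same coercion helpers the switch relies on
        System.out.println(DataType.toString(object)); // "42"
        System.out.println(DataType.toDouble(object)); // 42.0
        System.out.println(DataType.toFloat(object));  // 42.0
    }
}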
/**
 * Checks to see if the given field is a schema-less Map that has values.
 * @return true if the Map has no schema but has values (mixed-schema map);
 *         false if not a Map or if the Map is empty.
 */
private boolean isPopulatedMixedValueMap(ResourceFieldSchema schema, int field, Tuple object) {
    if (schema.getType() != DataType.MAP) {
        // Can't be a mixed value map if it's not a map at all.
        return false;
    }
    try {
        Object fieldValue = object.get(field);
        Map<?, ?> map = (Map<?, ?>) fieldValue;
        return schema.getSchema() == null && !(map == null || map.isEmpty());
    } catch (ExecException e) {
        throw new EsHadoopIllegalStateException(e);
    }
}
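To make the condition concrete: the method returns true exactly when a field is typed as a map, carries no inner schema, and its value holds at least one entry. A small hypothetical sketch of that case (class and field names are illustrative):

import java.util.HashMap;
import java.util.Map;

import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

public class MixedValueMapSketch {
    public static void main(String[] args) throws Exception {
        // a map field with no inner schema, so its values may have mixed types
        ResourceFieldSchema mapField = new ResourceFieldSchema()
                .setName("m")
                .setType(DataType.MAP); // getSchema() stays null

        Map<String, Object> mixed = new HashMap<String, Object>();
        mixed.put("count", 1);     // integer value
        mixed.put("label", "one"); // string value

        Tuple t = TupleFactory.getInstance().newTuple(1);
        t.set(0, mixed);

        // the same three-part test the method applies
        boolean populatedMixedMap = mapField.getType() == DataType.MAP
                && mapField.getSchema() == null
                && !((Map<?, ?>) t.get(0)).isEmpty();
        System.out.println(populatedMixedMap); // true
    }
}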
json.writeNullField(field.getName());
return;
// ...
switch (field.getType()) {
case DataType.BOOLEAN:
    json.writeBooleanField(field.getName(), (Boolean) d);
    return;
case DataType.INTEGER:
    json.writeNumberField(field.getName(), (Integer) d);
    return;
case DataType.LONG:
    json.writeNumberField(field.getName(), (Long) d);
    return;
case DataType.FLOAT:
    json.writeNumberField(field.getName(), (Float) d);
    return;
case DataType.DOUBLE:
    json.writeNumberField(field.getName(), (Double) d);
    return;
// (case labels elided in the snippet; two types fall back to toString())
    json.writeStringField(field.getName(), d.toString());
    return;
    json.writeStringField(field.getName(), d.toString());
    return;
case DataType.CHARARRAY:
    json.writeStringField(field.getName(), (String) d);
    return;
private Result writeTuple(Object object, ResourceFieldSchema field, Generator generator,
        boolean writeTupleFieldNames, boolean isRoot) {
    ResourceSchema nestedSchema = field.getSchema();
    // ...
    for (ResourceFieldSchema nestedField : nestedSchema.getFields()) {
        allEmpty = (nestedField.getSchema() == null
                && !isPopulatedMixedValueMap(nestedField, currentField, currentTuple)
                && PigUtils.isComplexType(nestedField));
        // ...
    }
    ResourceFieldSchema[] nestedFields = nestedSchema.getFields();
    List<Object> tuples = ((Tuple) object).getAll();
    // ...
    String name = nestedFields[i].getName();
warn("Early termination of record, expected " + schema.getFields().length + " fields bug found " + fieldnum, PigWarning.UDF_WARNING_1); return null; switch (field.getType()) { case DataType.MAP: ResourceSchema s = field.getSchema(); ResourceFieldSchema[] fs = s.getFields(); Tuple t = tupleFactory.newTuple(fs.length); t.set(j, readField(p, fs[j], j)); s = field.getSchema(); fs = s.getFields(); s = fs[0].getSchema(); fs = s.getFields(); if (innerTok == JsonToken.START_OBJECT) { for (int j = 0; j < fs.length; j++) { t.set(j, readField(p, fs[j], j)); t.set(0, readPrimitive(p, innerTok, fs[0])); bag.add(t);
@Test
public void testSchema() throws SQLException, IOException {
    final Configuration configuration = mock(Configuration.class);
    when(configuration.get(PhoenixConfigurationUtil.SCHEMA_TYPE)).thenReturn(SchemaType.TABLE.name());
    final ResourceSchema actual = PhoenixPigSchemaUtil.getResourceSchema(
            configuration, new Dependencies() {
                List<ColumnInfo> getSelectColumnMetadataList(Configuration configuration) throws SQLException {
                    return Lists.newArrayList(ID_COLUMN, NAME_COLUMN);
                }
            });

    // expected schema
    final ResourceFieldSchema[] fields = new ResourceFieldSchema[2];
    fields[0] = new ResourceFieldSchema().setName("ID").setType(DataType.LONG);
    fields[1] = new ResourceFieldSchema().setName("NAME").setType(DataType.CHARARRAY);
    final ResourceSchema expected = new ResourceSchema().setFields(fields);

    assertEquals(expected.toString(), actual.toString());
}
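The expected schema this test assembles field by field can equivalently be produced from a Pig schema string. A sketch using the standard Utils.getSchemaFromString helper (exact toString output may vary across Pig versions):

import org.apache.pig.ResourceSchema;
import org.apache.pig.impl.util.Utils;

public class SchemaStringSketch {
    public static void main(String[] args) throws Exception {
        // the same two fields the test builds by hand
        ResourceSchema schema = new ResourceSchema(
                Utils.getSchemaFromString("ID:long,NAME:chararray"));
        System.out.println(schema.getFields().length); // 2
        System.out.println(schema);
    }
}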
@Override
public void checkStoreSchema(ResourceFieldSchema schema) throws IOException {
    Preconditions.checkNotNull(schema);
    if (schema.getType() != DataType.BYTEARRAY) {
        throw new IOException("Expected Pig type '" + DataType.findTypeName(DataType.BYTEARRAY)
                + "' but found '" + DataType.findTypeName(schema.getType()) + "'");
    }
}
case DataType.BAG:
    Schema innerBagSchema = resourceSchemaToAvroSchema(
            schema.getFields()[0].getSchema(), name, null,
            definedRecordNames, doubleColonsToDoubleUnderscores);
    // ...
    throw new IOException("AvroStorage can't save maps with untyped values; "
            + "please specify a value type or a schema.");
    // ...
    byte innerType = schema.getFields()[0].getType();
    String desc = schema.getFields()[0].getDescription();
    if (desc != null) {
        if (desc.equals("autogenerated from Pig Field Schema")) {
            // ...
        }
    }
    // ...
    if (DataType.isComplex(innerType)) {
        innerSchema = createNullableUnion(
                Schema.createMap(resourceSchemaToAvroSchema(
                        schema.getFields()[0].getSchema(), name, nameSpace,
                        definedRecordNames, doubleColonsToDoubleUnderscores)));
    }
    // ...
    throw new IOException("Don't know how to encode type " + DataType.findTypeName(type)
            + " in schema " + ((schema == null) ? "" : schema.toString()) + "\n");
/**
 * Set the schema for data to be stored. This will be called on the
 * front end during planning if the store is associated with a schema.
 * A store function should implement this method to check that a given
 * schema is acceptable to it. For example, it can check that the correct
 * partition keys are included; a storage function to be written directly
 * to an OutputFormat can make sure the schema will translate in a
 * well-defined way. The default implementation is a no-op.
 *
 * @param s schema to be checked
 * @throws java.io.IOException if this schema is not acceptable. It should include
 *         a detailed error message indicating what is wrong with the schema.
 */
@Override
public void checkSchema(ResourceSchema s) throws IOException {
    if (s.getFields()[1].getType() != DataType.BYTEARRAY
            || s.getFields()[0].getType() == DataType.BYTEARRAY) {
        throw new InvalidOutputSchema(String.format(
                "Want a key with a string format and binary model for model output but got %s and %s",
                DataType.findTypeName(s.getFields()[0].getType()),
                DataType.findTypeName(s.getFields()[1].getType())));
    }
}
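For illustration, a schema this check accepts: a non-bytearray key in field 0 and a bytearray model in field 1. A minimal sketch (the names model_key and model are illustrative only):

import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.data.DataType;

public class ModelOutputSchemaSketch {
    public static void main(String[] args) throws Exception {
        ResourceFieldSchema key = new ResourceFieldSchema()
                .setName("model_key").setType(DataType.CHARARRAY);
        ResourceFieldSchema model = new ResourceFieldSchema()
                .setName("model").setType(DataType.BYTEARRAY);
        ResourceSchema s = new ResourceSchema()
                .setFields(new ResourceFieldSchema[] { key, model });

        // the same test checkSchema applies above
        boolean ok = s.getFields()[1].getType() == DataType.BYTEARRAY
                && s.getFields()[0].getType() != DataType.BYTEARRAY;
        System.out.println(ok); // true
    }
}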
if (fieldSchema.getSchema() != null && fieldSchema.getSchema().getFields().length != 0) {
    ResourceFieldSchema[] fss = fieldSchema.getSchema().getFields();
    // ...
    delimit = ')';
    // ...
    if (DataType.isComplex(fs.getType())) {
        field = consumeComplexType(in, fs);
        // ...
    }
    while ((buf = in.read()) != delimit) {
        // ...
    }
    t.append(field);
    // ...
    t.append(value);
    break;
    // ...
    t.append(value);
    mOut.reset();
private String getRequiredColumnNamesString(ResourceSchema schema) {
    StringBuilder sb = new StringBuilder();
    for (ResourceFieldSchema field : schema.getFields()) {
        sb.append(field.getName()).append(",");
    }
    // guard against an empty schema before trimming the trailing comma
    if (sb.length() > 0 && sb.charAt(sb.length() - 1) == ',') {
        sb.deleteCharAt(sb.length() - 1);
    }
    return sb.toString();
}
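Example behavior: for a schema such as a:int,b:chararray the method returns "a,b". A self-contained sketch of the same logic, assuming Pig's Utils.getSchemaFromString helper:

import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.impl.util.Utils;

public class ColumnNamesSketch {
    public static void main(String[] args) throws Exception {
        ResourceSchema schema = new ResourceSchema(
                Utils.getSchemaFromString("a:int,b:chararray"));
        StringBuilder sb = new StringBuilder();
        for (ResourceFieldSchema field : schema.getFields()) {
            sb.append(field.getName()).append(',');
        }
        if (sb.length() > 0 && sb.charAt(sb.length() - 1) == ',') {
            sb.deleteCharAt(sb.length() - 1);
        }
        System.out.println(sb); // a,b
    }
}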
private Result write(Object object, ResourceFieldSchema field, Generator generator) {
    byte type = (field != null ? field.getType() : DataType.findType(object));
    // ...
    throw new EsHadoopSerializationException(
            "Big decimals are not supported by Elasticsearch - consider using a different type (such as string)");
    // ...
    case DataType.MAP:
        ResourceSchema nestedSchema = field.getSchema();
        // map values are described by the first (only) field of the nested schema
        ResourceFieldSchema valueType = (nestedSchema != null ? nestedSchema.getFields()[0] : null);
        // ...
        // for a bag, the nested schema likewise holds the element type
        nestedSchema = field.getSchema();
        ResourceFieldSchema bagType = nestedSchema.getFields()[0];
switch (field.getType()) {
case DataType.INTEGER:
    if (StringUtils.isBlank(val)) {
        tuple.set(fieldIndex, null);
    } else {
        try {
            tuple.set(fieldIndex, Integer.parseInt(val));
        } catch (NumberFormatException nfe1) {
            throw new NumberFormatException("Error while trying to parse " + val
                    + " into an Integer for field [fieldindex= " + fieldIndex + "] "
                    + field.getName() + "\n" + value.toString());
        }
    }
    break;
// ... (long case, same pattern)
    try {
        tuple.set(fieldIndex, Long.parseLong(val));
    } catch (NumberFormatException nfe2) {
        throw new NumberFormatException("Error while trying to parse " + val
                + " into a Long for field " + field.getName() + "\n" + value.toString());
    }
// ... (bigdecimal case, same pattern)
    try {
        tuple.set(fieldIndex, new BigDecimal(val));
    } catch (NumberFormatException nfe2) {
        throw new NumberFormatException("Error while trying to parse " + val
                + " into a BigDecimal for field " + field.getName() + "\n" + value.toString());
    }
// ... (bag case: only event_list is supported)
    if ("event_list".equals(field.getName())) {
        DataBag bag = bagFactory.newDefaultBag();
        String[] events = val.split(",");
        // ...
    } else {
        throw new IOException("Can not process bags for the field " + field.getName()
                + ". Can only process for the event_list field.");
    }
// ...
default:
    throw new IOException("Unexpected or unknown type in input schema "
            + "(Omniture fields should be int, chararray or long): " + field.getType());
private void init(Schema inputSchema, GenericUDTF udtf, ConstantObjectInspectInfo constantsInfo)
        throws IOException {
    ResourceSchema rs = new ResourceSchema(inputSchema);
    ResourceFieldSchema wrappedTupleFieldSchema = new ResourceFieldSchema();
    wrappedTupleFieldSchema.setType(DataType.TUPLE);
    wrappedTupleFieldSchema.setSchema(rs);

    TypeInfo ti = HiveUtils.getTypeInfo(wrappedTupleFieldSchema);
    inputObjectInspector = (StructObjectInspector) HiveUtils.createObjectInspector(ti);
    if (constantsInfo != null) {
        constantsInfo.injectConstantObjectInspector(inputObjectInspector);
    }
    try {
        outputObjectInspector = udtf.initialize(inputObjectInspector);
    } catch (Exception e) {
        throw new IOException(e);
    }
}