// NOTE(review): the getDefaultValue() declaration that starts at the end of this line is cut off in this view.
/**
 * Builds a storer from an optional partition specification and an optional Pig schema string.
 *
 * <p>{@code partSpecs} is split on "," into pairs and each pair is split on "=". For every pair
 * that yields exactly two parts, the trimmed key is appended to {@code partitionKeys} and mapped
 * to its trimmed value in {@code partitions}; any other pair aborts construction.
 * {@code schema}, when not null and not whitespace-only, is parsed with
 * {@code Utils.getSchemaFromString} into {@code pigSchema}. Finally {@code onOutOfRange} is
 * resolved from the UDF properties entry {@code ON_OORA_VALUE_PROP}, falling back to
 * {@code getDefaultValue().name()} when the property is absent.
 *
 * <p>NOTE(review): {@code sign} is read here but is not assigned by this constructor — confirm
 * it holds the intended value at construction time.
 *
 * @param partSpecs comma-separated {@code key=value} pairs; null or whitespace-only means none
 * @param schema Pig schema string; null or whitespace-only leaves {@code pigSchema} unassigned here
 * @throws Exception declared broadly; a {@code FrontendException} is thrown explicitly when a
 *         pair does not split into exactly two parts on "="
 */
public HCatBaseStorer(String partSpecs, String schema) throws Exception { partitionKeys = new ArrayList<String>(); partitions = new HashMap<String, String>(); if (partSpecs != null && !partSpecs.trim().isEmpty()) { String[] partKVPs = partSpecs.split(","); for (String partKVP : partKVPs) { String[] partKV = partKVP.split("="); if (partKV.length == 2) { String partKey = partKV[0].trim(); partitionKeys.add(partKey); partitions.put(partKey, partKV[1].trim()); } else { throw new FrontendException("Invalid partition column specification. " + partSpecs, PigHCatUtil.PIG_EXCEPTION_CODE); } } } if (schema != null && !schema.trim().isEmpty()) { pigSchema = Utils.getSchemaFromString(schema); } Properties udfProps = UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{sign}); onOutOfRange = OOR_VALUE_OPT_VALUES.valueOf(udfProps.getProperty(ON_OORA_VALUE_PROP, getDefaultValue().name())); } static OOR_VALUE_OPT_VALUES getDefaultValue() {
/**
 * Parses a Pig schema string and wraps the result in a {@code ResourceSchema}.
 *
 * @param schema Pig schema definition to parse
 * @return the resource schema built from the parsed definition
 * @throws RuntimeException wrapping any exception raised while parsing or wrapping
 */
private ResourceSchema createSchema(String schema) {
    final ResourceSchema resourceSchema;
    try {
        resourceSchema = new ResourceSchema(Utils.getSchemaFromString(schema));
    } catch (Exception cause) {
        // Callers get an unchecked failure; the original exception is kept as the cause.
        throw new RuntimeException(cause);
    }
    return resourceSchema;
}
/**
 * Parses a Pig schema string and wraps the result in a {@code ResourceSchema}.
 *
 * @param schema Pig schema definition to parse
 * @return the resource schema built from the parsed definition
 * @throws RuntimeException wrapping any exception raised while parsing or wrapping
 */
private ResourceSchema createSchema(String schema) {
    final ResourceSchema resourceSchema;
    try {
        resourceSchema = new ResourceSchema(Utils.getSchemaFromString(schema));
    } catch (Exception cause) {
        // Callers get an unchecked failure; the original exception is kept as the cause.
        throw new RuntimeException(cause);
    }
    return resourceSchema;
}
/**
 * Parses a Pig schema string and wraps the result in a {@code ResourceSchema}.
 *
 * @param schema Pig schema definition to parse
 * @return the resource schema built from the parsed definition
 * @throws RuntimeException wrapping any exception raised while parsing or wrapping
 */
private ResourceSchema createSchema(String schema) {
    final ResourceSchema resourceSchema;
    try {
        resourceSchema = new ResourceSchema(Utils.getSchemaFromString(schema));
    } catch (Exception cause) {
        // Callers get an unchecked failure; the original exception is kept as the cause.
        throw new RuntimeException(cause);
    }
    return resourceSchema;
}
/**
 * Parses a Pig schema string and wraps the result in a {@code ResourceSchema}.
 *
 * @param schema Pig schema definition to parse
 * @return the resource schema built from the parsed definition
 * @throws RuntimeException wrapping any exception raised while parsing or wrapping
 */
private ResourceSchema createSchema(String schema) {
    final ResourceSchema resourceSchema;
    try {
        resourceSchema = new ResourceSchema(Utils.getSchemaFromString(schema));
    } catch (Exception cause) {
        // Callers get an unchecked failure; the original exception is kept as the cause.
        throw new RuntimeException(cause);
    }
    return resourceSchema;
}
/**
 * Parses a schema holding a bag of two-field tuples and prints the projection that
 * {@code PigUtils.asProjection} derives from it. Nothing is asserted; the test only
 * fails if parsing or projection throws.
 */
@Test
public void testProjection() throws Exception {
    Schema parsed = Utils.getSchemaFromString(
            "ES_PARENT: {(parent_name: chararray,parent_value: chararray)}");
    Properties emptySettings = new Properties();
    System.out.println(PigUtils.asProjection(parsed, emptySettings));
}
}
/**
 * Parses a schema containing a bag, renders it with {@code toString()}, and feeds that
 * rendering back into the parser. The test passes only if the body throws.
 */
@Test(expected = Exception.class)
public void testLoadingOfBagSchema() throws Exception {
    String rendered =
            Utils.getSchemaFromString("name:bytearray,links:{(missing:chararray)}").toString();
    assertNotNull(Utils.getSchemaFromString(rendered));
}
/**
 * Checks that a parsed schema keeps the same {@code toString()} rendering after a
 * Base64 serialize/deserialize round trip through {@code IOUtils}.
 */
@Test
public void testSchemaSerializationPlusBase64() throws Exception {
    Schema parsed = Utils.getSchemaFromString("name:bytearray,links:{(missing:chararray)}");
    Schema roundTripped = IOUtils.deserializeFromBase64(IOUtils.serializeToBase64(parsed));

    String expected = parsed.toString();
    String actual = roundTripped.toString();
    assertEquals(expected, actual);
}
/**
 * Parses a textual Pig schema definition.
 *
 * @param schema the Pig schema definition to parse
 * @return the schema represented by the string
 * @throws ParserException if the schema is invalid
 */
public static Schema schema(String schema) throws ParserException {
    Schema parsed = Utils.getSchemaFromString(schema);
    return parsed;
}
/**
 * Declares a fixed output schema parsed from {@code "json: [chararray]"}; the input
 * schema is not consulted.
 *
 * @param input schema of the incoming data (unused)
 * @return the parsed output schema
 * @throws RuntimeException wrapping the {@code ParserException} if the literal fails to parse
 */
@Override
public Schema outputSchema(Schema input) {
    final Schema jsonSchema;
    try {
        jsonSchema = Utils.getSchemaFromString("json: [chararray]");
    } catch (ParserException parseFailure) {
        throw new RuntimeException(parseFailure);
    }
    return jsonSchema;
}
public Schema outputSchema(Schema input) { try { return Utils .getSchemaFromString("PSIInfo:Tuple(columnId : int, psi : double, unitstats : chararray)"); } catch (ParserException e) { log.debug("Error when generating output schema.", e); // just ignore return null; } } }
/**
 * Declares a fixed output schema: a new {@code Schema} constructed from the schema
 * parsed out of {@code "{(match:chararray)}"}. The input schema is not consulted.
 *
 * @param input schema of the incoming data (unused)
 * @return the constructed output schema
 * @throws RuntimeException wrapping any exception raised while building the schema
 */
@Override
public Schema outputSchema(Schema input) {
    final Schema result;
    try {
        Schema parsed = Utils.getSchemaFromString("{(match:chararray)}");
        result = new Schema(parsed);
    } catch (Exception failure) {
        throw new RuntimeException(failure);
    }
    return result;
}
/**
 * Reports a fixed schema parsed from {@code "bytes : bytearray"}; neither argument is
 * consulted.
 *
 * @param filename location being loaded (unused)
 * @param job the job being configured (unused)
 * @return the resource schema built from the literal definition
 * @throws IOException declared by the signature; propagated from schema parsing
 */
@Override
public ResourceSchema getSchema(String filename, Job job) throws IOException {
    ResourceSchema bytesSchema =
            new ResourceSchema(Utils.getSchemaFromString("bytes : bytearray"));
    return bytesSchema;
}
}
/**
 * Parses a Pig schema string, passing {@code null} through untouched.
 *
 * @param pigSchemaString the pig schema to parse; may be {@code null}
 * @return the parsed pig schema, or {@code null} when the input is {@code null}
 * @throws SchemaConversionException if the string cannot be parsed
 */
public static Schema parsePigSchema(String pigSchemaString) {
    if (pigSchemaString == null) {
        return null;
    }
    try {
        return Utils.getSchemaFromString(pigSchemaString);
    } catch (ParserException e) {
        throw new SchemaConversionException("could not parse Pig schema: " + pigSchemaString, e);
    }
}
/**
 * Stores tuples at a location, with the schema given in textual form. The string is
 * parsed and the call is forwarded to the overload that takes a parsed schema.
 *
 * @param location "where" to store the tuples
 * @param schema textual Pig schema of the data
 * @param data the tuples to store
 * @throws ParserException if schema is invalid
 */
public void set(String location, String schema, Collection<Tuple> data)
        throws ParserException {
    this.set(
            location,
            Utils.getSchemaFromString(schema),
            data);
}
/**
 * Stores tuples at a location, with the schema given in textual form and the tuples
 * given as varargs. The string is parsed, the array is wrapped as a list, and the call
 * is forwarded to the overload that takes a parsed schema.
 *
 * @param location "where" to store the tuples
 * @param schema textual Pig schema of the data
 * @param data the tuples to store
 * @throws ParserException if schema is invalid
 */
public void set(String location, String schema, Tuple... data)
        throws ParserException {
    // Argument order matters: the schema is parsed before the array is wrapped.
    this.set(
            location,
            Utils.getSchemaFromString(schema),
            Arrays.asList(data));
}
/**
 * Reads the {@code "inputSchema"} entry from the properties obtained for the given
 * signature and parses it as a Pig schema.
 *
 * @param signature key passed to {@code getProperties} to select the property set
 * @return the parsed schema
 * @throws RuntimeException wrapping the {@code ParserException} if parsing fails
 */
private static Schema getInputSchema(String signature) {
    final Schema inputSchema;
    try {
        String serialized = getProperties(signature).getProperty("inputSchema");
        inputSchema = Utils.getSchemaFromString(serialized);
    } catch (ParserException parseFailure) {
        throw new RuntimeException(parseFailure);
    }
    return inputSchema;
}
/**
 * Keeps the supplied reader (cast to {@code OmnitureDataFileRecordReader}) and caches the
 * fields of the schema parsed from {@code this.schema}.
 *
 * @param reader record reader to read from; must be an {@code OmnitureDataFileRecordReader}
 * @param split the split being read (unused)
 * @throws IOException declared by the signature; propagated from schema parsing
 */
@Override
@SuppressWarnings("rawtypes")
public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
    this.reader = (OmnitureDataFileRecordReader) reader;

    // Local is named distinctly so it does not shadow the schema string field.
    ResourceSchema parsedSchema = new ResourceSchema(Utils.getSchemaFromString(this.schema));
    fields = parsedSchema.getFields();
}
public ResourceSchema getSchema(String location, Job job) throws IOException { // The schema for hit_data.tsv won't change for quite sometime and when it does, this class should be updated ResourceSchema s = new ResourceSchema(Utils.getSchemaFromString(schema)); // Store the schema to our UDF context on the backend (is this really necessary considering it's private static final?) UDFContext udfc = UDFContext.getUDFContext(); Properties p = udfc.getUDFProperties(this.getClass(), new String[]{udfcSignature}); p.setProperty("pig.omnituretextloader.schema", schema); return s; }
/**
 * Reads the schema string stored under {@code SCHEMA} in the properties and parses it.
 *
 * @return the parsed Pig schema
 * @throws ParquetEncodingException if no schema string is stored, or if the stored
 *         string cannot be parsed
 */
private Schema getSchema() {
    try {
        final String storedSchema = getProperties().getProperty(SCHEMA);
        if (storedSchema != null) {
            return Utils.getSchemaFromString(storedSchema);
        }
        throw new ParquetEncodingException("Can not store relation in Parquet as the schema is unknown");
    } catch (ParserException e) {
        throw new ParquetEncodingException("can not get schema from context", e);
    }
}