/**
 * Resolves the Avro schema for the data found at the given location string.
 *
 * <p>The location is split into individual path strings by
 * {@code getPathStrings}; each one is wrapped in a {@code Path} and the
 * lookup is delegated to the {@code Path[]} overload of this method.
 *
 * @param location location string to read the schema from
 * @param job Hadoop job object, handed through to the {@code Path[]} overload
 * @return the Avro schema returned by the {@code Path[]} overload
 * @throws IOException propagated from the delegated schema lookup
 */
protected final Schema getAvroSchema(final String location, final Job job)
    throws IOException {
  final String[] pathStrings = getPathStrings(location);
  final Path[] resolved = new Path[pathStrings.length];
  int next = 0;
  for (final String pathString : pathStrings) {
    resolved[next++] = new Path(pathString);
  }
  return getAvroSchema(resolved, job);
}
/**
 * Looks up the Properties object for this UDF instance.
 *
 * <p>The lookup is keyed on {@code AvroStorage.class} together with the
 * current {@code udfContextSignature}. When no signature has been set the
 * two-argument overload receives {@code null} for it.
 *
 * @return the Properties object returned by the two-argument overload
 */
protected final Properties getProperties() {
  // A null signature is forwarded unchanged, so no branching is needed.
  return getProperties(AvroStorage.class, udfContextSignature);
}
/**
 * Returns the Pig {@code ResourceSchema} for the data at the given location.
 *
 * <p>If no Avro schema is cached on this instance yet, it is read from
 * {@code location} and stored as the input Avro schema. The cached Avro
 * schema is then converted with
 * {@code AvroStorageSchemaConversionUtilities.avroSchemaToResourceSchema},
 * passing along the {@code allowRecursive} setting.
 *
 * @param location location string to read the Avro schema from when none is
 *     cached
 * @param job Hadoop job object used for the schema lookup
 * @return the converted resource schema
 * @throws IOException if the schema lookup fails or yields no schema
 */
@Override
public final ResourceSchema getSchema(final String location, final Job job)
    throws IOException {
  if (schema == null) {
    final Schema discovered = getAvroSchema(location, job);
    // Fail with a descriptive IOException (as setLocation does) instead of
    // letting setInputAvroSchema hit a NullPointerException on s.toString().
    if (discovered == null) {
      throw new IOException(
          "Could not determine avro schema for location " + location);
    }
    setInputAvroSchema(discovered);
  }
  return AvroStorageSchemaConversionUtilities
      .avroSchemaToResourceSchema(schema, allowRecursive);
}
setInputAvroSchema(s); setOutputAvroSchema(s); } catch (SchemaParseException e) { setInputAvroSchema(s); setOutputAvroSchema(s); } catch (FileNotFoundException fnfe) { System.err.printf("file not found exception\n"); try { Schema s = ((GenericContainer) Class.forName(schemaClass).newInstance()).getSchema(); setInputAvroSchema(s); setOutputAvroSchema(s); } catch (ClassNotFoundException | IllegalAccessException cnfe) { System.err.printf("class not found exception\n"); setOutputAvroSchema( getAvroSchema(configuredOptions.getOptionValue('e'), new Job(new Configuration()))); log.error("Exception in AvroStorage", e); log.error("AvroStorage called with arguments " + sn + ", " + opts); warn("ParseException in AvroStorage", PigWarning.UDF_WARNING_1); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("AvroStorage(',', '[options]')", validOptions); log.warn("Exception in AvroStorage", e); log.warn("AvroStorage called with arguments " + sn + ", " + opts); warn("IOException in AvroStorage", PigWarning.UDF_WARNING_1);
/**
 * Registers the input location on the job and makes sure an Avro schema is
 * available on this instance.
 *
 * <p>The schema is resolved in order: the one already cached on this
 * instance, the one returned by {@code getInputAvroSchema()}, and finally
 * the one read from {@code location}. A schema read from the location is
 * also stored as the input Avro schema.
 *
 * @param location input location string
 * @param job Hadoop job object whose input paths are set
 * @throws IOException if no schema can be determined for the location
 */
@Override
public void setLocation(final String location, final Job job)
    throws IOException {
  FileInputFormat.setInputPaths(job, location);

  if (schema != null) {
    return;
  }

  schema = getInputAvroSchema();
  if (schema != null) {
    return;
  }

  // Last resort: read the schema from the data location itself.
  schema = getAvroSchema(location, job);
  if (schema == null) {
    throw new IOException(
        "Could not determine avro schema for location " + location);
  }
  setInputAvroSchema(schema);
}
/**
 * Narrows the cached Avro schema to the fields named in the required field
 * list.
 *
 * <p>The list is remembered in {@code requiredFieldList} regardless of the
 * outcome. If a narrowed schema can be derived it replaces the cached schema
 * and is stored as the input Avro schema; otherwise a warning is emitted and
 * the schema is left untouched.
 *
 * @param rfl the fields requested by the caller
 * @return a response constructed with {@code true} when the projection was
 *     applied, {@code false} otherwise
 * @throws FrontendException declared by the overridden method
 */
@Override
public RequiredFieldResponse pushProjection(final RequiredFieldList rfl)
    throws FrontendException {
  requiredFieldList = rfl;

  final Schema projected = AvroStorageSchemaConversionUtilities
      .newSchemaFromRequiredFieldList(schema, rfl);

  if (projected == null) {
    log.warn("could not select fields subset " + rfl + "\n");
    warn("could not select fields subset", PigWarning.UDF_WARNING_2);
    return new RequiredFieldResponse(false);
  }

  schema = projected;
  setInputAvroSchema(schema);
  return new RequiredFieldResponse(true);
}
/**
 * Returns the schema cached on this instance, first attempting to load it
 * from the UDF properties when nothing is cached yet.
 *
 * @return the cached schema, or {@code null} if none is cached and none
 *     could be loaded from the properties
 */
public final Schema getInputAvroSchema() {
  if (schema != null) {
    return schema;
  }
  updateSchemaFromInputAvroSchema();
  return schema;
}
/**
 * Stores the record writer and hands it the output Avro schema.
 *
 * @param w the record writer; it is cast to {@code AvroRecordWriter}
 * @throws IOException if the UDF context signature has not been set
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public final void prepareToWrite(final RecordWriter w) throws IOException {
  if (this.udfContextSignature == null) {
    throw new IOException(this.getClass().toString()
        + ".prepareToWrite called without setting udf context signature");
  }
  writer = (RecordWriter<NullWritable, Object>) w;
  final AvroRecordWriter avroWriter = (AvroRecordWriter) writer;
  avroWriter.prepareToWrite(getOutputAvroSchema());
}
/**
 * Translates the given resource schema into an Avro schema and stores it as
 * the output Avro schema.
 *
 * <p>The translation uses {@code schemaName} as the name, falling back to
 * {@code "pig_output"} when it is null or empty.
 *
 * @param rs the resource schema to translate
 * @throws IOException if {@code rs} is null or cannot be translated
 */
@Override
public final void checkSchema(final ResourceSchema rs) throws IOException {
  if (rs == null) {
    throw new IOException("checkSchema: called with null ResourceSchema");
  }

  final String recordName;
  if (schemaName != null && schemaName.length() != 0) {
    recordName = schemaName;
  } else {
    recordName = "pig_output";
  }

  final Schema translated = AvroStorageSchemaConversionUtilities
      .resourceSchemaToAvroSchema(
          rs,
          recordName,
          schemaNameSpace,
          Maps.<String, List<Schema>> newHashMap(),
          doubleColonsToDoubleUnderscores);

  if (translated == null) {
    throw new IOException("checkSchema: could not translate ResourceSchema to Avro Schema");
  }
  setOutputAvroSchema(translated);
}
/**
 * Records the UDF context signature on this instance and on the superclass,
 * then refreshes the cached schema from the UDF properties.
 *
 * @param signature the UDF context signature
 */
@Override
public final void setUDFContextSignature(final String signature) {
  this.udfContextSignature = signature;
  super.setUDFContextSignature(signature);
  // The property lookup reads udfContextSignature, so refresh only after
  // the new signature is in place.
  updateSchemaFromInputAvroSchema();
}
/**
 * Caches {@code s} on this instance and stores its string form in the UDF
 * properties under {@code INPUT_AVRO_SCHEMA}.
 *
 * @param s the schema to cache and store
 */
protected final void setInputAvroSchema(final Schema s) {
  this.schema = s;
  final Properties udfProperties = getProperties();
  udfProperties.setProperty(INPUT_AVRO_SCHEMA, s.toString());
}
/**
 * Caches {@code s} on this instance and stores its string form in the UDF
 * properties under {@code OUTPUT_AVRO_SCHEMA}.
 *
 * @param s the schema to cache and store
 */
protected final void setOutputAvroSchema(final Schema s) {
  this.schema = s;
  final Properties udfProperties = getProperties();
  udfProperties.setProperty(OUTPUT_AVRO_SCHEMA, s.toString());
}
/**
 * Reloads the cached schema from the {@code INPUT_AVRO_SCHEMA} entry of the
 * UDF properties. Does nothing when that entry is absent.
 */
private final void updateSchemaFromInputAvroSchema() {
  final String serialized = getProperties().getProperty(INPUT_AVRO_SCHEMA);
  if (serialized == null) {
    return;
  }
  schema = new Schema.Parser().parse(serialized);
}
/**
 * Returns the schema cached on this instance. When nothing is cached, the
 * {@code OUTPUT_AVRO_SCHEMA} entry of the UDF properties is parsed and
 * cached first, if present.
 *
 * @return the cached schema, or {@code null} if none is cached and the
 *     properties hold no output schema
 */
protected final Schema getOutputAvroSchema() {
  if (schema != null) {
    return schema;
  }
  final String serialized = getProperties().getProperty(OUTPUT_AVRO_SCHEMA);
  if (serialized != null) {
    schema = new Schema.Parser().parse(serialized);
  }
  return schema;
}