/** Default constructor: wraps a ParquetInputFormat parameterized with DataWritableReadSupport. */
public MapredParquetInputFormat() {
    this(new ParquetInputFormat<ArrayWritable>(DataWritableReadSupport.class));
}
/**
 * Create the Hadoop job according to the arguments from main.
 */
@Override
public synchronized Job createJob(String[] args) throws IOException {
    Job job = super.createJob(args);
    // For a Parquet-format job, the Parquet schema field has to be appended. Only
    // 'parquet.pig.schema' can be set here because of the 'Job' dependency; the other two
    // required list parameters are set in TrainModelProcessor.
    @SuppressWarnings("rawtypes")
    final GlobalMetaData globalMetaData = new ParquetInputFormat().getGlobalMetaData(job);
    Schema schema = getPigSchemaFromMultipleFiles(globalMetaData.getSchema(), globalMetaData.getKeyValueMetaData());
    String schemaStr = pigSchemaToString(schema);
    job.getConfiguration().set("parquet.pig.schema", schemaStr);
    return job;
}
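A minimal usage sketch, not from the original source: "processor" below is a hypothetical instance of the class that defines createJob(). It only illustrates that, after the job is created, the Pig schema serialized above should be readable from the job configuration under the "parquet.pig.schema" key.

// Hypothetical usage: 'processor' stands in for an instance of the class defining createJob().
Job job = processor.createJob(args);
// The schema set in createJob() should now be visible to downstream Parquet/Pig readers.
String pigSchemaStr = job.getConfiguration().get("parquet.pig.schema");
if (pigSchemaStr == null || pigSchemaStr.isEmpty()) {
    throw new IllegalStateException("parquet.pig.schema was not set on the job configuration");
}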