/**
 * Creates the Pentaho Parquet input format wrapper. All Hadoop/Parquet setup
 * runs inside {@code inClassloader} so the correct class loader is active
 * while the {@code Job} and the native {@code ParquetInputFormat} are built.
 *
 * @throws Exception if job or input-format initialization fails
 */
public PentahoParquetInputFormat() throws Exception {
  logger.info( "We are initializing parquet input format" );
  inClassloader( () -> {
    ConfigurationProxy conf = new ConfigurationProxy();
    job = Job.getInstance( conf );
    nativeParquetInputFormat = new ParquetInputFormat<>();
    // Records are materialized through the Pentaho-specific read support.
    ParquetInputFormat.setReadSupportClass( job, PentahoParquetReadSupport.class );
    // Disable task-side metadata so footers are processed on the client side.
    ParquetInputFormat.setTaskSideMetaData( job, false );
  } );
}
// Turn off Avro-compatibility mode for this read; records will be materialized
// through GroupReadSupport (generic Parquet Group objects) instead of Avro.
// NOTE(review): assumes `conf` is the same Configuration backing `job` — confirm in the enclosing method.
conf.setBoolean("parquet.avro.compatible", false);
ParquetInputFormat.setReadSupportClass(job, GroupReadSupport.class);
/**
 * Configures the {@link JobConf} so this Cascading source reads
 * Thrift records out of Parquet files.
 */
@Override
public void sourceConfInit(FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  super.sourceConfInit(fp, tap, jobConf);
  // Old mapred-API wrapper around the mapreduce ParquetInputFormat.
  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  // Thrift read support plus the converter that rebuilds TBase records.
  ParquetInputFormat.setReadSupportClass(jobConf, ThriftReadSupport.class);
  ThriftReadSupport.setRecordConverterClass(jobConf, TBaseRecordConverter.class);
}
/**
 * Source-side job configuration: wires the Parquet/Thrift input pipeline
 * into the given {@link JobConf}.
 */
@Override
public void sourceConfInit(FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  super.sourceConfInit(fp, tap, jobConf);
  // mapred-API bridge to the mapreduce-based ParquetInputFormat.
  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  // Read Parquet data through Thrift read support, converting to TBase records.
  ParquetInputFormat.setReadSupportClass(jobConf, ThriftReadSupport.class);
  ThriftReadSupport.setRecordConverterClass(jobConf, TBaseRecordConverter.class);
}
/**
 * Prepares {@code jobConf} for sourcing Thrift records from Parquet storage.
 */
@Override
public void sourceConfInit(FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  super.sourceConfInit(fp, tap, jobConf);
  // Use the deprecated (mapred) wrapper so the old-API job can read Parquet.
  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  // Thrift-based read support; records come back as TBase instances.
  ParquetInputFormat.setReadSupportClass(jobConf, ThriftReadSupport.class);
  ThriftReadSupport.setRecordConverterClass(jobConf, TBaseRecordConverter.class);
}
/**
 * Configures the source side of this scheme: installs the Parquet input
 * format, the tuple read support, and (optionally) a predicate-pushdown
 * filter on the given {@link JobConf}.
 */
@SuppressWarnings("rawtypes")
@Override
public void sourceConfInit(FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  // Only push the row-group filter down when one was supplied.
  if (filterPredicate != null) {
    ParquetInputFormat.setFilterPredicate(jobConf, filterPredicate);
  }
  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  // Tuples are materialized via TupleReadSupport, projected to the source fields.
  ParquetInputFormat.setReadSupportClass(jobConf, TupleReadSupport.class);
  TupleReadSupport.setRequestedFields(jobConf, getSourceFields());
}
/**
 * Source-side configuration for reading Cascading tuples out of Parquet.
 * Registers an optional filter predicate, the mapred input-format bridge,
 * and the tuple read support with its requested field projection.
 */
@SuppressWarnings("rawtypes")
@Override
public void sourceConfInit(FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  // Predicate pushdown is opt-in; skip when no filter was configured.
  if (filterPredicate != null) {
    ParquetInputFormat.setFilterPredicate(jobConf, filterPredicate);
  }
  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  ParquetInputFormat.setReadSupportClass(jobConf, TupleReadSupport.class);
  // Limit reads to the fields this scheme actually sources.
  TupleReadSupport.setRequestedFields(jobConf, getSourceFields());
}
/**
 * Wires this tuple scheme into {@code jobConf}: optional Parquet filter
 * predicate, deprecated input-format wrapper, tuple read support, and the
 * requested source-field projection.
 */
@SuppressWarnings("rawtypes")
@Override
public void sourceConfInit(FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  if (filterPredicate != null) {
    // Push the filter into Parquet so non-matching row groups can be skipped.
    ParquetInputFormat.setFilterPredicate(jobConf, filterPredicate);
  }
  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  ParquetInputFormat.setReadSupportClass(jobConf, TupleReadSupport.class);
  TupleReadSupport.setRequestedFields(jobConf, getSourceFields());
}
// Parse the Avro schema string and register it as the Avro read schema on the
// Spark context's Hadoop configuration, then tell Parquet to materialize
// records through AvroReadSupport.
AvroReadSupport .setAvroReadSchema(jsc.hadoopConfiguration(), (new Schema.Parser().parse(schemaStr)));
ParquetInputFormat.setReadSupportClass(job, (AvroReadSupport.class));
// Install the parsed Avro schema as the read schema for Parquet-to-Avro
// materialization, and select AvroReadSupport as the job's read support.
// NOTE(review): duplicate of an identical snippet elsewhere — consider extracting a helper.
AvroReadSupport .setAvroReadSchema(jsc.hadoopConfiguration(), (new Schema.Parser().parse(schemaStr)));
ParquetInputFormat.setReadSupportClass(job, (AvroReadSupport.class));
// Map-only job: no reduce phase.
job.setNumReduceTasks(0);
// Read input with ParquetInputFormat, materializing records via ParquetReadSupport.
job.setInputFormatClass(ParquetInputFormat.class);
ParquetInputFormat.setReadSupportClass(job, ParquetReadSupport.class);
ParquetInputFormat.setInputPaths(job, inputDir);