private void getWriter(Reporter reporter, ObjectInspector inspector, int bucket) throws IOException { if (writer == null) { AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf); options.inspector(inspector) .writingBase(jobConf.getBoolean(IS_MAJOR, false)) .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false)) .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties()) .reporter(reporter) .minimumWriteId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE)) .maximumWriteId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE)) .bucket(bucket) .statementId(-1)//setting statementId == -1 makes compacted delta files use .visibilityTxnId(getCompactorTxnId(jobConf)); //delta_xxxx_yyyy format // Instantiate the underlying output format @SuppressWarnings("unchecked")//since there is no way to parametrize instance of Class AcidOutputFormat<WritableComparable, V> aof = instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME)); writer = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options); } }
@Override public void map(WritableComparable key, CompactorInputSplit split, OutputCollector<NullWritable, NullWritable> nullWritableVOutputCollector, Reporter reporter) throws IOException { // This will only get called once, since CompactRecordReader only returns one record, // the input split. // Based on the split we're passed we go instantiate the real reader and then iterate on it // until it finishes. @SuppressWarnings("unchecked")//since there is no way to parametrize instance of Class AcidInputFormat<WritableComparable, V> aif = instantiate(AcidInputFormat.class, jobConf.get(INPUT_FORMAT_CLASS_NAME)); ValidTxnList txnList = new ValidReadTxnList(jobConf.get(ValidTxnList.VALID_TXNS_KEY)); boolean isMajor = jobConf.getBoolean(IS_MAJOR, false); AcidInputFormat.RawReader<V> reader = aif.getRawReader(jobConf, isMajor, split.getBucket(), txnList, split.getBaseDir(), split.getDeltaDirs()); RecordIdentifier identifier = reader.createKey(); V value = reader.createValue(); getWriter(reporter, reader.getObjectInspector(), split.getBucket()); while (reader.next(identifier, value)) { if (isMajor && reader.isDelete(value)) continue; writer.write(value); reporter.progress(); } }
RecordIdentifier identifier = reader.createKey(); V value = reader.createValue(); getWriter(reporter, reader.getObjectInspector(), split.getBucket()); getDeleteEventWriter(reporter, reader.getObjectInspector(), split.getBucket());
RecordIdentifier identifier = reader.createKey(); V value = reader.createValue(); getWriter(reporter, reader.getObjectInspector(), split.getBucket()); getDeleteEventWriter(reporter, reader.getObjectInspector(), split.getBucket());
private void getDeleteEventWriter(Reporter reporter, ObjectInspector inspector, int bucket) throws IOException { AcidOutputFormat.Options options = new AcidOutputFormat.Options(jobConf); options.inspector(inspector).writingBase(false) .writingDeleteDelta(true) // this is the option which will make it a delete writer .isCompressed(jobConf.getBoolean(IS_COMPRESSED, false)) .tableProperties(new StringableMap(jobConf.get(TABLE_PROPS)).toProperties()).reporter(reporter) .minimumWriteId(jobConf.getLong(MIN_TXN, Long.MAX_VALUE)) .maximumWriteId(jobConf.getLong(MAX_TXN, Long.MIN_VALUE)).bucket(bucket) .statementId(-1)//setting statementId == -1 makes compacted delta files use // delta_xxxx_yyyy format .visibilityTxnId(getCompactorTxnId(jobConf)); // Instantiate the underlying output format @SuppressWarnings("unchecked")//since there is no way to parametrize instance of Class AcidOutputFormat<WritableComparable, V> aof = instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME)); deleteEventWriter = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options); } }