/**
 * Checks how {@code AvroInputFormat.listStatus} treats files that lack the
 * Avro extension.
 *
 * <p>Two empty files are created in the input directory, one named
 * {@code somefile.avro} and one named {@code someotherfile.txt}. With the
 * configuration as prepared by the fixture, only the {@code .avro} file is
 * expected in the listing. After
 * {@code AvroInputFormat.IGNORE_FILES_WITHOUT_EXTENSION_KEY} is set to
 * {@code false}, both files are expected.
 *
 * @throws Exception if any file system operation or the listing fails
 */
@SuppressWarnings("rawtypes")
@Test
public void testIgnoreFilesWithoutExtension() throws Exception {
  fs.mkdirs(inputDir);

  // Create one file with the Avro extension and one without, both empty.
  final Path withAvroExtension = new Path(inputDir, "somefile.avro");
  final Path withOtherExtension = new Path(inputDir, "someotherfile.txt");
  fs.create(withAvroExtension).close();
  fs.create(withOtherExtension).close();

  FileInputFormat.setInputPaths(conf, inputDir);
  final AvroInputFormat format = new AvroInputFormat();

  // First pass: the non-Avro file is expected to be filtered out.
  FileStatus[] listed = format.listStatus(conf);
  Assert.assertEquals(1, listed.length);
  Assert.assertEquals("somefile.avro", listed[0].getPath().getName());

  // Second pass: with the filter switched off, both files must be listed.
  conf.setBoolean(AvroInputFormat.IGNORE_FILES_WITHOUT_EXTENSION_KEY, false);
  listed = format.listStatus(conf);
  Assert.assertEquals(2, listed.length);

  // Listing order is not asserted, so compare by name through a set.
  final Set<String> listedNames = new HashSet<>();
  for (FileStatus entry : listed) {
    listedNames.add(entry.getPath().getName());
  }
  Assert.assertTrue(listedNames.contains("somefile.avro"));
  Assert.assertTrue(listedNames.contains("someotherfile.txt"));
}
}
/**
 * Builds the job configuration for an Avro-in / Avro-out job.
 *
 * <p>The output location ({@code OUTPUT_PATH}) is deleted recursively before
 * the configuration is assembled. The returned configuration uses
 * {@code AvroInputFormat} and {@code AvroOutputFormat}, reads from
 * {@code INPUT_PATH}, writes to {@code OUTPUT_PATH}, and uses
 * {@code User.SCHEMA} as both input and output schema.
 *
 * @return a freshly created, fully populated {@code JobConf}
 * @throws IOException if the file system cannot be obtained or the delete fails
 */
private JobConf createJobConfig() throws IOException {
  final Path source = new Path(INPUT_PATH);
  final Path destination = new Path(OUTPUT_PATH);

  // Remove any existing output, using the file system resolved from a fresh Configuration.
  final FileSystem fileSystem = FileSystem.get(new Configuration());
  fileSystem.delete(destination, true);

  final JobConf result = new JobConf();

  // Formats.
  result.setInputFormat(AvroInputFormat.class);
  result.setOutputFormat(AvroOutputFormat.class);

  // Locations.
  AvroInputFormat.addInputPath(result, source);
  AvroOutputFormat.setOutputPath(result, destination);

  // Input and output share the same schema.
  final String userSchemaJson = User.SCHEMA.toString();
  result.set(AvroJob.INPUT_SCHEMA, userSchemaJson);
  result.set(AvroJob.OUTPUT_SCHEMA, userSchemaJson);

  return result;
}
/**
 * Runs an Avro Hadoop job built from the given job configuration and blocks
 * until it finishes.
 *
 * <p>The configured input paths and the output path are logged before the job
 * is created. The job jar is located through {@code AvroUtils.class}.
 *
 * @param conf job configuration; its input paths and output path are read
 *             through {@code AvroInputFormat} and {@code AvroOutputFormat}
 * @throws java.io.IOException if the job runs to completion but reports failure
 * @throws Exception if creating, submitting or waiting for the job fails
 */
public static void runAvroJob(JobConf conf) throws Exception {
  Path[] inputPaths = AvroInputFormat.getInputPaths(conf);
  _log.info("Running hadoop job with input paths:");
  for (Path inputPath : inputPaths) {
    _log.info(inputPath);
  }
  _log.info("Output path=" + AvroOutputFormat.getOutputPath(conf));

  Job job = new Job(conf);
  job.setJarByClass(AvroUtils.class);

  // waitForCompletion signals an unsuccessful job through its return value,
  // not an exception; surface it so callers do not treat a failed job as done.
  boolean succeeded = job.waitForCompletion(true);
  if (!succeeded) {
    throw new java.io.IOException("Avro hadoop job failed: " + job.getJobName());
  }
}
// Passes the path of `status` to AvroInputFormat.addInputPath for `conf`.
// NOTE(review): isolated statement; `conf` and `status` are declared outside the visible source — confirm against the enclosing method.
AvroInputFormat.addInputPath(conf, status.getPath());