private void fileIsGoodAvro(Path path) throws IOException {
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  // copy the HDFS file to a local file so Avro's DataFileReader can open it
  try (FSDataInputStream in = fs.open(path, 0);
      FileOutputStream out = new FileOutputStream("target/FOO.avro")) {
    byte[] buffer = new byte[100];
    int bytesRead;
    while ((bytesRead = in.read(buffer)) > 0) {
      out.write(buffer, 0, bytesRead);
    }
  }
  java.io.File file = new File("target/FOO.avro");
  // iterate over every record; DataFileReader throws if the file is not valid Avro
  try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(file, datumReader)) {
    GenericRecord user = null;
    while (dataFileReader.hasNext()) {
      user = dataFileReader.next(user);
    }
  }
  file.delete();
}
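A hedged alternative sketch: Avro's DataFileStream (org.apache.avro.file.DataFileStream) can consume the FSDataInputStream directly, which avoids the copy to the local target/FOO.avro file. The method name is hypothetical; fs is the same FileSystem field the original uses.

private void fileIsGoodAvroStreaming(Path path) throws IOException {
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  // read the Avro container straight off the HDFS stream, no local copy
  try (FSDataInputStream in = fs.open(path);
      DataFileStream<GenericRecord> reader = new DataFileStream<>(in, datumReader)) {
    GenericRecord record = null;
    while (reader.hasNext()) {
      record = reader.next(record); // reuse the record object to reduce allocations
    }
  }
}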
@Test
public void testWriteLargeChunk() throws IOException, InterruptedException, ExecutionException {
  Path f = new Path("/" + name.getMethodName());
  EventLoop eventLoop = EVENT_LOOP_GROUP.next();
  // use a 1 GB block size so the 50 MB payload stays inside a single block
  FanOutOneBlockAsyncDFSOutput out = FanOutOneBlockAsyncDFSOutputHelper.createOutput(FS, f, true,
    false, (short) 3, 1024 * 1024 * 1024, eventLoop, CHANNEL_CLASS);
  byte[] b = new byte[50 * 1024 * 1024];
  ThreadLocalRandom.current().nextBytes(b);
  out.write(b);
  out.flush(false);
  assertEquals(b.length, out.flush(false).get().longValue());
  out.close();
  assertEquals(b.length, FS.getFileStatus(f).getLen());
  // read the file back and verify the contents round-tripped
  byte[] actual = new byte[b.length];
  try (FSDataInputStream in = FS.open(f)) {
    in.readFully(actual);
  }
  assertArrayEquals(b, actual);
}
@Test
public void testRecover() throws IOException, InterruptedException, ExecutionException {
  Path f = new Path("/" + name.getMethodName());
  EventLoop eventLoop = EVENT_LOOP_GROUP.next();
  FanOutOneBlockAsyncDFSOutput out = FanOutOneBlockAsyncDFSOutputHelper.createOutput(FS, f, true,
    false, (short) 3, FS.getDefaultBlockSize(), eventLoop, CHANNEL_CLASS);
  byte[] b = new byte[10];
  ThreadLocalRandom.current().nextBytes(b);
  out.write(b, 0, b.length);
  out.flush(false).get();
  // restart one datanode which causes one connection broken
  TEST_UTIL.getDFSCluster().restartDataNode(0);
  out.write(b, 0, b.length);
  try {
    out.flush(false).get();
    fail("flush should fail");
  } catch (ExecutionException e) {
    // we restarted one datanode so the flush should fail
    LOG.info("expected exception caught", e);
  }
  out.recoverAndClose(null);
  assertEquals(b.length, FS.getFileStatus(f).getLen());
  byte[] actual = new byte[b.length];
  try (FSDataInputStream in = FS.open(f)) {
    in.readFully(actual);
  }
  assertArrayEquals(b, actual);
}
FSDataInputStream fsDataInputStream = fs.open(outputFilePath);
BufferedReader reader = new BufferedReader(new InputStreamReader(fsDataInputStream));
TreeSet<String> lines = Sets.newTreeSet();
public FSDataInputStream open(Path location) throws IOException {
  return this.dfs.open(location);
}
public FSDataInputStream open(String location) throws IOException {
  Path path = new Path(location);
  return this.dfs.open(path);
}
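A small usage sketch for the two overloads above. The variable name client is hypothetical; it stands for an instance of the wrapper class that holds the dfs field.

// both streams read the same file; the String overload simply builds the Path
try (FSDataInputStream byPath = client.open(new Path("/data/events.log"));
    FSDataInputStream byString = client.open("/data/events.log")) {
  // ... read from either stream ...
}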
/**
 * Get the contents of the file at the given path.
 *
 * @param file the file to read
 * @return the file contents, one line per row, each terminated by a newline
 * @throws IOException if the file cannot be opened or read
 */
public String cat(Path file) throws IOException {
  StringBuilder out = new StringBuilder();
  try (BufferedReader br = new BufferedReader(new InputStreamReader(dfs.open(file)))) {
    String line;
    while ((line = br.readLine()) != null) {
      out.append(line).append("\n");
    }
    return out.toString();
  }
}
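Usage sketch, assuming hdfsClient is a hypothetical instance of the class that defines cat():

String contents = hdfsClient.cat(new Path("/tmp/notes.txt"));
System.out.print(contents); // lines are already newline-terminated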
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

DistributedFileSystem fileSystem = new DistributedFileSystem();
Configuration conf = new Configuration();
fileSystem.initialize(new URI("hdfs://namenode-host:54310"), conf);
FSDataInputStream input = fileSystem.open(new Path("/path/to/file.csv"));
System.out.println(new BufferedReader(new InputStreamReader(input)).readLine());
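An equivalent sketch using the FileSystem.get() factory, which resolves the implementation from the URI scheme instead of instantiating DistributedFileSystem by hand; the host, port, and path are placeholders carried over from the snippet above.

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(URI.create("hdfs://namenode-host:54310"), conf);
// try-with-resources closes the stream, which the original snippet never does
try (FSDataInputStream in = fs.open(new Path("/path/to/file.csv"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
  System.out.println(reader.readLine());
}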
public static Block getFirstBlock(FileSystem fs, Path path) throws IOException {
  DFSDataInputStream in = (DFSDataInputStream) ((DistributedFileSystem) fs).open(path);
  in.readByte(); // read one byte so the client actually connects to the first block
  return in.getCurrentBlock();
}
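The cast above targets the old DFSClient.DFSDataInputStream. On recent Hadoop releases the same idea is usually written against org.apache.hadoop.hdfs.client.HdfsDataInputStream; a sketch, with exact availability depending on your Hadoop version:

public static ExtendedBlock getFirstBlockNewApi(FileSystem fs, Path path) throws IOException {
  try (HdfsDataInputStream in = (HdfsDataInputStream) fs.open(path)) {
    in.readByte(); // force a connection to the first block
    return in.getCurrentBlock();
  }
}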
/**
 * Creates an input stream to read from the object.
 *
 * @param l offset, not used here
 * @return InputStream over the file contents
 * @throws IOException if the user has no read permission or the open fails
 */
public InputStream createInputStream(long l) throws IOException {
  // permission check
  if (!hasReadPermission()) {
    throw new IOException("No read permission : " + path);
  }
  // propagate open failures instead of swallowing them and returning null,
  // which would just move the failure to the caller as a NullPointerException
  DistributedFileSystem dfs = HdfsOverFtpSystem.getDfs();
  return dfs.open(path);
}
public static void check(DistributedFileSystem fs, Path p, int position, int length) throws IOException {
  byte[] buf = new byte[length];
  int i = 0;
  try (FSDataInputStream in = fs.open(p)) {
    // positioned read; readFully guarantees the whole buffer is filled,
    // whereas read(position, ...) may return fewer bytes than requested
    in.readFully(position, buf);
    for (i = position; i < length + position; i++) {
      assertEquals((byte) i, buf[i - position]);
    }
  } catch (IOException ioe) {
    throw new IOException("p=" + p + ", length=" + length + ", i=" + i, ioe);
  }
}
private void readTestFile(String testFileName) throws Exception {
  Path filePath = new Path(testFileName);
  FSDataInputStream istream = dfs.open(filePath, 10240);
  ByteBuffer buf = ByteBuffer.allocate(10240);
  int count = 0;
  try {
    // read a chunk, then skip 5 bytes forward before the next read
    while (istream.read(buf) > 0) {
      count += 1;
      buf.clear();
      istream.seek(istream.getPos() + 5);
    }
  } catch (IOException ioe) {
    // ignore; this is probably a seek past EOF
  } finally {
    istream.close();
  }
}
// assign the opened stream so its block locations can be queried; the original
// fragment dropped the return value of open() but then referenced `dis`
DFSDataInputStream dis = (DFSDataInputStream) ((DistributedFileSystem) fileSys).open(name);
Collection<LocatedBlock> dinfo = dis.getAllBlocks();
static boolean verifyFile(Path f, DistributedFileSystem dfs) {
  // the file name encodes the random seed and block count as "<seed>_<numBlocks>"
  final long seed;
  final int numBlocks;
  {
    final String name = f.getName();
    final int i = name.indexOf('_');
    seed = Long.parseLong(name.substring(0, i));
    numBlocks = Integer.parseInt(name.substring(i + 1));
  }
  final byte[] computed = IO_BUF.get();
  final byte[] expected = VERIFY_BUF.get();
  try (FSDataInputStream in = dfs.open(f)) {
    for (int i = 0; i < numBlocks; i++) {
      // readFully instead of read: a plain read may fill only part of the buffer
      in.readFully(computed);
      nextBytes(i, seed, expected);
      Assert.assertArrayEquals(expected, computed);
    }
    return true;
  } catch (Exception e) {
    LOG.error("Failed to verify file " + f);
    return false;
  }
}
private void verifyFilesUnreadablebyHDFS(MiniDFSCluster cluster, Path root) throws Exception {
  DistributedFileSystem fs = cluster.getFileSystem();
  Queue<Path> paths = new LinkedList<>();
  paths.add(root);
  while (!paths.isEmpty()) {
    Path p = paths.poll();
    FileStatus stat = fs.getFileStatus(p);
    if (!stat.isDirectory()) {
      try {
        LOG.warn("\n\n ##Testing path [" + p + "]\n\n");
        fs.open(p);
        Assert.fail("Super user should not be able to read ["
          + UserGroupInformation.getCurrentUser() + "] [" + p.getName() + "]");
      } catch (AccessControlException e) {
        Assert.assertTrue(
          e.getMessage().contains("superuser is not allowed to perform this operation"));
      } catch (Exception e) {
        Assert.fail("Should get an AccessControlException here");
      }
    }
    if (stat.isDirectory()) {
      FileStatus[] ls = fs.listStatus(p);
      for (FileStatus f : ls) {
        paths.add(f.getPath());
      }
    }
  }
}
@Test
public void testBothOldAndNewShortCircuitConfigured() throws Exception {
  final short REPL_FACTOR = 1;
  final int FILE_LENGTH = 512;
  Assume.assumeTrue(null == DomainSocket.getLoadingFailureReason());
  TemporarySocketDirectory socketDir = new TemporarySocketDirectory();
  HdfsConfiguration conf = getConfiguration(socketDir);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  cluster.waitActive();
  socketDir.close();
  FileSystem fs = cluster.getFileSystem();
  Path path = new Path("/foo");
  byte[] orig = new byte[FILE_LENGTH];
  for (int i = 0; i < orig.length; i++) {
    orig[i] = (byte) (i % 10);
  }
  FSDataOutputStream fos = fs.create(path, (short) 1);
  fos.write(orig);
  fos.close();
  DFSTestUtil.waitReplication(fs, path, REPL_FACTOR);
  // read the file back and confirm the bytes match what was written
  FSDataInputStream fis = cluster.getFileSystem().open(path);
  byte[] buf = new byte[FILE_LENGTH];
  IOUtils.readFully(fis, buf, 0, FILE_LENGTH);
  fis.close();
  Assert.assertArrayEquals(orig, buf);
  cluster.shutdown();
}
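For context, a hedged sketch of the kind of client configuration the test name refers to: enabling both the legacy and the current short-circuit read paths at once. The keys are standard HDFS client settings; the socket path is a placeholder.

HdfsConfiguration conf = new HdfsConfiguration();
conf.setBoolean("dfs.client.read.shortcircuit", true);            // current short-circuit reads
conf.setBoolean("dfs.client.use.legacy.blockreader.local", true); // legacy local block reader
conf.set("dfs.domain.socket.path", "/var/run/hdfs/dn_socket");    // placeholder socket path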