/**
 * Returns a {@link ReadableData} view of this collection.
 *
 * <p>Resolution order:
 * <ol>
 *   <li>if {@code materializedAt} is a {@link ReadableSource}, read directly from it;</li>
 *   <li>otherwise, if this collection is already flagged as materialized or the caller
 *       asked for materialization, use the pipeline's materialize source target;</li>
 *   <li>otherwise, defer to {@link #getReadableDataInternal()}.</li>
 * </ol>
 *
 * @param materialize whether the caller requests the materialized form
 * @return the readable data chosen by the rules above
 */
@Override
public ReadableData<S> asReadable(boolean materialize) {
  // instanceof is false for null, so no separate null check is required.
  if (materializedAt instanceof ReadableSource) {
    return ((ReadableSource) materializedAt).asReadable();
  }
  if (!materialized && !materialize) {
    return getReadableDataInternal();
  }
  return pipeline.getMaterializeSourceTarget(this).asReadable();
}
/**
 * Reads the records of the wrapped {@code source} by delegating to it as a
 * {@link ReadableSource}.
 *
 * @param conf configuration handed through to the wrapped source
 * @return the iterable produced by the wrapped source
 * @throws IOException if the wrapped source's {@code read} throws it
 */
@Override
public Iterable<T> read(Configuration conf) throws IOException {
  ReadableSource<T> readable = (ReadableSource<T>) source;
  return readable.read(conf);
}
ret.inputConf(RuntimeParameters.DISABLE_COMBINE_FILE, "true"); return ret;
ret.inputConf(RuntimeParameters.DISABLE_COMBINE_FILE, "true"); return ret;
/**
 * Delegates reading to the wrapped {@code source}, which is cast to
 * {@link ReadableSource}.
 *
 * @param conf configuration forwarded unchanged to the wrapped source
 * @return whatever the wrapped source's {@code read(conf)} returns
 * @throws IOException propagated from the wrapped source
 */
@Override
public Iterable<T> read(Configuration conf) throws IOException {
  final ReadableSource<T> delegate = (ReadableSource<T>) source;
  return delegate.read(conf);
}
} // closes the enclosing scope opened above this excerpt
/**
 * Obtains the {@link ReadableData} of the wrapped {@code source} and, when it is a
 * {@link ReadableDataImpl}, registers this object as its parent before returning it.
 *
 * @return the readable data supplied by the wrapped source
 */
@Override
public ReadableData<T> asReadable() {
  ReadableSource<T> readable = (ReadableSource<T>) source;
  ReadableData<T> data = readable.asReadable();
  if (data instanceof ReadableDataImpl) {
    ReadableDataImpl<T> impl = (ReadableDataImpl<T>) data;
    impl.setParent(this);
  }
  return data;
}
} // closes the enclosing scope opened above this excerpt
/**
 * Writes {@code contents} to {@code parallelism} sequence files named {@code out0},
 * {@code out1}, ... under {@code path}, distributing records round-robin, and returns
 * a {@link SeqFileTableSource} over that directory with
 * {@link RuntimeParameters#DISABLE_COMBINE_FILE} set to {@code "true"}.
 *
 * <p>Every writer that was opened is closed even when creating a later writer or
 * appending a record fails, so file handles are not leaked on the error path.
 *
 * @param conf        configuration used to obtain the file system, initialise the
 *                    output function and create the writers
 * @param path        directory that receives the output files (created if needed)
 * @param contents    key/value pairs to write
 * @param parallelism number of output files to spread the records over
 * @return a readable table source over {@code path}
 * @throws IOException if the file system cannot be accessed, or a writer cannot be
 *                     created, written to, or closed
 */
@Override
public ReadableSource<Pair<K, V>> createSourceTarget(
    Configuration conf, Path path, Iterable<Pair<K, V>> contents, int parallelism)
    throws IOException {
  FileSystem fs = FileSystem.get(conf);
  outputFn.setConfiguration(conf);
  outputFn.initialize();
  fs.mkdirs(path);

  List<SequenceFile.Writer> writers = Lists.newArrayListWithExpectedSize(parallelism);
  boolean written = false;
  try {
    for (int i = 0; i < parallelism; i++) {
      Path out = new Path(path, "out" + i);
      writers.add(SequenceFile.createWriter(fs, conf, out,
          keyType.getSerializationClass(), valueType.getSerializationClass()));
    }

    // Round-robin the records over the writers.
    int target = 0;
    for (Pair<K, V> value : contents) {
      Pair<?, ?> writablePair = (Pair<?, ?>) outputFn.map(value);
      writers.get(target).append(writablePair.first(), writablePair.second());
      target = (target + 1) % parallelism;
    }
    written = true;
  } finally {
    // Close every writer that was opened, remembering only the first close failure.
    IOException closeFailure = null;
    for (SequenceFile.Writer writer : writers) {
      try {
        writer.close();
      } catch (IOException e) {
        if (closeFailure == null) {
          closeFailure = e;
        }
      }
    }
    // Only surface a close failure when nothing else is already propagating,
    // so the original write/create exception is never masked.
    if (written && closeFailure != null) {
      throw closeFailure;
    }
  }

  ReadableSource<Pair<K, V>> ret = new SeqFileTableSource<K, V>(path, this);
  ret.inputConf(RuntimeParameters.DISABLE_COMBINE_FILE, "true");
  return ret;
}
/**
 * Reads from the wrapped {@code source} after casting it to {@link ReadableSource}.
 *
 * @param conf configuration passed straight to the wrapped source
 * @return the iterable returned by the wrapped source
 * @throws IOException if the wrapped source's {@code read} throws it
 */
@Override
public Iterable<T> read(Configuration conf) throws IOException {
  ReadableSource<T> wrapped = (ReadableSource<T>) source;
  Iterable<T> records = wrapped.read(conf);
  return records;
}
/**
 * Returns the wrapped source's {@link ReadableData}. If that object is a
 * {@link ReadableDataImpl}, this instance is set as its parent first.
 *
 * @return the readable data obtained from the wrapped {@code source}
 */
@Override
public ReadableData<T> asReadable() {
  final ReadableData<T> readableData = ((ReadableSource<T>) source).asReadable();
  if (!(readableData instanceof ReadableDataImpl)) {
    return readableData;
  }
  ((ReadableDataImpl<T>) readableData).setParent(this);
  return readableData;
}
/**
 * Forwards the read request to the wrapped {@code source}, treated as a
 * {@link ReadableSource}.
 *
 * @param conf configuration forwarded to the wrapped source
 * @return the wrapped source's records
 * @throws IOException propagated from the wrapped source
 */
@Override
public Iterable<T> read(Configuration conf) throws IOException {
  ReadableSource<T> readableSource = (ReadableSource<T>) source;
  return readableSource.read(conf);
}
} // closes the enclosing scope opened above this excerpt
/**
 * Supplies the readable data for this collection: directly from {@code source} when
 * it is a {@link ReadableSource}, otherwise via {@link #materializedData()}.
 *
 * @return the readable data for this collection
 */
@Override
protected ReadableData<S> getReadableDataInternal() {
  if (!(source instanceof ReadableSource)) {
    return materializedData();
  }
  ReadableSource<S> readable = (ReadableSource<S>) source;
  return readable.asReadable();
}
/**
 * Casts the wrapped {@code source} to {@link ReadableSource} and returns the result
 * of its {@code read(conf)}.
 *
 * @param conf configuration given to the wrapped source
 * @return the iterable produced by the wrapped source
 * @throws IOException if thrown by the wrapped source
 */
@Override
public Iterable<T> read(Configuration conf) throws IOException {
  final ReadableSource<T> inner = (ReadableSource<T>) source;
  return inner.read(conf);
}
/**
 * Chooses between reading {@code source} directly (when it is a
 * {@link ReadableSource}) and falling back to {@link #materializedData()}.
 *
 * @return readable data from the source itself or from its materialization
 */
@Override
protected ReadableData<S> getReadableDataInternal() {
  final boolean directlyReadable = source instanceof ReadableSource;
  return directlyReadable
      ? ((ReadableSource<S>) source).asReadable()
      : materializedData();
}
/**
 * Reads via the wrapped {@code source}, which is cast to {@link ReadableSource}.
 *
 * @param conf configuration forwarded to the wrapped source
 * @return the records returned by the wrapped source
 * @throws IOException propagated from the wrapped source's {@code read}
 */
@Override
public Iterable<T> read(Configuration conf) throws IOException {
  ReadableSource<T> target = (ReadableSource<T>) source;
  Iterable<T> result = target.read(conf);
  return result;
}
/**
 * Flags this collection as materialized and returns the readable data of the
 * pipeline's materialize source target for it.
 *
 * @return readable data backed by the pipeline's materialize source target
 */
protected ReadableData<S> materializedData() {
  // The flag is set before the pipeline is consulted, matching the original order.
  this.materialized = true;
  final ReadableData<S> data = pipeline.getMaterializeSourceTarget(this).asReadable();
  return data;
}
/**
 * Reads a table source into an in-memory {@link MemTable}.
 *
 * @param source the table source to read; must also be a {@link ReadableSource}
 * @return a {@code MemTable} over the records read from {@code source}
 * @throws IllegalStateException if {@code source} is not a {@code ReadableSource},
 *         or if reading it raises an {@link IOException} (wrapped as the cause)
 */
@Override
public <K, V> PTable<K, V> read(TableSource<K, V> source) {
  if (!(source instanceof ReadableSource)) {
    LOG.error("Source " + source + " is not readable");
    throw new IllegalStateException("Source " + source + " is not readable");
  }
  ReadableSource<Pair<K, V>> readable = (ReadableSource<Pair<K, V>>) source;
  try {
    Iterable<Pair<K, V>> records = readable.read(conf);
    return new MemTable<K, V>(records, source.getTableType(), source.toString());
  } catch (IOException ioe) {
    LOG.error("Exception reading source: " + source.toString(), ioe);
    throw new IllegalStateException(ioe);
  }
}
/**
 * Asks {@code ptype} to create a source target for {@code contents} at a fresh
 * temporary path from the pipeline, using the pipeline's configuration and this
 * object's {@code parallelism}, and returns that source's readable data.
 *
 * @return readable data over the newly created source target
 * @throws CrunchRuntimeException wrapping any {@link IOException} raised while
 *         creating the source target
 */
@Override
protected ReadableData<Pair<K, V>> getReadableDataInternal() {
  final ReadableData<Pair<K, V>> readable;
  try {
    readable = ptype
        .createSourceTarget(
            pipeline.getConfiguration(), pipeline.createTempPath(), contents, parallelism)
        .asReadable();
  } catch (IOException ioe) {
    throw new CrunchRuntimeException(ioe);
  }
  return readable;
}
/**
 * Reads a source into an in-memory {@link MemCollection}.
 *
 * @param source the source to read; must also be a {@link ReadableSource}
 * @return a {@code MemCollection} over the records read from {@code source}
 * @throws IllegalStateException if {@code source} is not a {@code ReadableSource},
 *         or if reading it raises an {@link IOException} (wrapped as the cause)
 */
@Override
public <T> PCollection<T> read(Source<T> source) {
  if (!(source instanceof ReadableSource)) {
    LOG.error("Source " + source + " is not readable");
    throw new IllegalStateException("Source " + source + " is not readable");
  }
  ReadableSource<T> readable = (ReadableSource<T>) source;
  try {
    Iterable<T> records = readable.read(conf);
    return new MemCollection<T>(records, source.getType(), source.toString());
  } catch (IOException ioe) {
    LOG.error("Exception reading source: " + source.toString(), ioe);
    throw new IllegalStateException(ioe);
  }
}
/**
 * Builds readable data for the in-memory {@code contents} by having {@code ptype}
 * create a source target at a temporary pipeline path, then exposing it as readable.
 *
 * @return readable data over the created source target
 * @throws CrunchRuntimeException if creating the source target raises an
 *         {@link IOException}; the original exception is kept as the cause
 */
@Override
protected ReadableData<Pair<K, V>> getReadableDataInternal() {
  ReadableData<Pair<K, V>> result;
  try {
    result = ptype.createSourceTarget(
        pipeline.getConfiguration(),
        pipeline.createTempPath(),
        contents,
        parallelism).asReadable();
  } catch (IOException cause) {
    throw new CrunchRuntimeException(cause);
  }
  return result;
}
/**
 * Loads the given table source into a {@link MemTable}.
 *
 * <p>The source must be a {@link ReadableSource}; anything else is logged and
 * rejected. An {@link IOException} while reading is logged and rethrown wrapped in
 * an {@link IllegalStateException}.
 *
 * @param source the table source to load
 * @return a {@code MemTable} built from the source's records, table type and
 *         string form
 * @throws IllegalStateException if the source is not readable or reading fails
 */
@Override
public <K, V> PTable<K, V> read(TableSource<K, V> source) {
  final boolean readable = source instanceof ReadableSource;
  if (!readable) {
    LOG.error("Source " + source + " is not readable");
    throw new IllegalStateException("Source " + source + " is not readable");
  }
  try {
    Iterable<Pair<K, V>> pairs = ((ReadableSource<Pair<K, V>>) source).read(conf);
    return new MemTable<K, V>(pairs, source.getTableType(), source.toString());
  } catch (IOException readFailure) {
    LOG.error("Exception reading source: " + source.toString(), readFailure);
    throw new IllegalStateException(readFailure);
  }
}