/**
 * Creates an empty dataset: both the feature and label arrays are {@code null}.
 *
 * @return a new dataset whose fields are all {@code null}
 */
public static DataSet empty() {
    DataSet emptySet = new DataSet(null, null);
    return emptySet;
}
/**
 * Reads the dataset stored for the given batch index from disk.
 *
 * @param idx batch index, substituted into {@code pattern} to build the file name
 * @return the dataset loaded from the corresponding file under {@code rootDir}
 * @throws IOException if the file cannot be read
 */
private DataSet read(int idx) throws IOException {
    String fileName = String.format(pattern, idx);
    DataSet dataSet = new DataSet();
    dataSet.load(new File(rootDir, fileName));
    return dataSet;
}
}
/**
 * Fetches a dataset from the in-memory byte cache.
 *
 * @param key cache key to look up
 * @return the dataset deserialized from the cached bytes, or {@code null} if absent
 */
@Override
public DataSet get(String key) {
    // Single lookup instead of containsKey()+get(): avoids hashing the key twice
    // and is safe against the entry being removed between the two calls.
    byte[] data = cache.get(key);
    if (data == null) {
        return null;
    }
    DataSet ds = new DataSet();
    ds.load(new ByteArrayInputStream(data));
    return ds;
}
/**
 * Reads a features/labels pair for batch {@code idx} from the two files recorded
 * in {@code paths} (element 0 = features, element 1 = labels).
 *
 * @param idx index into {@code paths}
 * @return the dataset deserialized via {@code Nd4j.read}
 * @throws IOException if either file cannot be read
 */
private DataSet read(int idx) throws IOException {
    // try-with-resources: the original leaked both streams when Nd4j.read threw,
    // and never closed the underlying FileInputStreams on the error path.
    try (DataInputStream featureStream = new DataInputStream(
                    new BufferedInputStream(new FileInputStream(paths.get(idx)[0])));
         DataInputStream labelStream = new DataInputStream(
                    new BufferedInputStream(new FileInputStream(paths.get(idx)[1])))) {
        return new DataSet(Nd4j.read(featureStream), Nd4j.read(labelStream));
    }
}
/**
 * Fetches a dataset from the on-disk cache.
 *
 * @param key cache key, resolved to a file path via {@code resolveKey}
 * @return the dataset loaded from disk, or {@code null} if no cache entry exists
 * @throws IllegalStateException if the resolved path exists but is not a regular file
 */
@Override
public DataSet get(String key) {
    File file = resolveKey(key);
    if (!file.exists()) {
        return null;
    }
    if (!file.isFile()) {
        throw new IllegalStateException(
                "ERROR: cannot read DataSet: cache path " + file + " is not a file");
    }
    DataSet ds = new DataSet();
    ds.load(file);
    return ds;
}
/**
 * Initializes this data transform fetcher from the passed in datasets
 *
 * @param examples the examples to use
 */
protected void initializeCurrFromList(List<DataSet> examples) {
    if (examples.isEmpty())
        log.warn("Warning: empty dataset from the fetcher");
    int numExamples = examples.size();
    INDArray inputs = createInputMatrix(numExamples);
    INDArray labels = createOutputMatrix(numExamples);
    // Copy each example into its own row of the batch matrices.
    int row = 0;
    for (DataSet example : examples) {
        inputs.putRow(row, example.getFeatureMatrix());
        labels.putRow(row, example.getLabels());
        row++;
    }
    curr = new DataSet(inputs, labels);
}
public DataSet next(int num) { if( exampleStartOffsets.size() == 0 ) throw new NoSuchElementException(); int currMinibatchSize = Math.min(num, exampleStartOffsets.size()); //Allocate space: //Note the order here: // dimension 0 = number of examples in minibatch // dimension 1 = size of each vector (i.e., number of characters) // dimension 2 = length of each time series/example //Why 'f' order here? See http://deeplearning4j.org/usingrnns.html#data section "Alternative: Implementing a custom DataSetIterator" INDArray input = Nd4j.create(new int[]{currMinibatchSize,validCharacters.length,exampleLength}, 'f'); INDArray labels = Nd4j.create(new int[]{currMinibatchSize,validCharacters.length,exampleLength}, 'f'); for( int i=0; i<currMinibatchSize; i++ ){ int startIdx = exampleStartOffsets.removeFirst(); int endIdx = startIdx + exampleLength; int currCharIdx = charToIdxMap.get(fileCharacters[startIdx]); //Current input int c=0; for( int j=startIdx+1; j<endIdx; j++, c++ ){ int nextCharIdx = charToIdxMap.get(fileCharacters[j]); //Next character to predict input.putScalar(new int[]{i,currCharIdx,c}, 1.0); labels.putScalar(new int[]{i,nextCharIdx,c}, 1.0); currCharIdx = nextCharIdx; } } return new DataSet(input,labels); }
/**
 * Builds a dataset of up to {@code num} images read from HDFS. Each file's label is
 * taken from the name of its parent directory and one-hot encoded against {@code labels}.
 *
 * @param num maximum number of examples to read; iteration also stops when the
 *            file iterator is exhausted
 * @return the merged dataset, or an empty {@code DataSet} if no files were read
 */
public DataSet convertDataSet(int num) {
    int batchNumCount = 0;
    List<DataSet> dataSets = new ArrayList<>(); // was a raw ArrayList
    FileSystem fs = CommonUtils.openHdfsConnect();
    try {
        while (batchNumCount != num && fileIterator.hasNext()) {
            ++batchNumCount;
            String fullPath = fileIterator.next();
            // Label = name of the image file's parent directory, one-hot encoded.
            Writable labelText = new Text(FilenameUtils.getBaseName(new File(fullPath).getParent()));
            INDArray label = Nd4j.zeros(1, labels.size())
                            .putScalar(new int[] {0, labels.indexOf(labelText)}, 1);
            INDArray features;
            // try-with-resources: the original leaked the stream when asMatrix threw.
            try (InputStream imageios = fs.open(new Path(fullPath))) {
                features = asMatrix(imageios);
            }
            Nd4j.getAffinityManager().tagLocation(features, AffinityManager.Location.HOST);
            dataSets.add(new DataSet(features, label));
        }
    } catch (Exception e) {
        // Preserve the full exception chain: the original threw e.getCause(), which
        // discards the exception itself and NPEs inside RuntimeException handling
        // semantics when the cause is null.
        throw new RuntimeException(e);
    } finally {
        CommonUtils.closeHdfsConnect(fs);
    }
    if (dataSets.isEmpty()) {
        return new DataSet();
    }
    return DataSet.merge(dataSets);
}
/**
 * Reshapes the input in to the given rows and columns
 *
 * @param rows the row size
 * @param cols the column size
 * @return a copy of this data op with the input resized
 */
@Override
public DataSet reshape(int rows, int cols) {
    INDArray reshapedFeatures = getFeatures().reshape(new long[] {rows, cols});
    return new DataSet(reshapedFeatures, getLabels());
}
/**
 * Deserializes a {@code DataSet} from a Spark {@code PortableDataStream}.
 *
 * @param v1 pair of (path, stream) as produced by {@code binaryFiles}
 * @return the dataset loaded from the stream
 * @throws Exception if the stream cannot be opened or read
 */
@Override
public DataSet call(Tuple2<String, PortableDataStream> v1) throws Exception {
    DataSet d = new DataSet();
    // try-with-resources: the original never closed the stream returned by open().
    try (InputStream is = v1._2().open()) {
        d.load(is);
    }
    return d;
}
}
@Override public DataSet call(String s) throws Exception { //Here: take a String, and map the characters to a one-hot representation Map<Character, Integer> cti = ctiBroadcast.getValue(); int length = s.length(); INDArray features = Nd4j.zeros(1, N_CHARS, length - 1); INDArray labels = Nd4j.zeros(1, N_CHARS, length - 1); char[] chars = s.toCharArray(); int[] f = new int[3]; int[] l = new int[3]; for (int i = 0; i < chars.length - 2; i++) { f[1] = cti.get(chars[i]); f[2] = i; l[1] = cti.get(chars[i + 1]); //Predict the next character given past and current characters l[2] = i; features.putScalar(f, 1.0); labels.putScalar(l, 1.0); } return new DataSet(features, labels); } }
/**
 * Gets a copy of example i
 *
 * @param i the example to getFromOrigin
 * @return the example at i (one example)
 */
@Override
public DataSet get(int[] i) {
    INDArray rowFeatures = getFeatures().getRows(i);
    INDArray rowLabels = getLabels().getRows(i);
    return new DataSet(rowFeatures, rowLabels);
}
// Split the base dataset into totalBatches mini-batches of batchSize examples each,
// writing every batch to disk and recording its path.
// NOTE(review): 'offset' is not advanced anywhere in the visible lines — confirm it
// is updated (e.g. offset += batchSize) elsewhere in the loop body, otherwise every
// batch would contain the same rows.
totalBatches = baseData.numExamples() / batchSize; for (int i = 0; i < baseData.numExamples() / batchSize; i++) { paths.add(writeData(new DataSet( baseData.getFeatureMatrix().get(NDArrayIndex.interval(offset, offset + batchSize)), baseData.getLabels().get(NDArrayIndex.interval(offset, offset + batchSize)))));
// Build a small random dataset: 10 examples with 3 feature columns and 1 label column.
DataSet data = new DataSet(Nd4j.rand(10, 3), Nd4j.rand(10, 1));
/**
 * Extracts examples [from, to) as a new dataset, carrying over the corresponding
 * slice of any feature/label mask arrays that are present.
 *
 * @param from start example index (inclusive)
 * @param to   end example index (exclusive)
 * @return a dataset view over the requested range
 */
@Override
public org.nd4j.linalg.dataset.api.DataSet getRange(int from, int to) {
    INDArray rangeFeatures = features.get(interval(from, to));
    INDArray rangeLabels = labels.get(interval(from, to));
    if (hasMaskArrays()) {
        // A mask may exist for only one of features/labels; slice whichever is set.
        INDArray featureMaskHere = featuresMask == null ? null : featuresMask.get(interval(from, to));
        INDArray labelMaskHere = labelsMask == null ? null : labelsMask.get(interval(from, to));
        return new DataSet(rangeFeatures, rangeLabels, featureMaskHere, labelMaskHere);
    }
    return new DataSet(rangeFeatures, rangeLabels);
}
// Features, labels and the feature mask are wrapped; the label mask is passed as
// null — presumably no label mask applies on this path (NOTE(review): confirm
// callers do not expect one here).
return new DataSet(f,l,fm,null);
/** * Gets a copy of example i * * @param i the example to getFromOrigin * @return the example at i (one example) */ @Override public DataSet get(int i) { if (i > numExamples() || i < 0) throw new IllegalArgumentException("invalid example number"); if (i == 0 && numExamples() == 1) return this; if (getFeatureMatrix().rank() == 4) { //ensure rank is preserved INDArray slice = getFeatureMatrix().slice(i); return new DataSet(slice.reshape(ArrayUtil.combine(new long[] {1}, slice.shape())), getLabels().slice(i)); } return new DataSet(getFeatures().slice(i), getLabels().slice(i)); }
/**
 * Sample a dataset
 *
 * @param numSamples the number of samples to getFromOrigin
 * @param rng the rng to use
 * @param withReplacement whether to allow duplicates (only tracked by example row number)
 * @return the sample dataset
 * @throws IllegalArgumentException if more distinct samples are requested than
 *         examples exist and {@code withReplacement} is false
 */
@Override
public DataSet sample(int numSamples, org.nd4j.linalg.api.rng.Random rng, boolean withReplacement) {
    // Without this guard the (fixed) without-replacement loop below could never
    // terminate when more distinct rows are requested than exist.
    if (!withReplacement && numSamples > numExamples())
        throw new IllegalArgumentException("Cannot draw " + numSamples
                        + " distinct samples from a dataset of " + numExamples() + " examples");
    INDArray examples = Nd4j.create(numSamples, getFeatures().columns());
    INDArray outcomes = Nd4j.create(numSamples, numOutcomes());
    Set<Integer> added = new HashSet<>();
    for (int i = 0; i < numSamples; i++) {
        int picked = rng.nextInt(numExamples());
        if (!withReplacement) {
            // Re-draw until an unused row is found. The original never recorded the
            // chosen row in 'added', so without-replacement sampling still produced
            // duplicate rows.
            while (added.contains(picked))
                picked = rng.nextInt(numExamples());
            added.add(picked);
        }
        examples.putRow(i, get(picked).getFeatures());
        outcomes.putRow(i, get(picked).getLabels());
    }
    return new DataSet(examples, outcomes);
}
/**
 * Clone the dataset
 *
 * @return a clone of the dataset
 */
@Override
public DataSet copy() {
    DataSet clone = new DataSet(getFeatures().dup(), getLabels().dup());
    // Duplicate the mask arrays when present; column/label names are shared by
    // reference, exactly as before.
    INDArray labelsMask = getLabelsMaskArray();
    if (labelsMask != null)
        clone.setLabelsMaskArray(labelsMask.dup());
    INDArray featuresMask = getFeaturesMaskArray();
    if (featuresMask != null)
        clone.setFeaturesMaskArray(featuresMask.dup());
    clone.setColumnNames(getColumnNames());
    clone.setLabelNames(getLabelNames());
    return clone;
}
// Wrap the i-th example's arrays (and masks, which may be null) in a standalone
// DataSet, attaching that example's metadata when any was supplied.
DataSet ds = new DataSet(featuresHere, labelsHere, featureMaskHere, labelMaskHere); if (exampleMetaData != null && exampleMetaData.size() > i) { ds.setExampleMetaData(Collections.singletonList(exampleMetaData.get(i)));