@Override
public DataSet next(int num) {
    int end = curr + num;
    List<DataSet> r = new ArrayList<>();
    if (end >= list.size())
        end = list.size();
    for (; curr < end; curr++) {
        r.add(list.get(curr));
    }
    DataSet d = DataSet.merge(r);
    if (preProcessor != null)
        preProcessor.preProcess(d);
    return d;
}
/**
 * Partitions the dataset into batches of the specified size.
 *
 * @param num the number of examples per batch
 * @return the partitioned batches
 */
@Override
public List<DataSet> dataSetBatches(int num) {
    List<List<DataSet>> list = Lists.partition(asList(), num);
    List<DataSet> ret = new ArrayList<>();
    for (List<DataSet> l : list)
        ret.add(DataSet.merge(l));
    return ret;
}
/**
 * Strips the dataset of all but the passed-in labels.
 *
 * @param labels the labels to keep
 * @return the dataset containing only examples with the specified labels
 */
@Override
public DataSet filterBy(int[] labels) {
    List<DataSet> list = asList();
    List<DataSet> newList = new ArrayList<>();
    List<Integer> labelList = new ArrayList<>();
    for (int i : labels)
        labelList.add(i);
    for (DataSet d : list) {
        int outcome = d.outcome();
        if (labelList.contains(outcome)) {
            newList.add(d);
        }
    }
    return DataSet.merge(newList);
}
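For reference, a minimal sketch of how filterBy might be driven. The three-class setup, the shapes, and the choice of classes 0 and 2 are illustrative assumptions, not taken from the snippet above.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;

public class FilterByExample {
    public static void main(String[] args) {
        // Three one-hot labelled examples over three classes (invented data).
        INDArray features = Nd4j.rand(3, 4);
        INDArray labels = Nd4j.create(new float[][] {
                {1, 0, 0},   // class 0
                {0, 1, 0},   // class 1
                {0, 0, 1}    // class 2
        });
        DataSet all = new DataSet(features, labels);

        // Keep only examples whose outcome is class 0 or class 2.
        DataSet filtered = all.filterBy(new int[] {0, 2});
        System.out.println("Kept " + filtered.numExamples() + " of " + all.numExamples());
    }
}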
/**
 * Partitions a dataset into mini batches where
 * each dataset in each list has the specified number of examples.
 *
 * @param num the number of examples per batch
 * @return the partitioned datasets
 */
@Override
public List<DataSet> batchBy(int num) {
    List<DataSet> batched = Lists.newArrayList();
    for (List<DataSet> splitBatch : Lists.partition(asList(), num)) {
        batched.add(DataSet.merge(splitBatch));
    }
    return batched;
}
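A usage sketch for the two partitioning methods above; the ten-example dataset and batch size of 3 are assumptions for illustration. dataSetBatches(num) behaves the same way.

import java.util.List;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;

public class BatchByExample {
    public static void main(String[] args) {
        // Ten examples, four features, two label columns (invented sizes;
        // real labels would typically be one-hot).
        DataSet all = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 2));

        // Split into mini batches of at most 3 examples each: 3 + 3 + 3 + 1.
        List<DataSet> batches = all.batchBy(3);
        for (DataSet batch : batches) {
            System.out.println("batch of " + batch.numExamples());
        }
    }
}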
public DataSet convertDataSet(int num) {
    int batchNumCount = 0;
    List<DataSet> dataSets = new ArrayList<>();
    FileSystem fs = CommonUtils.openHdfsConnect();
    try {
        while (batchNumCount != num && fileIterator.hasNext()) {
            ++batchNumCount;
            String fullPath = fileIterator.next();
            // The parent directory name is the label.
            Writable labelText = new Text(FilenameUtils.getBaseName(new File(fullPath).getParent()));
            INDArray label = Nd4j.zeros(1, labels.size()).putScalar(new int[] {0, labels.indexOf(labelText)}, 1);
            INDArray features;
            try (InputStream imageios = fs.open(new Path(fullPath))) {
                features = asMatrix(imageios);
            }
            Nd4j.getAffinityManager().tagLocation(features, AffinityManager.Location.HOST);
            dataSets.add(new DataSet(features, label));
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        CommonUtils.closeHdfsConnect(fs);
    }
    if (dataSets.isEmpty()) {
        return new DataSet();
    }
    return DataSet.merge(dataSets);
}
DataSet merge = DataSet.merge(miniBatch);
if (dataNormalization != null)
    dataNormalization.transform(merge);
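A self-contained sketch of that merge-then-normalize pattern, assuming NormalizerStandardize as the DataNormalization implementation; the toy data and shapes are invented.

import java.util.Arrays;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize;
import org.nd4j.linalg.factory.Nd4j;

public class MergeAndNormalize {
    public static void main(String[] args) {
        DataSet a = new DataSet(Nd4j.rand(2, 4), Nd4j.rand(2, 2));
        DataSet b = new DataSet(Nd4j.rand(2, 4), Nd4j.rand(2, 2));

        // Merge the mini batch, then normalize it in place, mirroring the fragment above.
        DataSet merge = DataSet.merge(Arrays.asList(a, b));
        NormalizerStandardize dataNormalization = new NormalizerStandardize();
        dataNormalization.fit(merge);          // compute per-column means and stds
        dataNormalization.transform(merge);    // standardize the features in place
    }
}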
private void nextFold() {
    int left;
    int right;
    if (kCursor == k - 1) {
        // The last fold absorbs whatever remains after the evenly sized batches.
        left = totalExamples() - lastBatch;
        right = totalExamples();
    } else {
        left = kCursor * batch;
        right = left + batch;
    }
    // The training set is everything outside the [left, right) test fold.
    List<DataSet> kMinusOneFoldList = new ArrayList<>();
    if (right < totalExamples()) {
        if (left > 0) {
            kMinusOneFoldList.add((DataSet) singleFold.getRange(0, left));
        }
        kMinusOneFoldList.add((DataSet) singleFold.getRange(right, totalExamples()));
        train = DataSet.merge(kMinusOneFoldList);
    } else {
        train = (DataSet) singleFold.getRange(0, left);
    }
    test = (DataSet) singleFold.getRange(left, right);
    kCursor++;
}
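For context, a sketch of how an iterator built on nextFold() is typically consumed. DL4J's KFoldIterator is the stock implementation of this pattern; the fold count and data shapes here are assumptions.

import org.deeplearning4j.datasets.iterator.KFoldIterator;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;

public class KFoldExample {
    public static void main(String[] args) {
        // 10 examples split into 5 folds of 2 (invented sizes).
        DataSet all = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 2));
        KFoldIterator iter = new KFoldIterator(5, all);
        while (iter.hasNext()) {
            DataSet train = iter.next();      // the k-1 folds merged via DataSet.merge
            DataSet test = iter.testFold();   // the held-out fold
            System.out.println("train=" + train.numExamples() + " test=" + test.numExamples());
        }
    }
}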
@Override
protected DataSet merge(List<DataSet> toMerge) {
    return DataSet.merge(toMerge);
}
/**
 * @deprecated Use {@link #merge(List)}
 */
@Deprecated
public static DataSet merge(List<DataSet> data, boolean clone) {
    return merge(data);
}
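Since the clone flag is ignored, the deprecated overload is equivalent to the single-argument one. A minimal demonstration of the surviving merge(List) call, with invented shapes:

import java.util.Arrays;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;

public class MergeExample {
    public static void main(String[] args) {
        DataSet a = new DataSet(Nd4j.rand(1, 4), Nd4j.rand(1, 2));
        DataSet b = new DataSet(Nd4j.rand(1, 4), Nd4j.rand(1, 2));

        // Preferred: the single-argument overload.
        DataSet merged = DataSet.merge(Arrays.asList(a, b));
        System.out.println(merged.numExamples()); // 2
    }
}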
@Override
public DataSet next(int num) {
    int end = curr + num;
    List<DataSet> r = new ArrayList<>();
    if (end >= list.size())
        end = list.size();
    for (; curr < end; curr++) {
        r.add(list.get(curr));
    }
    DataSet d = DataSet.merge(r);
    if (preProcessor != null) {
        if (!d.isPreProcessed()) {
            preProcessor.preProcess(d);
            d.markAsPreProcessed();
        }
    }
    return d;
}
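A sketch of the calling side of next(num), assuming a generic ListDataSetIterator-style implementation (the generic signature varies across DL4J versions) and a NormalizerStandardize pre-processor; the shapes are illustrative.

import java.util.Arrays;
import org.deeplearning4j.datasets.iterator.impl.ListDataSetIterator;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize;
import org.nd4j.linalg.factory.Nd4j;

public class IteratorExample {
    public static void main(String[] args) {
        DataSet a = new DataSet(Nd4j.rand(1, 4), Nd4j.rand(1, 2));
        DataSet b = new DataSet(Nd4j.rand(1, 4), Nd4j.rand(1, 2));

        NormalizerStandardize norm = new NormalizerStandardize();
        norm.fit(DataSet.merge(Arrays.asList(a, b)));

        ListDataSetIterator<DataSet> iter = new ListDataSetIterator<>(Arrays.asList(a, b), 2);
        iter.setPreProcessor(norm);

        // next(2) merges two examples into one batch and applies the
        // pre-processor exactly once, thanks to the isPreProcessed() guard.
        DataSet batch = iter.next(2);
        System.out.println(batch.numExamples()); // 2
    }
}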
public MovingWindowDataSetFetcher(DataSet data, int windowRows, int windowColumns) {
    this.data = data;
    this.windowRows = windowRows;
    this.windowColumns = windowColumns;
    List<DataSet> list = data.asList();
    List<DataSet> flipped = new ArrayList<>();
    for (int i = 0; i < list.size(); i++) {
        INDArray label = list.get(i).getLabels();
        List<INDArray> windows =
                new MovingWindowMatrix(list.get(i).getFeatures(), windowRows, windowColumns, true)
                        .windows(true);
        for (int j = 0; j < windows.size(); j++) {
            flipped.add(new DataSet(windows.get(j), label));
        }
        flipped.add(list.get(i));
    }
    this.data = DataSet.merge(flipped);
}
if (temp.size() > 1)
    result = DataSet.merge(temp);
else
    result = temp.get(0);
private DataSet enrichWithErrors(DataSet ds) {
    if (!args().errorEnrichment) {
        return ds;
    }
    if (queue.isEmpty()) {
        // No errors were collected yet. Return the un-enriched dataset.
        return ds;
    }
    int size = this.args().numErrorsAdded;
    INDArray inputs = Nd4j.zeros(size, featureCalculator.numberOfFeatures());
    INDArray labels = Nd4j.zeros(size, labelMapper.numberOfLabels());
    int i = 0;
    for (ErrorRecord errorRecord : queue.getRandomSample(size)) {
        // We are going to call nextRecord directly, without checking hasNextRecord,
        // because we have determined how many times we can call (in size). We should
        // get the exception if we were wrong in our estimate of size.
        // Fill in features and labels for a given record i:
        Nd4j.copy(errorRecord.features.get(new PointIndex(0)), inputs.get(new PointIndex(i)));
        Nd4j.copy(errorRecord.label, labels.get(new PointIndex(i)));
        i++;
    }
    DataSet errorDataSet = new DataSet(inputs, labels);
    array[0] = ds;
    array[1] = errorDataSet;
    final DataSet enrichedDataset = DataSet.merge(ObjectArrayList.wrap(array));
    return enrichedDataset;
}
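The enrichment step above reduces to appending a small batch of previously misclassified examples onto the incoming one. A stripped-down sketch of just that merge, with invented sizes and without the error queue or fastutil wrapper:

import java.util.Arrays;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;

public class ErrorEnrichmentSketch {
    public static void main(String[] args) {
        // Incoming training batch (8 examples) plus a small batch of
        // hard examples (2 examples); sizes are illustrative.
        DataSet batch = new DataSet(Nd4j.rand(8, 10), Nd4j.rand(8, 3));
        DataSet errors = new DataSet(Nd4j.rand(2, 10), Nd4j.rand(2, 3));

        // Same shape of call as array[0]/array[1] + ObjectArrayList.wrap above.
        DataSet enriched = DataSet.merge(Arrays.asList(batch, errors));
        System.out.println(enriched.numExamples()); // 10
    }
}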