/**
 * Extracts file to a directory using the default settings. Read mode only.
 * <p>
 * Convenience overload: forwards to the five-argument {@code exportData}
 * with {@link NamingStrategy#KEY}, a newly created {@link ExtractionState},
 * {@code false} and {@code 0} for the remaining arguments.
 *
 * @param uriOrPath
 *            path or uri to extract to.
 * @throws IOException
 *             propagated from the delegated export call
 */
public void exportData(String uriOrPath) throws IOException {
	// A fresh state per call, so nothing is carried over from earlier exports.
	final ExtractionState freshState = new ExtractionState();
	exportData(uriOrPath, NamingStrategy.KEY, freshState, false, 0);
}
/**
 * Names an entry after the current count reported by the extraction
 * state; the key and value are not consulted.
 *
 * @param key
 *            unused
 * @param value
 *            unused
 * @param state
 *            extraction state whose {@code getCount()} supplies the name
 * @return the textual form of {@code state.getCount()}
 */
@Override
public <K, V> String getNameInternal(K key, V value, ExtractionState state) {
	return String.valueOf(state.getCount());
}
};
final ExtractionState nps = new ExtractionState(); nps.setMaxFileExtract(max); nps.setRandomSelection(random, totalRecords); if (nps.isFinished()) break;
if (extrState.allowNext()) { reader.getCurrentValue(val); if (dirPath != null) { extrState.tick(); } else { System.out.println(key.toString()); printFile(val); extrState.tick(); extrState.tick(); if (extrState.isFinished()) break;
if (extrState.allowNext()) { reader.getCurrentValue(val); if (dirPath != null) { extrState.tick(); } else { System.out.println(key.toString()); printFile(val); extrState.tick(); extrState.tick(); if (extrState.isFinished()) break;
if (extrState.allowNext()) { reader.getCurrentValue(val); zos.closeEntry(); extrState.tick(); } else { extrState.tick(); if (extrState.isFinished()) break;
/**
 * Extracts file to a directory using the default settings. Read mode only.
 * <p>
 * Convenience overload: forwards to the five-argument {@code exportData}
 * with {@link NamingStrategy#KEY}, a newly created {@link ExtractionState},
 * {@code false} and {@code 0} for the remaining arguments.
 *
 * @param uriOrPath
 *            path or uri to extract to.
 * @throws IOException
 *             propagated from the delegated export call
 */
public void exportData(String uriOrPath) throws IOException {
	// A fresh state per call, so nothing is carried over from earlier exports.
	final ExtractionState freshState = new ExtractionState();
	exportData(uriOrPath, NamingStrategy.KEY, freshState, false, 0);
}
/**
 * Names an entry after the current count reported by the extraction
 * state; the key and value are not consulted.
 *
 * @param key
 *            unused
 * @param value
 *            unused
 * @param state
 *            extraction state whose {@code getCount()} supplies the name
 * @return the textual form of {@code state.getCount()}
 */
@Override
public <K, V> String getNameInternal(K key, V value, ExtractionState state) {
	return String.valueOf(state.getCount());
}
};
if (nps.allowNext()) { reader.getCurrentValue(val); dump.dumpValue(key, val); nps.tick(); if (nps.isFinished()) break;
/**
 * Extracts file to a directory using the default settings. Read mode only.
 * <p>
 * Convenience overload: forwards to the six-argument {@code exportData}
 * with {@link NamingStrategy#KEY}, a newly created {@link ExtractionState},
 * {@code false} and {@code 0} for the remaining arguments.
 *
 * @param fs
 *            filesystem of output file
 * @param dirPath
 *            path to extract to
 */
public void exportData(FileSystem fs, Path dirPath) {
	// A fresh state per call, so nothing is carried over from earlier exports.
	final ExtractionState freshState = new ExtractionState();
	exportData(fs, dirPath, NamingStrategy.KEY, freshState, false, 0);
}
if (nps.allowNext()) { reader.getCurrentValue(val); dump.dumpValue(key, val); nps.tick(); if (nps.isFinished()) break;
/**
 * Extracts file to a directory using the default settings. Read mode only.
 * <p>
 * Convenience overload: forwards to the six-argument {@code exportData}
 * with {@link NamingStrategy#KEY}, a newly created {@link ExtractionState},
 * {@code false} and {@code 0} for the remaining arguments.
 *
 * @param fs
 *            filesystem of output file
 * @param dirPath
 *            path to extract to
 */
public void exportData(FileSystem fs, Path dirPath) {
	// A fresh state per call, so nothing is carried over from earlier exports.
	final ExtractionState freshState = new ExtractionState();
	exportData(fs, dirPath, NamingStrategy.KEY, freshState, false, 0);
}
if (extrState.allowNext()) { reader.getCurrentValue(val); zos.closeEntry(); extrState.tick(); } else { extrState.tick(); if (extrState.isFinished()) break;
/**
 * Loads every sample from the given sequence file into memory and counts
 * the unordered pairs for which {@code distanceUnderThreshold} returns
 * {@code true} with {@code options.checkSampleEqualityThreshold}.
 * <p>
 * Progress and the final count are written to {@code System.out}; nothing
 * is returned. Each pair is compared once (i &lt; j), so the work is
 * quadratic in the number of samples.
 *
 * @param selected
 *            location of the sequence file holding the samples, handed to
 *            {@code TextBytesSequenceMemoryUtility}
 * @param options
 *            supplies {@code checkSampleEqualityThreshold}
 * @throws IOException
 *             if reading the sequence file fails
 */
public static void checkSampleEquality(String selected, HadoopFastKMeansOptions options) throws IOException {
	ByteArrayDump neededdump = new ByteArrayDump();
	TextBytesSequenceMemoryUtility utility = new TextBytesSequenceMemoryUtility(selected, true);
	utility.exportData(NamingStrategy.KEY, new ExtractionState(), 0, neededdump);
	System.out.println("Finished loading all byte arrays");

	final int nSamples = neededdump.centroids.size();
	// Only pairs with i < j are compared, so the number of comparisons is
	// n * (n - 1) / 2. Widen to long before multiplying to avoid int overflow.
	final long totalPairs = ((long) nSamples * (long) (nSamples - 1)) / 2L;

	int total = 0;
	long done = 0;
	for (int i = 0; i < nSamples; i++) {
		byte[] a = neededdump.centroids.get(i);
		for (int j = i + 1; j < nSamples; j++) {
			done++;
			if (distanceUnderThreshold(a, neededdump.centroids.get(j), options.checkSampleEqualityThreshold)) {
				total++;
			}
		}
		// "\r" rewrites the same console line after each outer iteration.
		System.out.print("\r" + done + "/" + totalPairs + " total: " + total);
	}
	System.out.println();
	System.out.println("There were " + total + " identical samples");
}

private static boolean distanceUnderThreshold(byte[] a, byte[] b, int threshold) {
centroidsPath.toUri(), true); final SelectTopKDump dump = new SelectTopKDump(options.k); utility.exportData(NamingStrategy.KEY, new ExtractionState(), 0, dump); utility = new IntBytesSequenceMemoryUtility(newcentroidsPath.toUri(), true); final SelectTopKDump neededdump = new SelectTopKDump(randomNeeded); utility.exportData(NamingStrategy.KEY, new ExtractionState(), 0, neededdump); newcentroids = neededdump.centroids;
/**
 * Reads initially selected centroids (as produced by {@link FeatureSelect})
 * from a sequence file and wraps them in a {@link ByteCentroidsResult}.
 *
 * @param initialCentroids
 *            location of the sequence file holding the centroids
 * @param k
 *            passed to {@code SelectTopKDump}; presumably the number of
 *            centroids to keep (confirm against that class)
 * @return a new {@link ByteCentroidsResult} whose {@code centroids} field
 *         is the array collected by the dump
 * @throws IOException
 *             if reading the sequence file fails
 */
public static ByteCentroidsResult sequenceFileToCluster(String initialCentroids, int k) throws IOException {
	final SelectTopKDump topK = new SelectTopKDump(k);
	new IntBytesSequenceMemoryUtility(initialCentroids, true)
			.exportData(NamingStrategy.KEY, new ExtractionState(), 0, topK);

	final ByteCentroidsResult result = new ByteCentroidsResult();
	result.centroids = topK.centroids;
	return result;
}
}