/**
 * Exports the contents of this file into a directory. Read mode only.
 * <p>
 * Delegates to the six-argument overload, passing {@code NamingStrategy.KEY},
 * a newly created {@code ExtractionState}, {@code false} and {@code 0}.
 *
 * @param fs
 *            filesystem of the output location
 * @param dirPath
 *            directory path to extract into
 */
public void exportData(FileSystem fs, Path dirPath) {
	final ExtractionState freshState = new ExtractionState();
	this.exportData(fs, dirPath, NamingStrategy.KEY, freshState, false, 0);
}
/**
 * Exports the contents of this file into a directory. Read mode only.
 * <p>
 * Delegates to the five-argument overload, passing {@code NamingStrategy.KEY},
 * a newly created {@code ExtractionState}, {@code false} and {@code 0}.
 *
 * @param uriOrPath
 *            path or URI of the directory to extract into
 * @throws IOException
 *             propagated from the overload this method delegates to
 */
public void exportData(String uriOrPath) throws IOException {
	final ExtractionState freshState = new ExtractionState();
	this.exportData(uriOrPath, NamingStrategy.KEY, freshState, false, 0);
}
/**
 * Exports the contents of this file into a directory. Read mode only.
 * <p>
 * Delegates to the five-argument overload, passing {@code NamingStrategy.KEY},
 * a newly created {@code ExtractionState}, {@code false} and {@code 0}.
 *
 * @param uriOrPath
 *            path or URI of the directory to extract into
 * @throws IOException
 *             propagated from the overload this method delegates to
 */
public void exportData(String uriOrPath) throws IOException {
	final ExtractionState freshState = new ExtractionState();
	this.exportData(uriOrPath, NamingStrategy.KEY, freshState, false, 0);
}
/**
 * Exports the contents of this file into a directory. Read mode only.
 * <p>
 * Delegates to the six-argument overload, passing {@code NamingStrategy.KEY},
 * a newly created {@code ExtractionState}, {@code false} and {@code 0}.
 *
 * @param fs
 *            filesystem of the output location
 * @param dirPath
 *            directory path to extract into
 */
public void exportData(FileSystem fs, Path dirPath) {
	final ExtractionState freshState = new ExtractionState();
	this.exportData(fs, dirPath, NamingStrategy.KEY, freshState, false, 0);
}
/**
 * Loads every byte-array sample from a sequence file into memory and counts
 * how many unordered pairs of samples {@code distanceUnderThreshold} accepts
 * for {@code options.checkSampleEqualityThreshold}. Progress and the final
 * count are written to {@code System.out}.
 * <p>
 * Note that the final message calls the matching pairs "identical" even
 * though the comparison is threshold based.
 *
 * @param selected
 *            location of the sequence file holding the samples
 * @param options
 *            supplies {@code checkSampleEqualityThreshold}
 * @throws IOException
 *             propagated from opening or exporting the sequence file
 */
public static void checkSampleEquality(String selected, HadoopFastKMeansOptions options) throws IOException {
	ByteArrayDump neededdump = new ByteArrayDump();
	TextBytesSequenceMemoryUtility utility = new TextBytesSequenceMemoryUtility(selected, true);
	utility.exportData(NamingStrategy.KEY, new ExtractionState(), 0, neededdump);
	System.out.println("Finished loading all byte arrays");

	final int nSamples = neededdump.centroids.size();
	// The loops below visit each unordered pair (i < j) exactly once, i.e.
	// n * (n - 1) / 2 comparisons; computed in long arithmetic to avoid int overflow.
	final long totalPairs = ((long) nSamples * (long) (nSamples - 1)) / 2L;

	// long, like the pair counter: the number of matching pairs can exceed the int range
	long total = 0;
	long done = 0;
	for (int i = 0; i < nSamples; i++) {
		byte[] a = neededdump.centroids.get(i);
		for (int j = i + 1; j < nSamples; j++) {
			done++;
			if (distanceUnderThreshold(a, neededdump.centroids.get(j), options.checkSampleEqualityThreshold)) {
				total++;
			}
		}
		// "\r" rewrites the same console line after each outer iteration
		System.out.print("\r" + done + "/" + totalPairs + " total: " + total);
	}
	System.out.println();
	System.out.println("There were " + total + " identical samples");
}

private static boolean distanceUnderThreshold(byte[] a, byte[] b, int threshold) {
centroidsPath.toUri(), true);
// First export: feed the entries of the utility set up above into a dump
// constructed with options.k. NOTE(review): `dump` is not read again within
// the lines visible here - confirm it is used further down.
final SelectTopKDump dump = new SelectTopKDump(options.k);
utility.exportData(NamingStrategy.KEY, new ExtractionState(), 0, dump);
// Second export: re-point the utility at newcentroidsPath and export into a
// separate dump constructed with randomNeeded.
utility = new IntBytesSequenceMemoryUtility(newcentroidsPath.toUri(), true);
final SelectTopKDump neededdump = new SelectTopKDump(randomNeeded);
utility.exportData(NamingStrategy.KEY, new ExtractionState(), 0, neededdump);
// Whatever the second dump collected becomes the new centroids.
newcentroids = neededdump.centroids;
// Fresh extraction state configured with `max` via setMaxFileExtract.
// NOTE(review): presumably this caps how many entries get extracted - confirm
// against ExtractionState.
final ExtractionState nps = new ExtractionState();
nps.setMaxFileExtract(max);
/**
 * Reads initially selected centroids (see {@link FeatureSelect}) from a
 * sequence file and wraps them in a {@link ByteCentroidsResult}.
 *
 * @param initialCentroids
 *            location of the sequence file holding the centroids
 * @param k
 *            value passed to the {@link SelectTopKDump} that collects the
 *            centroids
 * @return a new {@link ByteCentroidsResult} whose {@code centroids} field is
 *         the one collected by the dump
 * @throws IOException
 *             propagated from opening or exporting the sequence file
 */
public static ByteCentroidsResult sequenceFileToCluster(String initialCentroids, int k) throws IOException {
	final SelectTopKDump collected = new SelectTopKDump(k);
	new IntBytesSequenceMemoryUtility(initialCentroids, true)
			.exportData(NamingStrategy.KEY, new ExtractionState(), 0, collected);

	final ByteCentroidsResult result = new ByteCentroidsResult();
	result.centroids = collected.centroids;
	return result;
}
}