/**
 * Feeds every non-empty line produced by the given iterator to
 * {@code processLine}, forwarding {@code data}, {@code timestamps} and
 * {@code fromPriorData} unchanged. Progress is logged after every 1,000,000
 * processed lines, and the final total is logged once the iterator is exhausted.
 *
 * @param dataOrUpdateFileIterator source of lines; consumed until {@code hasNext()} is false
 * @param data forwarded to {@code processLine} for each non-empty line
 * @param timestamps forwarded to {@code processLine} for each non-empty line
 * @param fromPriorData forwarded to {@code processLine} for each non-empty line
 */
protected void processFile(FileLineIterator dataOrUpdateFileIterator,
                           FastByIDMap<?> data,
                           FastByIDMap<FastByIDMap<Long>> timestamps,
                           boolean fromPriorData) {
  log.info("Reading file info...");
  int linesProcessed = 0;
  while (dataOrUpdateFileIterator.hasNext()) {
    String currentLine = dataOrUpdateFileIterator.next();
    if (currentLine.isEmpty()) {
      // Empty lines are neither processed nor counted.
      continue;
    }
    processLine(currentLine, data, timestamps, fromPriorData);
    linesProcessed++;
    if (linesProcessed % 1000000 == 0) {
      log.info("Processed {} lines", linesProcessed);
    }
  }
  log.info("Read lines: {}", linesProcessed);
}
/**
 * Feeds every non-empty line produced by the given iterator to
 * {@code processLineWithoutID}, forwarding {@code data} and {@code timestamps}
 * unchanged. Progress is logged after every 100,000 processed lines, and the
 * final total is logged once the iterator is exhausted.
 *
 * @param dataOrUpdateFileIterator source of lines; consumed until {@code hasNext()} is false
 * @param data forwarded to {@code processLineWithoutID} for each non-empty line
 * @param timestamps forwarded to {@code processLineWithoutID} for each non-empty line
 */
protected void processFileWithoutID(FileLineIterator dataOrUpdateFileIterator,
                                    FastByIDMap<FastIDSet> data,
                                    FastByIDMap<FastByIDMap<Long>> timestamps) {
  log.info("Reading file info...");
  int linesProcessed = 0;
  while (dataOrUpdateFileIterator.hasNext()) {
    String currentLine = dataOrUpdateFileIterator.next();
    if (currentLine.isEmpty()) {
      // Empty lines are neither processed nor counted.
      continue;
    }
    processLineWithoutID(currentLine, data, timestamps);
    linesProcessed++;
    if (linesProcessed % 100000 == 0) {
      log.info("Processed {} lines", linesProcessed);
    }
  }
  log.info("Read lines: {}", linesProcessed);
}
/**
 * Feeds every non-empty line produced by the given iterator to
 * {@code processLineWithoutID}, forwarding {@code data} and {@code timestamps}
 * unchanged. Progress is logged after every 100,000 processed lines, and the
 * final total is logged once the iterator is exhausted.
 *
 * @param dataOrUpdateFileIterator source of lines; consumed until {@code hasNext()} is false
 * @param data forwarded to {@code processLineWithoutID} for each non-empty line
 * @param timestamps forwarded to {@code processLineWithoutID} for each non-empty line
 */
protected void processFileWithoutID(FileLineIterator dataOrUpdateFileIterator,
                                    FastByIDMap<FastIDSet> data,
                                    FastByIDMap<FastByIDMap<Long>> timestamps) {
  log.info("Reading file info...");
  int linesProcessed = 0;
  while (dataOrUpdateFileIterator.hasNext()) {
    String currentLine = dataOrUpdateFileIterator.next();
    if (currentLine.isEmpty()) {
      // Empty lines are neither processed nor counted.
      continue;
    }
    processLineWithoutID(currentLine, data, timestamps);
    linesProcessed++;
    if (linesProcessed % 100000 == 0) {
      log.info("Processed {} lines", linesProcessed);
    }
  }
  log.info("Read lines: {}", linesProcessed);
}
/**
 * Feeds every non-empty line produced by the given iterator to
 * {@code processLineWithoutID}, forwarding {@code data} and {@code timestamps}
 * unchanged. Progress is logged after every 100,000 processed lines, and the
 * final total is logged once the iterator is exhausted.
 *
 * @param dataOrUpdateFileIterator source of lines; consumed until {@code hasNext()} is false
 * @param data forwarded to {@code processLineWithoutID} for each non-empty line
 * @param timestamps forwarded to {@code processLineWithoutID} for each non-empty line
 */
protected void processFileWithoutID(FileLineIterator dataOrUpdateFileIterator,
                                    FastByIDMap<FastIDSet> data,
                                    FastByIDMap<FastByIDMap<Long>> timestamps) {
  log.info("Reading file info...");
  int linesProcessed = 0;
  while (dataOrUpdateFileIterator.hasNext()) {
    String currentLine = dataOrUpdateFileIterator.next();
    if (currentLine.isEmpty()) {
      // Empty lines are neither processed nor counted.
      continue;
    }
    processLineWithoutID(currentLine, data, timestamps);
    linesProcessed++;
    if (linesProcessed % 100000 == 0) {
      log.info("Processed {} lines", linesProcessed);
    }
  }
  log.info("Read lines: {}", linesProcessed);
}
/**
 * Feeds every non-empty line produced by the given iterator to
 * {@code processLine}, forwarding {@code data}, {@code timestamps} and
 * {@code fromPriorData} unchanged. Progress is logged after every 1,000,000
 * processed lines, and the final total is logged once the iterator is exhausted.
 *
 * @param dataOrUpdateFileIterator source of lines; consumed until {@code hasNext()} is false
 * @param data forwarded to {@code processLine} for each non-empty line
 * @param timestamps forwarded to {@code processLine} for each non-empty line
 * @param fromPriorData forwarded to {@code processLine} for each non-empty line
 */
protected void processFile(FileLineIterator dataOrUpdateFileIterator,
                           FastByIDMap<?> data,
                           FastByIDMap<FastByIDMap<Long>> timestamps,
                           boolean fromPriorData) {
  log.info("Reading file info...");
  int linesProcessed = 0;
  while (dataOrUpdateFileIterator.hasNext()) {
    String currentLine = dataOrUpdateFileIterator.next();
    if (currentLine.isEmpty()) {
      // Empty lines are neither processed nor counted.
      continue;
    }
    processLine(currentLine, data, timestamps, fromPriorData);
    linesProcessed++;
    if (linesProcessed % 1000000 == 0) {
      log.info("Processed {} lines", linesProcessed);
    }
  }
  log.info("Read lines: {}", linesProcessed);
}
/**
 * Feeds every non-empty line produced by the given iterator to
 * {@code processLine}, forwarding {@code data}, {@code timestamps} and
 * {@code fromPriorData} unchanged. Progress is logged after every 1,000,000
 * processed lines, and the final total is logged once the iterator is exhausted.
 *
 * @param dataOrUpdateFileIterator source of lines; consumed until {@code hasNext()} is false
 * @param data forwarded to {@code processLine} for each non-empty line
 * @param timestamps forwarded to {@code processLine} for each non-empty line
 * @param fromPriorData forwarded to {@code processLine} for each non-empty line
 */
protected void processFile(FileLineIterator dataOrUpdateFileIterator,
                           FastByIDMap<?> data,
                           FastByIDMap<FastByIDMap<Long>> timestamps,
                           boolean fromPriorData) {
  log.info("Reading file info...");
  int linesProcessed = 0;
  while (dataOrUpdateFileIterator.hasNext()) {
    String currentLine = dataOrUpdateFileIterator.next();
    if (currentLine.isEmpty()) {
      // Empty lines are neither processed nor counted.
      continue;
    }
    processLine(currentLine, data, timestamps, fromPriorData);
    linesProcessed++;
    if (linesProcessed % 1000000 == 0) {
      log.info("Processed {} lines", linesProcessed);
    }
  }
  log.info("Read lines: {}", linesProcessed);
}
/** * Read in a dictionary file. Format is: First line is the number of entries * * <pre> * term DocFreq Index * </pre> */ private static String[] loadTermDictionary(InputStream is) throws IOException { FileLineIterator it = new FileLineIterator(is); int numEntries = Integer.parseInt(it.next()); String[] result = new String[numEntries]; while (it.hasNext()) { String line = it.next(); if (line.startsWith("#")) { continue; } String[] tokens = VectorHelper.TAB_PATTERN.split(line); if (tokens.length < 3) { continue; } int index = Integer.parseInt(tokens[2]); // tokens[1] is the doc freq result[index] = tokens[0]; } return result; } }