/**
 * Scores one (docId, termFrequency) pair of the current word with TF-IDF.
 * IDF is derived from the instance-level {@code logDocCount} and the word's
 * {@code logDf} (log of its document frequency).
 */
private Entry<Long, Double> tfidfEntry(double logDf, Entry<Long, Double> docidTf) {
    final double idf = logDocCount - logDf;
    final double tf = docidTf.getValue();
    return entry(docidTf.getKey(), tf * idf);
}
}
/**
 * Scores one (docId, termFrequency) pair with TF-IDF, where IDF is
 * {@code logDocCount - logDf}.
 */
private static Entry<String, Double> tfidfEntry(double logDocCount, double logDf, Entry<String, Long> docIdTf) {
    final double idf = logDocCount - logDf;
    final double tf = docIdTf.getValue();
    return entry(docIdTf.getKey(), tf * idf);
}
}
/**
 * Combines a ((docId, word), tf) entry with the word's precomputed IDF into a
 * (docId, tf*idf) score entry.
 */
private static Entry<Long, Double> tfidfEntry(Entry<Entry<Long, String>, Long> tfEntry, Double idf) {
    final Long docId = tfEntry.getKey().getKey();
    return entry(docId, tfEntry.getValue() * idf);
}
}
/**
 * Combines a ((docId, word), tf) entry with the word's precomputed IDF into a
 * (docId, tf*idf) score entry.
 */
private static Entry<String, Double> tfidfEntry(Entry<Entry<String, String>, Long> tfEntry, Double idf) {
    final String docId = tfEntry.getKey().getKey();
    return entry(docId, tfEntry.getValue() * idf);
}
}
/**
 * Streams the lines of the classpath resource {@code books/<name>},
 * lowercased and paired with the document name as {@code (name, line)}.
 * <p>
 * The returned stream holds an open file handle; the caller is expected to
 * close it (e.g. by consuming it via {@code Stream.flatMap}, which closes
 * each mapped stream).
 *
 * @throws NullPointerException if the resource does not exist on the classpath
 * @throws RuntimeException wrapping any {@link IOException} or
 *         {@link URISyntaxException} raised while opening the file
 */
static Stream<Entry<String, String>> docLines(String name) {
    try {
        // Fail fast with a diagnostic message instead of a bare NPE when the
        // book resource is missing.
        java.net.URL bookUrl = java.util.Objects.requireNonNull(
                TfIdfJdkStreams.class.getResource("books/" + name),
                "Classpath resource not found: books/" + name);
        return Files.lines(Paths.get(bookUrl.toURI()))
                    .map(String::toLowerCase)
                    .map(line -> entry(name, line));
    } catch (IOException | URISyntaxException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Splits a (docId, line) entry into a stream of (docId, word) entries,
 * discarding empty tokens.
 */
private static Stream<Entry<Long, String>> tokenize(Entry<Long, String> docLine) {
    final Long docId = docLine.getKey();
    final String[] tokens = TfIdfJdkStreams.DELIMITER.split(docLine.getValue());
    return Arrays.stream(tokens)
                 .filter(word -> !word.isEmpty())
                 .map(word -> entry(docId, word));
}
}
/**
 * Accumulates one ((docId, word), tf) item into {@code wordDocTf}, grouping
 * the per-document term frequencies under their word. Always accepts the item.
 */
@Override
@SuppressWarnings("unchecked")
protected boolean tryProcess1(@Nonnull Object item) {
    final Entry<Entry<Long, String>, Long> tfEntry = (Entry<Entry<Long, String>, Long>) item;
    final String word = tfEntry.getKey().getValue();
    final long docId = tfEntry.getKey().getKey();
    final double tf = tfEntry.getValue();
    wordDocTf.computeIfAbsent(word, w -> new ArrayList<>())
             .add(entry(docId, tf));
    return true;
}
/**
 * Splits a (docId, line) entry into (docId, word) entries, discarding empty
 * tokens and stopwords.
 */
private Stream<Entry<Long, String>> tokenize(Entry<Long, String> docLine) {
    final Long docId = docLine.getKey();
    return Arrays.stream(DELIMITER.split(docLine.getValue()))
                 // short-circuit && preserves the original two-filter order:
                 // empty check first, then the stopword lookup
                 .filter(token -> !token.isEmpty() && !stopwords.contains(token))
                 .map(token -> entry(docId, token));
}
/**
 * Splits a (docId, line) entry into a stream of (docId, word) entries,
 * discarding empty tokens.
 */
private static Stream<Entry<String, String>> tokenize(Entry<String, String> docLine) {
    final String docId = docLine.getKey();
    final String[] tokens = TfIdfJdkStreams.DELIMITER.split(docLine.getValue());
    return Arrays.stream(tokens)
                 .filter(word -> !word.isEmpty())
                 .map(word -> entry(docId, word));
}
}
/**
 * Streams the lines of the book resource named by {@code idAndName}'s value,
 * lowercased and paired with the document id as {@code (docId, line)}.
 * <p>
 * The returned stream holds an open file handle; the caller is expected to
 * close it (e.g. by consuming it via {@code Stream.flatMap}, which closes
 * each mapped stream).
 *
 * @throws NullPointerException if the resource does not exist on the classpath
 * @throws RuntimeException wrapping any {@link IOException} or
 *         {@link URISyntaxException} raised while opening the file
 */
static Stream<Entry<Long, String>> docLines(Entry<Long, String> idAndName) {
    try {
        // Fail fast with a diagnostic message instead of a bare NPE when the
        // book resource is missing.
        java.net.URL bookUrl = java.util.Objects.requireNonNull(
                TfIdfJdkStreams.class.getResource("books/" + idAndName.getValue()),
                "Classpath resource not found: books/" + idAndName.getValue());
        return Files.lines(Paths.get(bookUrl.toURI()))
                    .map(String::toLowerCase)
                    .map(line -> entry(idAndName.getKey(), line));
    } catch (IOException | URISyntaxException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Converts a word's raw (docId, tf) list into its inverted-index entry by
 * scoring each posting via {@code docScores}.
 */
private Entry<String, List<Entry<Long, Double>>> toInvertedIndexEntry(
        Entry<String, List<Entry<Long, Double>>> wordDocTf
) {
    return entry(wordDocTf.getKey(), docScores(wordDocTf.getValue()));
}
/**
 * Builds the inverted-index entry for {@code word} by scoring its
 * (docId, tf) postings via {@code docScores}.
 */
private static Entry<String, Collection<Entry<String, Double>>> toInvertedIndexEntry(
        double logDocCount, String word, Collection<Entry<String, Long>> docIdTfs
) {
    final Collection<Entry<String, Double>> scores = docScores(logDocCount, docIdTfs);
    return entry(word, scores);
}
/**
 * Convenience for {@link #kafka(Properties, DistributedFunction, String...)}
 * wrapping each consumer record's key and value in a {@code Map.Entry}.
 */
@Nonnull
public static <K, V> StreamSource<Entry<K, V>> kafka(
        @Nonnull Properties properties,
        @Nonnull String... topics
) {
    return KafkaSources.<K, V, Entry<K, V>>kafka(
            properties, record -> entry(record.key(), record.value()), topics);
}
/**
 * Convenience for {@link #kafka(Properties, DistributedFunction, String...)}
 * wrapping each consumer record's key and value in a {@code Map.Entry}.
 */
@Nonnull
public static <K, V> StreamSource<Entry<K, V>> kafka(
        @Nonnull Properties properties,
        @Nonnull String... topics
) {
    return KafkaSources.<K, V, Entry<K, V>>kafka(
            properties, record -> entry(record.key(), record.value()), topics);
}
/**
 * Classifies the image with the pre-trained model and returns the
 * highest-scoring (category, score) pair.
 *
 * @throws IllegalStateException if the classifier produces no results
 *         (replaces the opaque {@code NoSuchElementException} an unchecked
 *         {@code Optional.get()} would raise)
 */
private static Entry<String, Double> classifyWithModel(ImageClassifierVggCifar10 classifier, BufferedImage image) {
    // Convert the buffered image into the planar float format the model expects.
    Planar<GrayF32> planar = new Planar<>(GrayF32.class, image.getWidth(), image.getHeight(), 3);
    ConvertBufferedImage.convertFromPlanar(image, planar, true, GrayF32.class);
    classifier.classify(planar);
    return classifier.getAllResults().stream()
            .map(score -> entry(classifier.getCategories().get(score.category), score.score))
            .max(Entry.comparingByValue())
            .orElseThrow(() -> new IllegalStateException("Classifier returned no results"));
}
/**
 * Returns a projection that converts the {@link EventJournalCacheEvent} to a
 * {@link java.util.Map.Entry} using the event's new value as a value.
 *
 * @see Sources#cacheJournal
 * @see Sources#remoteCacheJournal
 */
public static <K, V> DistributedFunction<EventJournalCacheEvent<K, V>, Entry<K, V>> cacheEventToEntry() {
    return event -> entry(event.getKey(), event.getNewValue());
}
/**
 * Returns a projection that converts the {@link EventJournalMapEvent} to a
 * {@link java.util.Map.Entry} using the event's new value as a value.
 *
 * @see Sources#mapJournal
 * @see Sources#remoteMapJournal
 */
public static <K, V> DistributedFunction<EventJournalMapEvent<K, V>, Entry<K, V>> mapEventToEntry() {
    return event -> entry(event.getKey(), event.getNewValue());
}
/**
 * Deserializes an entry: the key was written first, then the value, so the
 * two {@code readObject()} calls must stay in that order. The raw
 * {@code Entry} return type is dictated by the overridden serializer contract.
 */
@Override
public Entry read(ObjectDataInput in) throws IOException {
    final Object key = in.readObject();
    final Object value = in.readObject();
    return entry(key, value);
}
/**
 * Builds a pipeline that reads every row of the users table over JDBC,
 * maps each row to a {@code (id, User)} entry, and sinks the entries into
 * the IMap named {@code MAP_NAME}.
 */
private static Pipeline buildPipeline(String connectionUrl) {
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.jdbc(
                connectionUrl,
                "SELECT * FROM " + TABLE_NAME,
                // column 1 = id, column 2 = name
                resultSet -> new User(resultSet.getInt(1), resultSet.getString(2))))
            .map(user -> Util.entry(user.getId(), user))
            .drainTo(Sinks.map(MAP_NAME));
    return pipeline;
}
private static Pipeline buildPipeline() { Pipeline p = Pipeline.create(); p.drawFrom(AvroSources.filesBuilder(AvroSink.DIRECTORY_NAME, ReflectDatumReader<User>::new) //Both Jet members share the same local file system .sharedFileSystem(true) .build()) .map(user -> Util.entry(user.getUsername(), user)) .drainTo(Sinks.map(AvroSink.MAP_NAME)); return p; }