/**
 * Checks {@code stat().countMinSketch(...)} on a 1000-row range through both the
 * column-name and the {@code Column} overloads, sized either by explicit
 * depth/width or by relative error/confidence.
 */
@Test
public void testCountMinSketch() {
  final int depth = 10;
  final int width = 20;
  final double eps = 0.001;
  final double confidence = 0.99;
  final int seed = 42;

  Dataset<Long> ids = spark.range(1000);

  // Explicit depth/width, column referenced by name.
  CountMinSketch sizedByName = ids.stat().countMinSketch("id", depth, width, seed);
  Assert.assertEquals(1000, sizedByName.totalCount());
  Assert.assertEquals(depth, sizedByName.depth());
  Assert.assertEquals(width, sizedByName.width());

  // Explicit depth/width, column referenced through a Column expression.
  CountMinSketch sizedByColumn = ids.stat().countMinSketch(col("id"), depth, width, seed);
  Assert.assertEquals(1000, sizedByColumn.totalCount());
  Assert.assertEquals(depth, sizedByColumn.depth());
  Assert.assertEquals(width, sizedByColumn.width());

  // Relative error/confidence, column referenced by name.
  CountMinSketch boundedByName = ids.stat().countMinSketch("id", eps, confidence, seed);
  Assert.assertEquals(1000, boundedByName.totalCount());
  Assert.assertEquals(eps, boundedByName.relativeError(), 1.0e-4);
  Assert.assertEquals(confidence, boundedByName.confidence(), 5.0e-3);

  // Relative error/confidence, column referenced through a Column expression.
  CountMinSketch boundedByColumn =
      ids.stat().countMinSketch(col("id"), eps, confidence, seed);
  Assert.assertEquals(1000, boundedByColumn.totalCount());
  Assert.assertEquals(eps, boundedByColumn.relativeError(), 1.0e-4);
  Assert.assertEquals(confidence, boundedByColumn.confidence(), 5.0e-3);
}
/**
 * Builds count-min sketches over the {@code id} column of a 1000-row range and
 * asserts the total count plus the sizing parameters each sketch reports back.
 * Each scoped section below covers one {@code countMinSketch} overload.
 */
@Test
public void testCountMinSketch() {
  Dataset<Long> range = spark.range(1000);

  { // (String, depth, width, seed)
    CountMinSketch sketch = range.stat().countMinSketch("id", 10, 20, 42);
    Assert.assertEquals(1000, sketch.totalCount());
    Assert.assertEquals(10, sketch.depth());
    Assert.assertEquals(20, sketch.width());
  }

  { // (Column, depth, width, seed)
    CountMinSketch sketch = range.stat().countMinSketch(col("id"), 10, 20, 42);
    Assert.assertEquals(1000, sketch.totalCount());
    Assert.assertEquals(10, sketch.depth());
    Assert.assertEquals(20, sketch.width());
  }

  { // (String, eps, confidence, seed)
    CountMinSketch sketch = range.stat().countMinSketch("id", 0.001, 0.99, 42);
    Assert.assertEquals(1000, sketch.totalCount());
    Assert.assertEquals(0.001, sketch.relativeError(), 1.0e-4);
    Assert.assertEquals(0.99, sketch.confidence(), 5.0e-3);
  }

  { // (Column, eps, confidence, seed)
    CountMinSketch sketch = range.stat().countMinSketch(col("id"), 0.001, 0.99, 42);
    Assert.assertEquals(1000, sketch.totalCount());
    Assert.assertEquals(0.001, sketch.relativeError(), 1.0e-4);
    Assert.assertEquals(0.99, sketch.confidence(), 5.0e-3);
  }
}
/**
 * Exercises the four {@code countMinSketch} variants (column name or
 * {@code Column}; depth/width or relative error/confidence) against a 1000-row
 * range, asserting that every sketch counts all rows and reports the requested
 * sizing. The floating-point parameters are compared with a tolerance.
 */
@Test
public void testCountMinSketch() {
  final long rowCount = 1000L;
  final int expectedDepth = 10;
  final int expectedWidth = 20;
  final double expectedEps = 0.001;
  final double expectedConfidence = 0.99;
  final double epsTolerance = 1.0e-4;
  final double confidenceTolerance = 5.0e-3;

  Dataset<Long> data = spark.range(rowCount);

  CountMinSketch first =
      data.stat().countMinSketch("id", expectedDepth, expectedWidth, 42);
  Assert.assertEquals(rowCount, first.totalCount());
  Assert.assertEquals(expectedDepth, first.depth());
  Assert.assertEquals(expectedWidth, first.width());

  CountMinSketch second =
      data.stat().countMinSketch(col("id"), expectedDepth, expectedWidth, 42);
  Assert.assertEquals(rowCount, second.totalCount());
  Assert.assertEquals(expectedDepth, second.depth());
  Assert.assertEquals(expectedWidth, second.width());

  CountMinSketch third =
      data.stat().countMinSketch("id", expectedEps, expectedConfidence, 42);
  Assert.assertEquals(rowCount, third.totalCount());
  Assert.assertEquals(expectedEps, third.relativeError(), epsTolerance);
  Assert.assertEquals(expectedConfidence, third.confidence(), confidenceTolerance);

  CountMinSketch fourth =
      data.stat().countMinSketch(col("id"), expectedEps, expectedConfidence, 42);
  Assert.assertEquals(rowCount, fourth.totalCount());
  Assert.assertEquals(expectedEps, fourth.relativeError(), epsTolerance);
  Assert.assertEquals(expectedConfidence, fourth.confidence(), confidenceTolerance);
}