diff --git a/README.md b/README.md index be86a132b..f413c2824 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,6 @@ See [Algebird's page on the Scaladex](https://index.scala-lang.org/twitter/algeb - [Scio](https://github.com/spotify/scio) - [Packetloop](https://www.packetloop.com) (see [this tweet](https://twitter.com/cloudjunky/status/355073917720858626)) - Ebay uses Algebird for machine learning: [ScalaDays talk](http://www.slideshare.net/VitalyGordon/scalable-and-flexible-machine-learning-with-scala-linkedin) -- [Apple (FEAR Team)](https://news.ycombinator.com/item?id=16969118) Other projects built with Algebird, as compiled by the Scaladex: [![Scaladex Dependents](https://index.scala-lang.org/count.svg?q=dependencies:twitter/algebird*&subject=scaladex:&color=blue&style=flat-square)](https://index.scala-lang.org/search?q=dependencies:twitter/algebird-core) diff --git a/algebird-core/src/main/scala/com/twitter/algebird/BloomFilter.scala b/algebird-core/src/main/scala/com/twitter/algebird/BloomFilter.scala index f02f6c948..6d81ebe88 100644 --- a/algebird-core/src/main/scala/com/twitter/algebird/BloomFilter.scala +++ b/algebird-core/src/main/scala/com/twitter/algebird/BloomFilter.scala @@ -157,13 +157,16 @@ object BloomFilter { * This is \hat{S}^{-1}(t) in the cardinality estimation paper used above. */ def sInverse(t: Int): Double = - scala.math.log1p(-t.toDouble / width) / (numHashes * scala.math.log1p(-1.0 / width)) + if (numBits == width) 0.0 + else + scala.math.log(1 - t.toDouble / width) / (numHashes * scala.math.log1p(-1.0 / width)) // Variable names correspond to those used in the paper. val t = numBits val n = sInverse(t).round.toInt // Take the min and max because the probability formula assumes // nl <= sInverse(t - 1) and sInverse(t + 1) <= nr + val nl = scala.math.min(sInverse(t - 1).floor, (1 - approximationWidth) * n).toInt val nr = diff --git a/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala b/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala index 618e12b5a..905799a79 100644 --- a/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala +++ b/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala @@ -354,7 +354,6 @@ class BloomFilterTest extends WordSpec with Matchers { val items = (1 until exactCardinality).map { _.toString } val bf = bfMonoid.create(items: _*) val size = bf.size - assert(size ~ exactCardinality) assert(size.min <= size.estimate) assert(size.max >= size.estimate) @@ -410,6 +409,16 @@ class BloomFilterTest extends WordSpec with Matchers { } } + "BloomFilter method `size`" should { + + "return the appropriate size when it's saturated " in { + val bfMonoid = BloomFilterMonoid[String](5, 13) + val strings = List(8, 9, 8, 10, 1, 8, 11, 12, 13, 14, 15, 67, 18981, 1122, 86787).map(_.toString) + val bf = bfMonoid.create(strings: _*) + assert(bf.size.isZero) + } + } + "BloomFilter method `checkAndAdd`" should { "be identical to method `+`" in {