From c3295005f1cc0c55545d9351c18c485bbe03f75e Mon Sep 17 00:00:00 2001
From: cesar colle <cesar.colle@gmail.com>
Date: Wed, 29 Aug 2018 22:50:39 +0200
Subject: [PATCH 1/5] Add the test faillure for bloomfilter

---
 .../twitter/algebird/BloomFilterTest.scala    | 159 ++++++++++--------
 1 file changed, 87 insertions(+), 72 deletions(-)

diff --git a/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala b/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala
index 618e12b5a..b704509bc 100644
--- a/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala
+++ b/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala
@@ -2,16 +2,16 @@ package com.twitter.algebird
 
 import java.io.{ByteArrayOutputStream, ObjectOutputStream}
 
-import org.scalacheck.{Arbitrary, Gen, Properties}
-import org.scalatest.{Matchers, WordSpec}
 import org.scalacheck.Prop._
+import org.scalacheck.{Arbitrary, Gen}
+import org.scalatest.{Matchers, WordSpec}
 
 object BloomFilterTestUtils {
   def toSparse[A](bf: BF[A]): BFSparse[A] = bf match {
     case BFZero(hashes, width) => BFSparse(hashes, RichCBitSet(), width)
     case BFItem(item, hashes, width) =>
       BFSparse(hashes, RichCBitSet.fromArray(hashes(item)), width)
-    case bfs @ BFSparse(_, _, _) => bfs
+    case bfs@BFSparse(_, _, _) => bfs
     case BFInstance(hashes, bitset, width) =>
       BFSparse(hashes, RichCBitSet.fromBitSet(bitset), width)
   }
@@ -22,15 +22,15 @@ object BloomFilterTestUtils {
       val bs = LongBitSet.empty(width)
       bs += hashes(item)
       BFInstance(hashes, bs.toBitSetNoCopy, width)
-    case bfs @ BFSparse(hashes, bitset, width)   => bfs.dense
-    case bfi @ BFInstance(hashes, bitset, width) => bfi
+    case bfs@BFSparse(hashes, bitset, width) => bfs.dense
+    case bfi@BFInstance(hashes, bitset, width) => bfi
   }
 }
 
 class BloomFilterLaws extends CheckProperties {
 
-  import com.twitter.algebird.BaseProperties._
   import BloomFilterTestUtils._
+  import com.twitter.algebird.BaseProperties._
 
   val NUM_HASHES = 6
   val WIDTH = 32
@@ -117,7 +117,7 @@ class BloomFilterLaws extends CheckProperties {
       val next1 = a + b
 
       Equiv[BF[String]].equiv(next, next1) &&
-      (check == a.contains(b))
+        (check == a.contains(b))
     }
   }
 
@@ -155,8 +155,8 @@ class BFHashIndices extends CheckProperties {
   }
 
   /**
-   *   This is the version of the BFHash as of before the "negative values fix"
-   */
+    * This is the version of the BFHash as of before the "negative values fix"
+    */
   case class NegativeBFHash(numHashes: Int, width: Int) {
     val size = numHashes
 
@@ -203,7 +203,7 @@ class BFHashIndices extends CheckProperties {
   }
 }
 
-class BloomFilterFalsePositives[T: Gen: Hash128](falsePositiveRate: Double) extends ApproximateProperty {
+class BloomFilterFalsePositives[T: Gen : Hash128](falsePositiveRate: Double) extends ApproximateProperty {
 
   type Exact = Set[T]
   type Approx = BF[T]
@@ -237,7 +237,7 @@ class BloomFilterFalsePositives[T: Gen: Hash128](falsePositiveRate: Double) exte
   def approximateResult(bf: BF[T], t: T) = bf.contains(t)
 }
 
-class BloomFilterCardinality[T: Gen: Hash128] extends ApproximateProperty {
+class BloomFilterCardinality[T: Gen : Hash128] extends ApproximateProperty {
 
   type Exact = Set[T]
   type Approx = BF[T]
@@ -264,10 +264,12 @@ class BloomFilterCardinality[T: Gen: Hash128] extends ApproximateProperty {
   def inputGenerator(set: Set[T]) = Gen.const(())
 
   def exactResult(s: Set[T], u: Unit) = s.size
+
   def approximateResult(bf: BF[T], u: Unit) = bf.size
 }
 
 class BloomFilterProperties extends ApproximateProperties("BloomFilter") {
+
   import ApproximateProperty.toProp
 
   for (falsePositiveRate <- List(0.1, 0.01, 0.001)) {
@@ -304,54 +306,53 @@ class BloomFilterTest extends WordSpec with Matchers {
     }
 
     "identify all true positives" in {
-      (0 to 100).foreach { _ =>
-        {
-          val bfMonoid = new BloomFilterMonoid[String](RAND.nextInt(5) + 1, RAND.nextInt(64) + 32)
-          val numEntries = 5
-          val entries = (0 until numEntries).map(_ => RAND.nextInt.toString)
-          val bf = bfMonoid.create(entries: _*)
-
-          entries.foreach { i =>
-            assert(bf.contains(i.toString).isTrue)
-          }
+      (0 to 100).foreach { _ => {
+        val bfMonoid = new BloomFilterMonoid[String](RAND.nextInt(5) + 1, RAND.nextInt(64) + 32)
+        val numEntries = 5
+        val entries = (0 until numEntries).map(_ => RAND.nextInt.toString)
+        val bf = bfMonoid.create(entries: _*)
+
+        entries.foreach { i =>
+          assert(bf.contains(i.toString).isTrue)
         }
       }
+      }
     }
 
     "have small false positive rate" in {
       val iter = 10000
 
-      Seq(0.1, 0.01, 0.001).foreach { fpProb =>
-        {
-          val fps = (0 until iter).par.map { _ =>
-            {
-              val numEntries = RAND.nextInt(10) + 1
+      Seq(0.1, 0.01, 0.001).foreach { fpProb => {
+        val fps = (0 until iter).par.map { _ => {
+          val numEntries = RAND.nextInt(10) + 1
 
-              val bfMonoid = BloomFilter[String](numEntries, fpProb)
+          val bfMonoid = BloomFilter[String](numEntries, fpProb)
 
-              val entries = RAND
-                .shuffle((0 until 1000).toList)
-                .take(numEntries + 1)
-                .map(_.toString)
-              val bf = bfMonoid.create(entries.drop(1): _*)
+          val entries = RAND
+            .shuffle((0 until 1000).toList)
+            .take(numEntries + 1)
+            .map(_.toString)
+          val bf = bfMonoid.create(entries.drop(1): _*)
 
-              if (bf.contains(entries(0)).isTrue) 1.0 else 0.0
-            }
-          }
+          if (bf.contains(entries(0)).isTrue) 1.0 else 0.0
+        }
+        }
 
-          val observedFpProb = fps.sum / fps.size
+        val observedFpProb = fps.sum / fps.size
 
-          // the 2.5 is a fudge factor to make the probability of it low
-          // in tests
-          assert(observedFpProb <= 2.5 * fpProb)
-        }
+        // the 2.5 is a fudge factor to make the probability of it low
+        // in tests
+        assert(observedFpProb <= 2.5 * fpProb)
+      }
       }
     }
 
     "approximate cardinality" in {
       val bfMonoid = BloomFilterMonoid[String](10, 100000)
       Seq(10, 100, 1000, 10000).foreach { exactCardinality =>
-        val items = (1 until exactCardinality).map { _.toString }
+        val items = (1 until exactCardinality).map {
+          _.toString
+        }
         val bf = bfMonoid.create(items: _*)
         val size = bf.size
 
@@ -362,18 +363,17 @@ class BloomFilterTest extends WordSpec with Matchers {
     }
 
     "work as an Aggregator" in {
-      (0 to 10).foreach { _ =>
-        {
-          val aggregator = BloomFilterAggregator[String](RAND.nextInt(5) + 1, RAND.nextInt(64) + 32)
-          val numEntries = 5
-          val entries = (0 until numEntries).map(_ => RAND.nextInt.toString)
-          val bf = aggregator(entries)
-
-          entries.foreach { i =>
-            assert(bf.contains(i.toString).isTrue)
-          }
+      (0 to 10).foreach { _ => {
+        val aggregator = BloomFilterAggregator[String](RAND.nextInt(5) + 1, RAND.nextInt(64) + 32)
+        val numEntries = 5
+        val entries = (0 until numEntries).map(_ => RAND.nextInt.toString)
+        val bf = aggregator(entries)
+
+        entries.foreach { i =>
+          assert(bf.contains(i.toString).isTrue)
         }
       }
+      }
     }
 
     "not serialize @transient dense BFInstance" in {
@@ -397,8 +397,8 @@ class BloomFilterTest extends WordSpec with Matchers {
     }
 
     /**
-     * this test failed before the fix for https://github.com/twitter/algebird/issues/229
-     */
+      * this test failed before the fix for https://github.com/twitter/algebird/issues/229
+      */
     "not have negative hash values" in {
       val NUM_HASHES = 2
       val WIDTH = 4752800
@@ -408,33 +408,48 @@ class BloomFilterTest extends WordSpec with Matchers {
 
       assert(index >= 0)
     }
+    "return his size event if it's saturated" in {
+      val bfMonoid = BloomFilterMonoid[String](5, 13)
+      val strings = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10).map(_.toString)
+      val bf = bfMonoid.create(strings: _*)
+
+      assert(bf.size.min > 0)
+    }
+
+    "return a max approximate size if it's saturated" in {
+      val bfMonoid = BloomFilterMonoid[String](5, 13)
+      val strings = List(8, 9, 8, 10, 1, 8, 11, 7, 7, 1).map(_.toString)
+      val bf = bfMonoid.create(strings: _*)
+      // even it's seems big. This assert fail..
+      assert(bf.size.max < strings.length * 100)
+    }
+
   }
 
   "BloomFilter method `checkAndAdd`" should {
 
     "be identical to method `+`" in {
-      (0 to 100).foreach { _ =>
-        {
-          val bfMonoid = new BloomFilterMonoid[String](RAND.nextInt(5) + 1, RAND.nextInt(64) + 32)
-          val numEntries = 5
-          val entries = (0 until numEntries).map(_ => RAND.nextInt.toString)
-          val bf = bfMonoid.create(entries: _*)
-          val bfWithCheckAndAdd = entries
-            .map { entry =>
-              (entry, bfMonoid.create(entry))
-            }
-            .foldLeft((bfMonoid.zero, bfMonoid.zero)) {
-              case ((left, leftAlt), (entry, right)) =>
-                val (newLeftAlt, contained) = leftAlt.checkAndAdd(entry)
-                left.contains(entry) shouldBe contained
-                (left + entry, newLeftAlt)
-            }
-
-          entries.foreach { i =>
-            assert(bf.contains(i.toString).isTrue)
+      (0 to 100).foreach { _ => {
+        val bfMonoid = new BloomFilterMonoid[String](RAND.nextInt(5) + 1, RAND.nextInt(64) + 32)
+        val numEntries = 5
+        val entries = (0 until numEntries).map(_ => RAND.nextInt.toString)
+        val bf = bfMonoid.create(entries: _*)
+        val bfWithCheckAndAdd = entries
+          .map { entry =>
+            (entry, bfMonoid.create(entry))
           }
+          .foldLeft((bfMonoid.zero, bfMonoid.zero)) {
+            case ((left, leftAlt), (entry, right)) =>
+              val (newLeftAlt, contained) = leftAlt.checkAndAdd(entry)
+              left.contains(entry) shouldBe contained
+              (left + entry, newLeftAlt)
+          }
+
+        entries.foreach { i =>
+          assert(bf.contains(i.toString).isTrue)
         }
       }
+      }
     }
   }
 

From dd53848235251bb1b2f8ef4e8e279adf0be930ce Mon Sep 17 00:00:00 2001
From: cesar colle <cesar.colle@gmail.com>
Date: Wed, 29 Aug 2018 23:05:29 +0200
Subject: [PATCH 2/5] add scalafmt's algebird

---
 .../twitter/algebird/BloomFilterTest.scala    | 131 +++++++++---------
 1 file changed, 68 insertions(+), 63 deletions(-)

diff --git a/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala b/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala
index b704509bc..68e7550e0 100644
--- a/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala
+++ b/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala
@@ -11,7 +11,7 @@ object BloomFilterTestUtils {
     case BFZero(hashes, width) => BFSparse(hashes, RichCBitSet(), width)
     case BFItem(item, hashes, width) =>
       BFSparse(hashes, RichCBitSet.fromArray(hashes(item)), width)
-    case bfs@BFSparse(_, _, _) => bfs
+    case bfs @ BFSparse(_, _, _) => bfs
     case BFInstance(hashes, bitset, width) =>
       BFSparse(hashes, RichCBitSet.fromBitSet(bitset), width)
   }
@@ -22,8 +22,8 @@ object BloomFilterTestUtils {
       val bs = LongBitSet.empty(width)
       bs += hashes(item)
       BFInstance(hashes, bs.toBitSetNoCopy, width)
-    case bfs@BFSparse(hashes, bitset, width) => bfs.dense
-    case bfi@BFInstance(hashes, bitset, width) => bfi
+    case bfs @ BFSparse(hashes, bitset, width)   => bfs.dense
+    case bfi @ BFInstance(hashes, bitset, width) => bfi
   }
 }
 
@@ -117,7 +117,7 @@ class BloomFilterLaws extends CheckProperties {
       val next1 = a + b
 
       Equiv[BF[String]].equiv(next, next1) &&
-        (check == a.contains(b))
+      (check == a.contains(b))
     }
   }
 
@@ -155,8 +155,8 @@ class BFHashIndices extends CheckProperties {
   }
 
   /**
-    * This is the version of the BFHash as of before the "negative values fix"
-    */
+   * This is the version of the BFHash as of before the "negative values fix"
+   */
   case class NegativeBFHash(numHashes: Int, width: Int) {
     val size = numHashes
 
@@ -203,7 +203,7 @@ class BFHashIndices extends CheckProperties {
   }
 }
 
-class BloomFilterFalsePositives[T: Gen : Hash128](falsePositiveRate: Double) extends ApproximateProperty {
+class BloomFilterFalsePositives[T: Gen: Hash128](falsePositiveRate: Double) extends ApproximateProperty {
 
   type Exact = Set[T]
   type Approx = BF[T]
@@ -237,7 +237,7 @@ class BloomFilterFalsePositives[T: Gen : Hash128](falsePositiveRate: Double) ext
   def approximateResult(bf: BF[T], t: T) = bf.contains(t)
 }
 
-class BloomFilterCardinality[T: Gen : Hash128] extends ApproximateProperty {
+class BloomFilterCardinality[T: Gen: Hash128] extends ApproximateProperty {
 
   type Exact = Set[T]
   type Approx = BF[T]
@@ -306,44 +306,47 @@ class BloomFilterTest extends WordSpec with Matchers {
     }
 
     "identify all true positives" in {
-      (0 to 100).foreach { _ => {
-        val bfMonoid = new BloomFilterMonoid[String](RAND.nextInt(5) + 1, RAND.nextInt(64) + 32)
-        val numEntries = 5
-        val entries = (0 until numEntries).map(_ => RAND.nextInt.toString)
-        val bf = bfMonoid.create(entries: _*)
-
-        entries.foreach { i =>
-          assert(bf.contains(i.toString).isTrue)
+      (0 to 100).foreach { _ =>
+        {
+          val bfMonoid = new BloomFilterMonoid[String](RAND.nextInt(5) + 1, RAND.nextInt(64) + 32)
+          val numEntries = 5
+          val entries = (0 until numEntries).map(_ => RAND.nextInt.toString)
+          val bf = bfMonoid.create(entries: _*)
+
+          entries.foreach { i =>
+            assert(bf.contains(i.toString).isTrue)
+          }
         }
       }
-      }
     }
 
     "have small false positive rate" in {
       val iter = 10000
 
-      Seq(0.1, 0.01, 0.001).foreach { fpProb => {
-        val fps = (0 until iter).par.map { _ => {
-          val numEntries = RAND.nextInt(10) + 1
+      Seq(0.1, 0.01, 0.001).foreach { fpProb =>
+        {
+          val fps = (0 until iter).par.map { _ =>
+            {
+              val numEntries = RAND.nextInt(10) + 1
 
-          val bfMonoid = BloomFilter[String](numEntries, fpProb)
+              val bfMonoid = BloomFilter[String](numEntries, fpProb)
 
-          val entries = RAND
-            .shuffle((0 until 1000).toList)
-            .take(numEntries + 1)
-            .map(_.toString)
-          val bf = bfMonoid.create(entries.drop(1): _*)
+              val entries = RAND
+                .shuffle((0 until 1000).toList)
+                .take(numEntries + 1)
+                .map(_.toString)
+              val bf = bfMonoid.create(entries.drop(1): _*)
 
-          if (bf.contains(entries(0)).isTrue) 1.0 else 0.0
-        }
-        }
+              if (bf.contains(entries(0)).isTrue) 1.0 else 0.0
+            }
+          }
 
-        val observedFpProb = fps.sum / fps.size
+          val observedFpProb = fps.sum / fps.size
 
-        // the 2.5 is a fudge factor to make the probability of it low
-        // in tests
-        assert(observedFpProb <= 2.5 * fpProb)
-      }
+          // the 2.5 is a fudge factor to make the probability of it low
+          // in tests
+          assert(observedFpProb <= 2.5 * fpProb)
+        }
       }
     }
 
@@ -363,17 +366,18 @@ class BloomFilterTest extends WordSpec with Matchers {
     }
 
     "work as an Aggregator" in {
-      (0 to 10).foreach { _ => {
-        val aggregator = BloomFilterAggregator[String](RAND.nextInt(5) + 1, RAND.nextInt(64) + 32)
-        val numEntries = 5
-        val entries = (0 until numEntries).map(_ => RAND.nextInt.toString)
-        val bf = aggregator(entries)
-
-        entries.foreach { i =>
-          assert(bf.contains(i.toString).isTrue)
+      (0 to 10).foreach { _ =>
+        {
+          val aggregator = BloomFilterAggregator[String](RAND.nextInt(5) + 1, RAND.nextInt(64) + 32)
+          val numEntries = 5
+          val entries = (0 until numEntries).map(_ => RAND.nextInt.toString)
+          val bf = aggregator(entries)
+
+          entries.foreach { i =>
+            assert(bf.contains(i.toString).isTrue)
+          }
         }
       }
-      }
     }
 
     "not serialize @transient dense BFInstance" in {
@@ -397,8 +401,8 @@ class BloomFilterTest extends WordSpec with Matchers {
     }
 
     /**
-      * this test failed before the fix for https://github.com/twitter/algebird/issues/229
-      */
+     * this test failed before the fix for https://github.com/twitter/algebird/issues/229
+     */
     "not have negative hash values" in {
       val NUM_HASHES = 2
       val WIDTH = 4752800
@@ -429,27 +433,28 @@ class BloomFilterTest extends WordSpec with Matchers {
   "BloomFilter method `checkAndAdd`" should {
 
     "be identical to method `+`" in {
-      (0 to 100).foreach { _ => {
-        val bfMonoid = new BloomFilterMonoid[String](RAND.nextInt(5) + 1, RAND.nextInt(64) + 32)
-        val numEntries = 5
-        val entries = (0 until numEntries).map(_ => RAND.nextInt.toString)
-        val bf = bfMonoid.create(entries: _*)
-        val bfWithCheckAndAdd = entries
-          .map { entry =>
-            (entry, bfMonoid.create(entry))
+      (0 to 100).foreach { _ =>
+        {
+          val bfMonoid = new BloomFilterMonoid[String](RAND.nextInt(5) + 1, RAND.nextInt(64) + 32)
+          val numEntries = 5
+          val entries = (0 until numEntries).map(_ => RAND.nextInt.toString)
+          val bf = bfMonoid.create(entries: _*)
+          val bfWithCheckAndAdd = entries
+            .map { entry =>
+              (entry, bfMonoid.create(entry))
+            }
+            .foldLeft((bfMonoid.zero, bfMonoid.zero)) {
+              case ((left, leftAlt), (entry, right)) =>
+                val (newLeftAlt, contained) = leftAlt.checkAndAdd(entry)
+                left.contains(entry) shouldBe contained
+                (left + entry, newLeftAlt)
+            }
+
+          entries.foreach { i =>
+            assert(bf.contains(i.toString).isTrue)
           }
-          .foldLeft((bfMonoid.zero, bfMonoid.zero)) {
-            case ((left, leftAlt), (entry, right)) =>
-              val (newLeftAlt, contained) = leftAlt.checkAndAdd(entry)
-              left.contains(entry) shouldBe contained
-              (left + entry, newLeftAlt)
-          }
-
-        entries.foreach { i =>
-          assert(bf.contains(i.toString).isTrue)
         }
       }
-      }
     }
   }
 

From d81a4d04402f71f13443c190b7500ddc5274dd6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9sar=20Coll=C3=A9?= <cesar.colle@gmail.com>
Date: Thu, 30 Aug 2018 08:48:42 +0200
Subject: [PATCH 3/5] Revert "Add Apple to 'Projects using Algebird' (#663)"

This reverts commit 9180862044b343b68d03d6f21ac6615aebdbf65b.
---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index be86a132b..f413c2824 100644
--- a/README.md
+++ b/README.md
@@ -77,7 +77,6 @@ See [Algebird's page on the Scaladex](https://index.scala-lang.org/twitter/algeb
 - [Scio](https://github.com/spotify/scio)
 - [Packetloop](https://www.packetloop.com) (see [this tweet](https://twitter.com/cloudjunky/status/355073917720858626))
 - Ebay uses Algebird for machine learning: [ScalaDays talk](http://www.slideshare.net/VitalyGordon/scalable-and-flexible-machine-learning-with-scala-linkedin)
-- [Apple (FEAR Team)](https://news.ycombinator.com/item?id=16969118)
 
 Other projects built with Algebird, as compiled by the Scaladex: [![Scaladex Dependents](https://index.scala-lang.org/count.svg?q=dependencies:twitter/algebird*&subject=scaladex:&color=blue&style=flat-square)](https://index.scala-lang.org/search?q=dependencies:twitter/algebird-core)
 

From 8043f7a597db352445c6b8fd06a05c356b8821fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9sar=20Coll=C3=A9?= <cesar.colle@gmail.com>
Date: Thu, 30 Aug 2018 08:57:15 +0200
Subject: [PATCH 4/5] Add faillur test

---
 .../com/twitter/algebird/BloomFilterTest.scala    | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala b/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala
index 68e7550e0..3c9173886 100644
--- a/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala
+++ b/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala
@@ -2,9 +2,9 @@ package com.twitter.algebird
 
 import java.io.{ByteArrayOutputStream, ObjectOutputStream}
 
-import org.scalacheck.Prop._
-import org.scalacheck.{Arbitrary, Gen}
+import org.scalacheck.{Arbitrary, Gen, Properties}
 import org.scalatest.{Matchers, WordSpec}
+import org.scalacheck.Prop._
 
 object BloomFilterTestUtils {
   def toSparse[A](bf: BF[A]): BFSparse[A] = bf match {
@@ -29,8 +29,8 @@ object BloomFilterTestUtils {
 
 class BloomFilterLaws extends CheckProperties {
 
-  import BloomFilterTestUtils._
   import com.twitter.algebird.BaseProperties._
+  import BloomFilterTestUtils._
 
   val NUM_HASHES = 6
   val WIDTH = 32
@@ -155,7 +155,7 @@ class BFHashIndices extends CheckProperties {
   }
 
   /**
-   * This is the version of the BFHash as of before the "negative values fix"
+   *   This is the version of the BFHash as of before the "negative values fix"
    */
   case class NegativeBFHash(numHashes: Int, width: Int) {
     val size = numHashes
@@ -264,12 +264,10 @@ class BloomFilterCardinality[T: Gen: Hash128] extends ApproximateProperty {
   def inputGenerator(set: Set[T]) = Gen.const(())
 
   def exactResult(s: Set[T], u: Unit) = s.size
-
   def approximateResult(bf: BF[T], u: Unit) = bf.size
 }
 
 class BloomFilterProperties extends ApproximateProperties("BloomFilter") {
-
   import ApproximateProperty.toProp
 
   for (falsePositiveRate <- List(0.1, 0.01, 0.001)) {
@@ -353,9 +351,7 @@ class BloomFilterTest extends WordSpec with Matchers {
     "approximate cardinality" in {
       val bfMonoid = BloomFilterMonoid[String](10, 100000)
       Seq(10, 100, 1000, 10000).foreach { exactCardinality =>
-        val items = (1 until exactCardinality).map {
-          _.toString
-        }
+        val items = (1 until exactCardinality).map { _.toString }
         val bf = bfMonoid.create(items: _*)
         val size = bf.size
 
@@ -412,6 +408,7 @@ class BloomFilterTest extends WordSpec with Matchers {
 
       assert(index >= 0)
     }
+
     "return his size event if it's saturated" in {
       val bfMonoid = BloomFilterMonoid[String](5, 13)
       val strings = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10).map(_.toString)

From ee3b2db1570386122a5748e4b2987cbfdcc1c9b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9sar=20Coll=C3=A9?= <cesar.colle@gmail.com>
Date: Fri, 7 Sep 2018 18:43:25 +0200
Subject: [PATCH 5/5] Fix #632 : BloomFilter saturated exception

From the paper linked to the size estimation of a bloomfilter,
we have a inegality mixing t = number bits to 1 of the filter, m=width of filter, numHash=k

        nl <= S-1(t-1)
        nr >= S-1(t+1)
S-1 is defined like :

	S-1(t) = ln(1 - t/m) / k.ln(1 - 1/m)

problem occur when _t = m_

	S-1(m) = ln(0) / k.ln(1 - 1/m)

but *ln(-1/m)* is not correct.

problem is :

	ln( 1 - 1/m) < 0
The scala maths library give :

	scala> val infinity = scala.math.log(0) / -42
	infinity: Double = Infinity

	scala> infinity.round.toInt
	res24: Int = -1

just add special case for the S-1 function.
---
 .../com/twitter/algebird/BloomFilter.scala     |  5 ++++-
 .../com/twitter/algebird/BloomFilterTest.scala | 18 +++++-------------
 2 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/algebird-core/src/main/scala/com/twitter/algebird/BloomFilter.scala b/algebird-core/src/main/scala/com/twitter/algebird/BloomFilter.scala
index f02f6c948..6d81ebe88 100644
--- a/algebird-core/src/main/scala/com/twitter/algebird/BloomFilter.scala
+++ b/algebird-core/src/main/scala/com/twitter/algebird/BloomFilter.scala
@@ -157,13 +157,16 @@ object BloomFilter {
      * This is \hat{S}^{-1}(t) in the cardinality estimation paper used above.
      */
     def sInverse(t: Int): Double =
-      scala.math.log1p(-t.toDouble / width) / (numHashes * scala.math.log1p(-1.0 / width))
+      if (numBits == width) 0.0
+      else
+        scala.math.log(1 - t.toDouble / width) / (numHashes * scala.math.log1p(-1.0 / width))
 
     // Variable names correspond to those used in the paper.
     val t = numBits
     val n = sInverse(t).round.toInt
     // Take the min and max because the probability formula assumes
     // nl <= sInverse(t - 1) and sInverse(t + 1) <= nr
+
     val nl =
       scala.math.min(sInverse(t - 1).floor, (1 - approximationWidth) * n).toInt
     val nr =
diff --git a/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala b/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala
index 3c9173886..905799a79 100644
--- a/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala
+++ b/algebird-test/src/test/scala/com/twitter/algebird/BloomFilterTest.scala
@@ -354,7 +354,6 @@ class BloomFilterTest extends WordSpec with Matchers {
         val items = (1 until exactCardinality).map { _.toString }
         val bf = bfMonoid.create(items: _*)
         val size = bf.size
-
         assert(size ~ exactCardinality)
         assert(size.min <= size.estimate)
         assert(size.max >= size.estimate)
@@ -408,23 +407,16 @@ class BloomFilterTest extends WordSpec with Matchers {
 
       assert(index >= 0)
     }
+  }
 
-    "return his size event if it's saturated" in {
-      val bfMonoid = BloomFilterMonoid[String](5, 13)
-      val strings = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10).map(_.toString)
-      val bf = bfMonoid.create(strings: _*)
-
-      assert(bf.size.min > 0)
-    }
+  "BloomFilter method `size`" should {
 
-    "return a max approximate size if it's saturated" in {
+    "return the appropriate size when it's saturated " in {
       val bfMonoid = BloomFilterMonoid[String](5, 13)
-      val strings = List(8, 9, 8, 10, 1, 8, 11, 7, 7, 1).map(_.toString)
+      val strings = List(8, 9, 8, 10, 1, 8, 11, 12, 13, 14, 15, 67, 18981, 1122, 86787).map(_.toString)
       val bf = bfMonoid.create(strings: _*)
-      // even it's seems big. This assert fail..
-      assert(bf.size.max < strings.length * 100)
+      assert(bf.size.isZero)
     }
-
   }
 
   "BloomFilter method `checkAndAdd`" should {