From 466bafbc8c0cb4358bfb128f814bbc41af624ebc Mon Sep 17 00:00:00 2001 From: andrew Date: Fri, 17 Mar 2017 17:49:10 +0800 Subject: [PATCH 1/5] RegisterSet's set and updateIfGreater methods update the value but do not check whether it is greater than 0x1f; that may be a bug. --- .../analytics/stream/cardinality/RegisterSet.java | 2 ++ .../analytics/stream/cardinality/RegisterSetTest.java | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java b/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java index 89639c289..74449b4f6 100644 --- a/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java +++ b/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java @@ -57,6 +57,7 @@ public static int getSizeForCount(int count) { } public void set(int position, int value) { + value = value & 0x1f; int bucketPos = position / LOG2_BITS_PER_WORD; int shift = REGISTER_SIZE * (position - (bucketPos * LOG2_BITS_PER_WORD)); this.M[bucketPos] = (this.M[bucketPos] & ~(0x1f << shift)) | (value << shift); @@ -69,6 +70,7 @@ public int get(int position) { } public boolean updateIfGreater(int position, int value) { + value = value & 0x1f; int bucket = position / LOG2_BITS_PER_WORD; int shift = REGISTER_SIZE * (position - (bucket * LOG2_BITS_PER_WORD)); int mask = 0x1f << shift; diff --git a/src/test/java/com/clearspring/analytics/stream/cardinality/RegisterSetTest.java b/src/test/java/com/clearspring/analytics/stream/cardinality/RegisterSetTest.java index ee3c996b2..86fd4cee8 100644 --- a/src/test/java/com/clearspring/analytics/stream/cardinality/RegisterSetTest.java +++ b/src/test/java/com/clearspring/analytics/stream/cardinality/RegisterSetTest.java @@ -28,7 +28,14 @@ public class RegisterSetTest { public void testGetAndSet() throws Exception { RegisterSet rs = new RegisterSet((int) Math.pow(2, 4)); rs.set(0, 11); + rs.set(1, 11); + rs.set(2, 
11); assertEquals(11, rs.get(0)); + assertEquals(11, rs.get(1)); + assertEquals(11, rs.get(2)); + rs.set(0, 0xFF); + assertEquals(11, rs.get(1)); + assertEquals(11, rs.get(2)); } @Test From b0c18c8a8c98bbf8bb156e32c1f41092b56bd42a Mon Sep 17 00:00:00 2001 From: wangc Date: Sat, 18 Mar 2017 02:57:15 +0800 Subject: [PATCH 2/5] Assigning a value bigger than 0x1F to RegisterSet would cause an assertion failure if the value is not checked. I also think it would be better to derive 0x1F from RegisterSet.REGISTER_SIZE. --- .../clearspring/analytics/stream/cardinality/RegisterSet.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java b/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java index 74449b4f6..89639c289 100644 --- a/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java +++ b/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java @@ -57,7 +57,6 @@ public static int getSizeForCount(int count) { } public void set(int position, int value) { - value = value & 0x1f; int bucketPos = position / LOG2_BITS_PER_WORD; int shift = REGISTER_SIZE * (position - (bucketPos * LOG2_BITS_PER_WORD)); this.M[bucketPos] = (this.M[bucketPos] & ~(0x1f << shift)) | (value << shift); @@ -70,7 +69,6 @@ public int get(int position) { } public boolean updateIfGreater(int position, int value) { - value = value & 0x1f; int bucket = position / LOG2_BITS_PER_WORD; int shift = REGISTER_SIZE * (position - (bucket * LOG2_BITS_PER_WORD)); int mask = 0x1f << shift; From 41d131c0c66c9d9ca3155a9d45c7f930e70c5ac3 Mon Sep 17 00:00:00 2001 From: wangc Date: Sat, 18 Mar 2017 03:08:13 +0800 Subject: [PATCH 3/5] This is the version of set/updateIfGreater after checking the value --- .../clearspring/analytics/stream/cardinality/RegisterSet.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java 
b/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java index 89639c289..338bb29c5 100644 --- a/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java +++ b/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java @@ -57,6 +57,7 @@ public static int getSizeForCount(int count) { } public void set(int position, int value) { + //value = value & 0x1F; //make sure value would not longger than REGISTER_SIZE int bucketPos = position / LOG2_BITS_PER_WORD; int shift = REGISTER_SIZE * (position - (bucketPos * LOG2_BITS_PER_WORD)); this.M[bucketPos] = (this.M[bucketPos] & ~(0x1f << shift)) | (value << shift); @@ -69,6 +70,7 @@ public int get(int position) { } public boolean updateIfGreater(int position, int value) { + //value = value & 0x1F; //make sure value would not longger than REGISTER_SIZE int bucket = position / LOG2_BITS_PER_WORD; int shift = REGISTER_SIZE * (position - (bucket * LOG2_BITS_PER_WORD)); int mask = 0x1f << shift; From 443cb4f85bed8c0bfdade09299448423b191f13c Mon Sep 17 00:00:00 2001 From: wangchuan2008888 Date: Tue, 21 Mar 2017 08:43:54 +0800 Subject: [PATCH 4/5] Update RegisterSet.java --- .../clearspring/analytics/stream/cardinality/RegisterSet.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java b/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java index 338bb29c5..068b03e55 100644 --- a/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java +++ b/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java @@ -57,7 +57,7 @@ public static int getSizeForCount(int count) { } public void set(int position, int value) { - //value = value & 0x1F; //make sure value would not longger than REGISTER_SIZE + value = value & 0x1F; //make sure value would not longger than REGISTER_SIZE int bucketPos = position / LOG2_BITS_PER_WORD; int shift = 
REGISTER_SIZE * (position - (bucketPos * LOG2_BITS_PER_WORD)); this.M[bucketPos] = (this.M[bucketPos] & ~(0x1f << shift)) | (value << shift); @@ -70,7 +70,7 @@ public int get(int position) { } public boolean updateIfGreater(int position, int value) { - //value = value & 0x1F; //make sure value would not longger than REGISTER_SIZE + value = value & 0x1F; //make sure value would not longger than REGISTER_SIZE int bucket = position / LOG2_BITS_PER_WORD; int shift = REGISTER_SIZE * (position - (bucket * LOG2_BITS_PER_WORD)); int mask = 0x1f << shift; From fb0b1301c024ed863cad40a1308983f1b596565d Mon Sep 17 00:00:00 2001 From: andrew Date: Mon, 10 Apr 2017 22:10:12 +0800 Subject: [PATCH 5/5] add getCardinality to bloom filter --- .../analytics/stream/membership/BloomFilter.java | 11 +++++++++++ .../analytics/stream/membership/BloomFilterTest.java | 11 ++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/clearspring/analytics/stream/membership/BloomFilter.java b/src/main/java/com/clearspring/analytics/stream/membership/BloomFilter.java index a374b088a..b778fd902 100644 --- a/src/main/java/com/clearspring/analytics/stream/membership/BloomFilter.java +++ b/src/main/java/com/clearspring/analytics/stream/membership/BloomFilter.java @@ -178,6 +178,17 @@ public static BloomFilter deserialize(byte[] bytes) { return filter; } + /** + * this method return the cardinality estimation of current bloom filter + * base on article "cardinality estimation for dynamic bloomfilter" + * */ + public long getCardinality(){ + int nhash = this.getHashCount(); + int mbits = this.buckets(); + + int bitsUsed = this.buckets() - this.emptyBuckets(); + return Math.round(1.0/nhash * Math.log(1.0-1.0*bitsUsed/mbits)/Math.log(1-1.0/mbits)); + } } class BloomFilterSerializer implements ICompactSerializer { diff --git a/src/test/java/com/clearspring/analytics/stream/membership/BloomFilterTest.java 
b/src/test/java/com/clearspring/analytics/stream/membership/BloomFilterTest.java index 178d53c6a..09a75ef29 100644 --- a/src/test/java/com/clearspring/analytics/stream/membership/BloomFilterTest.java +++ b/src/test/java/com/clearspring/analytics/stream/membership/BloomFilterTest.java @@ -27,6 +27,7 @@ import java.util.Set; import java.util.UUID; +import com.clearspring.analytics.TestUtils; import com.clearspring.analytics.stream.membership.KeyGenerator.RandomStringGenerator; import org.junit.Before; @@ -56,7 +57,15 @@ public BloomFilterTest() { public void clear() { bf.clear(); } - + @Test + public void testCardinality(){ + Random r = new Random(); + for(int i =0;i<7799;i++){ + String str = Integer.toHexString(r.nextInt()); + bf.add(str); + } + System.out.println(bf.getCardinality()); + } @Test public void testOne() { bf.add("a");