From 3a63f18caeacf4303850db3f251f45ae1f1b579f Mon Sep 17 00:00:00 2001 From: Havoc Pennington Date: Thu, 2 Jul 2015 20:24:09 -0400 Subject: [PATCH 1/3] Reimplement ResolveMemos with a ridiculous hand-rolled hash table This is intended to band-aid #330 with a hash table that sucks, but ought to be much faster to copy than the stock Java one. The stock Java hash table rehashes everything every time, while this hash table can often just copy an array. That said, I didn't benchmark it or anything, it may well also be super slow. This is more or less a troll commit to get someone to do better. --- .../java/com/typesafe/config/impl/BadMap.java | 140 ++++++++++++++++++ .../typesafe/config/impl/ResolveMemos.java | 12 +- 2 files changed, 144 insertions(+), 8 deletions(-) create mode 100644 config/src/main/java/com/typesafe/config/impl/BadMap.java diff --git a/config/src/main/java/com/typesafe/config/impl/BadMap.java b/config/src/main/java/com/typesafe/config/impl/BadMap.java new file mode 100644 index 000000000..820fb3044 --- /dev/null +++ b/config/src/main/java/com/typesafe/config/impl/BadMap.java @@ -0,0 +1,140 @@ +package com.typesafe.config.impl; + +/** + * A terrible Map that isn't as expensive as HashMap to copy and + * add one item to. Please write something real if you see this + * and get cranky. + */ +final class BadMap { + final static class Entry { + final int hash; + final Object key; + final Object value; + final Entry next; + + Entry(int hash, Object k, Object v, Entry next) { + this.hash = hash; + this.key = k; + this.value = v; + this.next = next; + } + + Object find(Object k) { + if (key.equals(k)) + return value; + else if (next != null) + return next.find(k); + else + return null; + } + } + + final int size; + final Entry[] entries; + + private final static Entry[] emptyEntries = {}; + + BadMap() { + this(0, (Entry[]) emptyEntries); + } + + private BadMap(int size, Entry[] entries) { + this.size = size; + this.entries = entries; + } + + BadMap copyingPut(K k, V v) { + int newSize = size + 1; + Entry[] newEntries; + // The "- 1" is so we pick size 2 when newSize is 1. + int threshold = (newSize * 2) - 1; + if (threshold > (entries.length * 2)) { + // nextPrime doesn't always return a prime larger than + // we passed in, so this block may not actually change + // the entries size. + newEntries = new Entry[nextPrime(threshold)]; + } else { + newEntries = new Entry[entries.length]; + } + + if (newEntries.length == entries.length) + System.arraycopy(entries, 0, newEntries, 0, entries.length); + else + rehash(entries, newEntries); + + int hash = Math.abs(k.hashCode()); + store(newEntries, hash, k, v); + return new BadMap(newSize, newEntries); + } + + private static void store(Entry[] entries, int hash, K k, V v) { + int i = hash % entries.length; + Entry old = entries[i]; // old may be null + entries[i] = new Entry(hash, k, v, old); + } + + private static void store(Entry[] entries, Entry e) { + int i = e.hash % entries.length; + Entry old = entries[i]; // old may be null + if (old == null && e.next == null) { + // share the entry since it has no "next" + entries[i] = e; + } else { + // bah, have to copy it + entries[i] = new Entry(e.hash, e.key, e.value, old); + } + } + + private static void rehash(Entry[] src, Entry[] dest) { + for (Entry old : src) { + if (old != null) { + if (old.next == null) { + store(dest, old); + } else { + store(dest, old.hash, old.key, old.value); + store(dest, old.next); + } + } + } + } + + @SuppressWarnings("unchecked") + V get(K k) { + if (entries.length == 0) { + return null; + } else { + int hash = Math.abs(k.hashCode()); + int i = hash % entries.length; + Entry e = entries[i]; + if (e == null) + return null; + else + return (V) e.find(k); + } + } + + + private final static int[] primes = { + /* Skip some early ones that are close together */ + 2, /* 3, */ 5, /* 7, */ 11, /* 13, */ 17, /* 19, */ 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, + 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, + 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, + 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, + 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, + 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, + 661, 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, + 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, + 947, 953, 967, 971, 977, 983, 991, 997, 1009, + /* now we start skipping some, this is arbitrary */ + 2053, 3079, 4057, 7103 + }; + + private final static int nextPrime(int i) { + for (int p : primes) { + if (p > i) + return p; + } + /* oh well */ + return primes[primes.length - 1]; + } +} diff --git a/config/src/main/java/com/typesafe/config/impl/ResolveMemos.java b/config/src/main/java/com/typesafe/config/impl/ResolveMemos.java index cf98a900d..4fe96b3ea 100644 --- a/config/src/main/java/com/typesafe/config/impl/ResolveMemos.java +++ b/config/src/main/java/com/typesafe/config/impl/ResolveMemos.java @@ -11,14 +11,14 @@ final class ResolveMemos { // note that we can resolve things to undefined (represented as Java null, // rather than ConfigNull) so this map can have null values. - final private Map memos; + final private BadMap memos; - private ResolveMemos(Map memos) { + private ResolveMemos(BadMap memos) { this.memos = memos; } ResolveMemos() { - this(new HashMap()); + this(new BadMap()); } AbstractConfigValue get(MemoKey key) { @@ -26,10 +26,6 @@ AbstractConfigValue get(MemoKey key) { } ResolveMemos put(MemoKey key, AbstractConfigValue value) { - // completely inefficient, but so far nobody cares about resolve() - // performance, we can clean it up someday... - Map copy = new HashMap(memos); - copy.put(key, value); - return new ResolveMemos(copy); + return new ResolveMemos(memos.copyingPut(key, value)); } } From 60011923919a6713952277ec9a5e602093118ec5 Mon Sep 17 00:00:00 2001 From: Havoc Pennington Date: Fri, 3 Jul 2015 09:38:47 -0400 Subject: [PATCH 2/3] minor cleanups to BadMap --- .../main/java/com/typesafe/config/impl/BadMap.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/config/src/main/java/com/typesafe/config/impl/BadMap.java b/config/src/main/java/com/typesafe/config/impl/BadMap.java index 820fb3044..0c59eba9d 100644 --- a/config/src/main/java/com/typesafe/config/impl/BadMap.java +++ b/config/src/main/java/com/typesafe/config/impl/BadMap.java @@ -46,13 +46,12 @@ private BadMap(int size, Entry[] entries) { BadMap copyingPut(K k, V v) { int newSize = size + 1; Entry[] newEntries; - // The "- 1" is so we pick size 2 when newSize is 1. - int threshold = (newSize * 2) - 1; - if (threshold > (entries.length * 2)) { + if (newSize > entries.length) { // nextPrime doesn't always return a prime larger than // we passed in, so this block may not actually change - // the entries size. - newEntries = new Entry[nextPrime(threshold)]; + // the entries size. the "-1" is to ensure we use + // array length 2 when going from 0 to 1. + newEntries = new Entry[nextPrime((newSize*2) - 1)]; } else { newEntries = new Entry[entries.length]; } @@ -126,7 +125,7 @@ V get(K k) { 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997, 1009, /* now we start skipping some, this is arbitrary */ - 2053, 3079, 4057, 7103 + 2053, 3079, 4057, 7103, 10949, 16069, 32609, 65867, 104729 }; private final static int nextPrime(int i) { From c1ec3af32937b79ac1bcb1c68223aeb889a6ba1c Mon Sep 17 00:00:00 2001 From: Sam Holeman Date: Fri, 10 Aug 2018 15:07:23 -0700 Subject: [PATCH 3/3] Tweak BadMap, add tests --- .../java/com/typesafe/config/impl/BadMap.java | 34 ++++--- .../typesafe/config/impl/ResolveMemos.java | 5 +- .../com/typesafe/config/impl/BadMapTest.scala | 94 +++++++++++++++++++ 3 files changed, 111 insertions(+), 22 deletions(-) create mode 100644 config/src/test/scala/com/typesafe/config/impl/BadMapTest.scala diff --git a/config/src/main/java/com/typesafe/config/impl/BadMap.java b/config/src/main/java/com/typesafe/config/impl/BadMap.java index 0c59eba9d..04b375367 100644 --- a/config/src/main/java/com/typesafe/config/impl/BadMap.java +++ b/config/src/main/java/com/typesafe/config/impl/BadMap.java @@ -29,13 +29,13 @@ else if (next != null) } } - final int size; - final Entry[] entries; + private final int size; + private final Entry[] entries; private final static Entry[] emptyEntries = {}; BadMap() { - this(0, (Entry[]) emptyEntries); + this(0, emptyEntries); } private BadMap(int size, Entry[] entries) { @@ -51,19 +51,20 @@ BadMap copyingPut(K k, V v) { // we passed in, so this block may not actually change // the entries size. the "-1" is to ensure we use // array length 2 when going from 0 to 1. - newEntries = new Entry[nextPrime((newSize*2) - 1)]; + newEntries = new Entry[nextPrime((newSize * 2) - 1)]; } else { newEntries = new Entry[entries.length]; } - if (newEntries.length == entries.length) + if (newEntries.length == entries.length) { System.arraycopy(entries, 0, newEntries, 0, entries.length); - else + } else { rehash(entries, newEntries); + } int hash = Math.abs(k.hashCode()); store(newEntries, hash, k, v); - return new BadMap(newSize, newEntries); + return new BadMap<>(newSize, newEntries); } private static void store(Entry[] entries, int hash, K k, V v) { @@ -72,7 +73,7 @@ private static void store(Entry[] entries, int hash, K k, V v) { entries[i] = new Entry(hash, k, v, old); } - private static void store(Entry[] entries, Entry e) { + private static void store(Entry[] entries, Entry e) { int i = e.hash % entries.length; Entry old = entries[i]; // old may be null if (old == null && e.next == null) { @@ -84,15 +85,12 @@ private static void store(Entry[] entries, Entry e) { } } - private static void rehash(Entry[] src, Entry[] dest) { - for (Entry old : src) { - if (old != null) { - if (old.next == null) { - store(dest, old); - } else { - store(dest, old.hash, old.key, old.value); - store(dest, old.next); - } + private static void rehash(Entry[] src, Entry[] dest) { + for (Entry entry : src) { + // have to store each "next" element individually; they may belong in different indices + while (entry != null) { + store(dest, entry); + entry = entry.next; } } } @@ -128,7 +126,7 @@ V get(K k) { 2053, 3079, 4057, 7103, 10949, 16069, 32609, 65867, 104729 }; - private final static int nextPrime(int i) { + private static int nextPrime(int i) { for (int p : primes) { if (p > i) return p; diff --git a/config/src/main/java/com/typesafe/config/impl/ResolveMemos.java b/config/src/main/java/com/typesafe/config/impl/ResolveMemos.java index 4fe96b3ea..941d44f65 100644 --- a/config/src/main/java/com/typesafe/config/impl/ResolveMemos.java +++ b/config/src/main/java/com/typesafe/config/impl/ResolveMemos.java @@ -1,8 +1,5 @@ package com.typesafe.config.impl; -import java.util.HashMap; -import java.util.Map; - /** * This exists because we have to memoize resolved substitutions as we go * through the config tree; otherwise we could end up creating multiple copies @@ -18,7 +15,7 @@ private ResolveMemos(BadMap memos) { } ResolveMemos() { - this(new BadMap()); + this(new BadMap<>()); } AbstractConfigValue get(MemoKey key) { diff --git a/config/src/test/scala/com/typesafe/config/impl/BadMapTest.scala b/config/src/test/scala/com/typesafe/config/impl/BadMapTest.scala new file mode 100644 index 000000000..bfc7af173 --- /dev/null +++ b/config/src/test/scala/com/typesafe/config/impl/BadMapTest.scala @@ -0,0 +1,94 @@ +package com.typesafe.config.impl + +import org.junit.Assert._ +import org.junit.Test + +class BadMapTest extends TestUtils { + @Test + def copyingPut(): Unit = { + val map = new BadMap[String, String]() + val copy = map.copyingPut("key", "value") + + assertNull(map.get("key")) + assertEquals("value", copy.get("key")) + } + + @Test + def retrieveOldElement(): Unit = { + val map = new BadMap[String, String]() + .copyingPut("key1", "value1") + .copyingPut("key2", "value2") + .copyingPut("key3", "value3") + + assertEquals("value1", map.get("key1")) + assertEquals("value2", map.get("key2")) + assertEquals("value3", map.get("key3")) + } + + @Test + def putOverride(): Unit = { + val map = new BadMap[String, String]() + .copyingPut("key", "value1") + .copyingPut("key", "value2") + .copyingPut("key", "value3") + + assertEquals("value3", map.get("key")) + } + + @Test + def notFound(): Unit = { + val map = new BadMap[String, String]() + + assertNull(map.get("invalid key")) + } + + @Test + def putMany(): Unit = { + val entries = (1 to 1000).map(i => (s"key$i", s"value$i")) + var map = new BadMap[String, String]() + + for ((key, value) <- entries) { + map = map.copyingPut(key, value) + } + + for ((key, value) <- entries) { + assertEquals(value, map.get(key)) + } + } + + @Test + def putSameHash(): Unit = { + val hash = 2 + val entries = (1 to 10).map(i => (new UniqueKeyWithHash(hash), s"value$i")) + var map = new BadMap[UniqueKeyWithHash, String]() + + for ((key, value) <- entries) { + map = map.copyingPut(key, value) + } + + for ((key, value) <- entries) { + assertEquals(value, map.get(key)) + } + } + + @Test + def putSameHashModLength(): Unit = { + // given that the table will eventually be the following size, we insert entries who should + // eventually all share the same index and then later be redistributed once rehashed + val size = 11 + val entries = (1 to size * 2).map(i => (new UniqueKeyWithHash(size * i), s"value$i")) + var map = new BadMap[UniqueKeyWithHash, String]() + + for ((key, value) <- entries) { + map = map.copyingPut(key, value) + } + + for ((key, value) <- entries) { + assertEquals(value, map.get(key)) + } + } + + private class UniqueKeyWithHash(hash: Int) { + override def hashCode(): Int = hash + } +}