Skip to content

Commit

Permalink
Switch to hasher-based hashing.
Browse files: browse the repository at this point in the history.
Signed-off-by: Maxim Zaks <[email protected]>
  • Loading branch information
mzaks committed Nov 11, 2024
1 parent c46f26e commit c21d14a
Show file tree
Hide file tree
Showing 30 changed files with 1,248 additions and 1,247 deletions.
153 changes: 78 additions & 75 deletions stdlib/benchmarks/hashlib/bench_hash.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@
from benchmark import Bench, BenchConfig, Bencher, BenchId
from bit import byte_swap, rotate_bits_left
from memory import UnsafePointer
from hashlib.hash import hash as old_hash
from hashlib._ahash import (
from hashlib.ahash import (
AHasher,
_folded_multiply,
_read_small,
Expand All @@ -27,7 +26,7 @@ from hashlib._ahash import (
MULTIPLE,
ROT,
)
from hashlib._hasher import _hash_with_hasher
from hashlib.fnv1a import Fnv1a

# Source: https://www.101languages.net/arabic/most-common-arabic-words/
alias words_ar = """
Expand Down Expand Up @@ -598,50 +597,25 @@ fn gen_word_pairs[words: String = words_en]() -> List[String]:
# Benchmarks
# ===----------------------------------------------------------------------===#
@parameter
fn bench_small_keys[s: String](inout b: Bencher) raises:
fn bench_small_keys[s: String, H: Hasher](inout b: Bencher) raises:
var words = gen_word_pairs[s]()

@always_inline
@parameter
fn call_fn():
for w in words:
var h = old_hash(w[])
var h = hash[HasherType=H](w[])
benchmark.keep(h)

b.iter[call_fn]()


@parameter
fn bench_small_keys_new_hash_function[s: String](inout b: Bencher) raises:
var words = gen_word_pairs[s]()

@always_inline
@parameter
fn call_fn():
for w in words:
var h = _hash_with_hasher(w[].unsafe_ptr(), w[].byte_length())
benchmark.keep(h)

b.iter[call_fn]()


@parameter
fn bench_long_key[s: String](inout b: Bencher) raises:
@always_inline
@parameter
fn call_fn():
var h = old_hash(s)
benchmark.keep(h)

b.iter[call_fn]()


@parameter
fn bench_long_key_new_hash_function[s: String](inout b: Bencher) raises:
fn bench_long_key[s: String, H: Hasher](inout b: Bencher) raises:
@always_inline
@parameter
fn call_fn():
var h = _hash_with_hasher(s.unsafe_ptr(), s.byte_length())
var h = hash[HasherType=H](s)
benchmark.keep(h)

b.iter[call_fn]()
Expand All @@ -651,64 +625,93 @@ fn bench_long_key_new_hash_function[s: String](inout b: Bencher) raises:
# Benchmark Main
# ===----------------------------------------------------------------------===#
def main():
alias ahasher = AHasher[SIMD[DType.uint64, 4](0)]
var m = Bench(BenchConfig(num_repetitions=1))
m.bench_function[bench_small_keys[words_ar]](BenchId("bench_small_keys_ar"))
m.bench_function[bench_small_keys_new_hash_function[words_ar]](
BenchId("bench_small_keys_new_ar")
m.bench_function[bench_small_keys[words_ar, ahasher]](
BenchId("bench_small_keys_ar_ahash")
)
m.bench_function[bench_small_keys[words_el, ahasher]](
BenchId("bench_small_keys_el_ahash")
)
m.bench_function[bench_small_keys[words_en, ahasher]](
BenchId("bench_small_keys_en_ahash")
)
m.bench_function[bench_small_keys[words_he, ahasher]](
BenchId("bench_small_keys_he_ahash")
)
m.bench_function[bench_small_keys[words_lv, ahasher]](
BenchId("bench_small_keys_lv_ahash")
)
m.bench_function[bench_small_keys[words_pl, ahasher]](
BenchId("bench_small_keys_pl_ahash")
)
m.bench_function[bench_small_keys[words_ru, ahasher]](
BenchId("bench_small_keys_ru_ahash")
)
m.bench_function[bench_small_keys[words_el]](BenchId("bench_small_keys_el"))
m.bench_function[bench_small_keys_new_hash_function[words_el]](
BenchId("bench_small_keys_new_el")

m.bench_function[bench_small_keys[words_ar, Fnv1a]](
BenchId("bench_small_keys_ar_fnv1a")
)
m.bench_function[bench_small_keys[words_el, Fnv1a]](
BenchId("bench_small_keys_el_fnv1a")
)
m.bench_function[bench_small_keys[words_en]](BenchId("bench_small_keys_en"))
m.bench_function[bench_small_keys_new_hash_function[words_en]](
BenchId("bench_small_keys_new_en")
m.bench_function[bench_small_keys[words_en, Fnv1a]](
BenchId("bench_small_keys_en_fnv1a")
)
m.bench_function[bench_small_keys[words_he]](BenchId("bench_small_keys_he"))
m.bench_function[bench_small_keys_new_hash_function[words_he]](
BenchId("bench_small_keys_new_he")
m.bench_function[bench_small_keys[words_he, Fnv1a]](
BenchId("bench_small_keys_he_fnv1a")
)
m.bench_function[bench_small_keys[words_lv]](BenchId("bench_small_keys_lv"))
m.bench_function[bench_small_keys_new_hash_function[words_lv]](
BenchId("bench_small_keys_new_lv")
m.bench_function[bench_small_keys[words_lv, Fnv1a]](
BenchId("bench_small_keys_lv_fnv1a")
)
m.bench_function[bench_small_keys[words_pl]](BenchId("bench_small_keys_pl"))
m.bench_function[bench_small_keys_new_hash_function[words_pl]](
BenchId("bench_small_keys_new_pl")
m.bench_function[bench_small_keys[words_pl, Fnv1a]](
BenchId("bench_small_keys_pl_fnv1a")
)
m.bench_function[bench_small_keys[words_ru]](BenchId("bench_small_keys_ru"))
m.bench_function[bench_small_keys_new_hash_function[words_ru]](
BenchId("bench_small_keys_new_ru")
m.bench_function[bench_small_keys[words_ru, Fnv1a]](
BenchId("bench_small_keys_ru_fnv1a")
)

m.bench_function[bench_long_key[words_ar]](BenchId("bench_long_key_ar"))
m.bench_function[bench_long_key_new_hash_function[words_ar]](
BenchId("bench_long_key_new_ar")
m.bench_function[bench_long_key[words_ar, ahasher]](
BenchId("bench_long_key_ar_ahash")
)
m.bench_function[bench_long_key[words_el, ahasher]](
BenchId("bench_long_key_el_ahash")
)
m.bench_function[bench_long_key[words_en, ahasher]](
BenchId("bench_long_key_keys_en_ahash")
)
m.bench_function[bench_long_key[words_he, ahasher]](
BenchId("bench_long_key_he_ahash")
)
m.bench_function[bench_long_key[words_el]](BenchId("bench_long_key_el"))
m.bench_function[bench_long_key_new_hash_function[words_el]](
BenchId("bench_long_key_new_el")
m.bench_function[bench_long_key[words_lv, ahasher]](
BenchId("bench_long_key_lv_ahash")
)
m.bench_function[bench_long_key[words_pl, ahasher]](
BenchId("bench_long_key_pl_ahash")
)
m.bench_function[bench_long_key[words_ru, ahasher]](
BenchId("bench_long_key_ru_ahash")
)

m.bench_function[bench_long_key[words_ar, Fnv1a]](
BenchId("bench_long_key_ar_fnv1a")
)
m.bench_function[bench_long_key[words_en]](
BenchId("bench_long_key_keys_en")
m.bench_function[bench_long_key[words_el, Fnv1a]](
BenchId("bench_long_key_el_fnv1a")
)
m.bench_function[bench_long_key_new_hash_function[words_en]](
BenchId("bench_long_key_new_en")
m.bench_function[bench_long_key[words_en, Fnv1a]](
BenchId("bench_long_key_keys_en_fnv1a")
)
m.bench_function[bench_long_key[words_he]](BenchId("bench_long_key_he"))
m.bench_function[bench_long_key_new_hash_function[words_he]](
BenchId("bench_long_key_new_he")
m.bench_function[bench_long_key[words_he, Fnv1a]](
BenchId("bench_long_key_he_fnv1a")
)
m.bench_function[bench_long_key[words_lv]](BenchId("bench_long_key_lv"))
m.bench_function[bench_long_key_new_hash_function[words_lv]](
BenchId("bench_long_key_new_lv")
m.bench_function[bench_long_key[words_lv, Fnv1a]](
BenchId("bench_long_key_lv_fnv1a")
)
m.bench_function[bench_long_key[words_pl]](BenchId("bench_long_key_pl"))
m.bench_function[bench_long_key_new_hash_function[words_pl]](
BenchId("bench_long_key_new_pl")
m.bench_function[bench_long_key[words_pl, Fnv1a]](
BenchId("bench_long_key_pl_fnv1a")
)
m.bench_function[bench_long_key[words_ru]](BenchId("bench_long_key_ru"))
m.bench_function[bench_long_key_new_hash_function[words_ru]](
BenchId("bench_long_key_new_ru")
m.bench_function[bench_long_key[words_ru, Fnv1a]](
BenchId("bench_long_key_ru_fnv1a")
)
m.dump_report()
12 changes: 1 addition & 11 deletions stdlib/src/builtin/dtype.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ These are Mojo built-ins, so you don't need to import them.
"""

from collections import KeyElement
from hashlib._hasher import _HashableWithHasher, _Hasher
from sys import sizeof, bitwidthof, os_is_windows

alias _mIsSigned = UInt8(1)
Expand All @@ -33,7 +32,6 @@ struct DType(
Representable,
KeyElement,
CollectionElementNew,
_HashableWithHasher,
):
"""Represents DType and provides methods for working with it."""

Expand Down Expand Up @@ -299,15 +297,7 @@ struct DType(
self._as_i8(), rhs._as_i8()
)

fn __hash__(self) -> UInt:
"""Return a 64-bit hash for this `DType` value.
Returns:
A 64-bit integer hash of this `DType` value.
"""
return hash(UInt8(self._as_i8()))

fn __hash__[H: _Hasher](self, inout hasher: H):
fn __hash__[H: Hasher](self, inout hasher: H):
"""Updates hasher with this `DType` value.
Parameters:
Expand Down
16 changes: 1 addition & 15 deletions stdlib/src/builtin/int.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ These are Mojo built-ins, so you don't need to import them.
from collections import KeyElement

from math import Ceilable, CeilDivable, Floorable, Truncable
from hashlib.hash import _hash_simd
from hashlib._hasher import _HashableWithHasher, _Hasher
from builtin.io import _snprintf
from collections.string import (
_calc_initial_buffer_size_int32,
Expand Down Expand Up @@ -289,7 +287,6 @@ struct Int(
KeyElement,
Roundable,
IntLike,
_HashableWithHasher,
):
"""This type represents an integer value."""

Expand Down Expand Up @@ -1105,18 +1102,7 @@ struct Int(
"""
return str(self)

fn __hash__(self) -> UInt:
"""Hash the int using builtin hash.
Returns:
A 64-bit hash value. This value is _not_ suitable for cryptographic
uses. Its intended usage is for data structures. See the `hash`
builtin documentation for more details.
"""
# TODO(MOCO-636): switch to DType.index
return _hash_simd(Scalar[DType.int64](self))

fn __hash__[H: _Hasher](self, inout hasher: H):
fn __hash__[H: Hasher](self, inout hasher: H):
"""Updates hasher with this int value.
Parameters:
Expand Down
15 changes: 1 addition & 14 deletions stdlib/src/builtin/simd.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ from bit import pop_count
from documentation import doc_private
from math import Ceilable, CeilDivable, Floorable, Truncable
from builtin.dtype import _uint_type_of_width
from hashlib.hash import _hash_simd
from hashlib._hasher import _HashableWithHasher, _Hasher
from builtin.format_int import _try_write_int
from builtin._format_float import _write_float
from builtin.io import _snprintf
Expand Down Expand Up @@ -179,7 +177,6 @@ struct SIMD[type: DType, size: Int](
Floorable,
Writable,
Hashable,
_HashableWithHasher,
Intable,
IntLike,
Representable,
Expand Down Expand Up @@ -1541,17 +1538,7 @@ struct SIMD[type: DType, size: Int](
# TODO: see how can we implement this.
return llvm_intrinsic["llvm.round", Self, has_side_effect=False](self)

fn __hash__(self) -> UInt:
"""Hash the value using builtin hash.
Returns:
A 64-bit hash value. This value is _not_ suitable for cryptographic
uses. Its intended usage is for data structures. See the `hash`
builtin documentation for more details.
"""
return _hash_simd(self)

fn __hash__[H: _Hasher](self, inout hasher: H):
fn __hash__[H: Hasher](self, inout hasher: H):
"""Updates hasher with this SIMD value.
Parameters:
Expand Down
15 changes: 2 additions & 13 deletions stdlib/src/builtin/string_literal.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ from sys.ffi import c_char

from memory import memcpy, UnsafePointer
from collections import List
from hashlib._hasher import _HashableWithHasher, _Hasher
from hashlib.hasher import Hashable, Hasher
from utils import StringRef, Span, StringSlice, StaticString
from utils import Writable, Writer
from utils._visualizers import lldb_formatter_wrapping_type
Expand Down Expand Up @@ -50,7 +50,6 @@ struct StringLiteral(
Stringable,
FloatableRaising,
BytesCollectionElement,
_HashableWithHasher,
):
"""This type represents a string literal.
Expand Down Expand Up @@ -335,17 +334,7 @@ struct StringLiteral(
"""
return self.__str__().__repr__()

fn __hash__(self) -> UInt:
"""Hash the underlying buffer using builtin hash.
Returns:
A 64-bit hash value. This value is _not_ suitable for cryptographic
uses. Its intended usage is for data structures. See the `hash`
builtin documentation for more details.
"""
return hash(self.unsafe_ptr(), len(self))

fn __hash__[H: _Hasher](self, inout hasher: H):
fn __hash__[H: Hasher](self, inout hasher: H):
"""Updates hasher with the underlying bytes.
Parameters:
Expand Down
17 changes: 2 additions & 15 deletions stdlib/src/builtin/uint.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,12 @@ These are Mojo built-ins, so you don't need to import them.
from sys import bitwidthof
from utils._visualizers import lldb_formatter_wrapping_type
from documentation import doc_private
from hashlib.hash import _hash_simd
from hashlib._hasher import _HashableWithHasher, _Hasher


@lldb_formatter_wrapping_type
@value
@register_passable("trivial")
struct UInt(IntLike, _HashableWithHasher):
struct UInt(IntLike, Hashable):
"""This type represents an unsigned integer.
An unsigned integer represents a positive integral number.
Expand Down Expand Up @@ -151,18 +149,7 @@ struct UInt(IntLike, _HashableWithHasher):
"""
return "UInt(" + str(self) + ")"

fn __hash__(self) -> UInt:
"""Hash the UInt using builtin hash.
Returns:
A 64-bit hash value. This value is _not_ suitable for cryptographic
uses. Its intended usage is for data structures. See the `hash`
builtin documentation for more details.
"""
# TODO(MOCO-636): switch to DType.index
return _hash_simd(Scalar[DType.uint64](self))

fn __hash__[H: _Hasher](self, inout hasher: H):
fn __hash__[H: Hasher](self, inout hasher: H):
"""Updates hasher with this uint value.
Parameters:
Expand Down
Loading

0 comments on commit c21d14a

Please sign in to comment.