Version 2 Initial Commit

This commit drops all the non-generic, reflection-based code, including all previous subpackages. It also renames several functions, and moves the float code into a new subpackage. This represents a large change to the API and thus is a new major version.
shawnsmithdev · May 23, 2022 · d9207db · d9207db
1 parent e5b8c98
commit d9207db
Show file tree

Hide file tree

Showing 32 changed files with 804 additions and 1,284 deletions.
diff --git a/README.md b/README.md
@@ -1,13 +1,13 @@
-zermelo 
+zermelo v2 
 =========
 [![go.dev reference](https://img.shields.io/badge/go.dev-reference-007d9c?logo=go&logoColor=white&style=flat-square)](https://pkg.go.dev/github.com/shawnsmithdev/zermelo)
 [![license](http://img.shields.io/badge/license-MIT-red.svg?style=flat)](https://raw.githubusercontent.com/shawnsmithdev/zermelo/master/LICENSE)
 [![Go Report Card](https://goreportcard.com/badge/github.com/shawnsmithdev/zermelo)](https://goreportcard.com/report/github.com/shawnsmithdev/zermelo)
 
-A radix sorting library for Go.  Trade memory for speed!
+A radix sorting library for Go.  Trade memory for speed! Now with more generics!
 
 ```go
-import "github.com/shawnsmithdev/zermelo"
+import "github.com/shawnsmithdev/zermelo/v2"
 
 func foo(large []uint64)
     zermelo.Sort(large)
@@ -35,29 +35,44 @@ Zermelo is named after [Ernst Zermelo](http://en.wikipedia.org/wiki/Ernst_Zermel
 
 Supported Types
 ===============
-`SortIntegers` and `IntSorter` support constraints.Integer slices,
+`Sort` and `NewSorter` support constraints.Integer slices,
 that is `[]int`, `[]uint64`, `[]byte`, etc, and derived types.
 
-`SortFloats` and `FloatSorter` support constraints.Float slices,
-specifically `[]float32` and `[]float64` and derived types.
 
 Sorter
 ======
 
-An `IntSorter` or `FloatSorter` will reuse buffers created during `Sort()` calls. This is not thread safe.
+A `Sorter` returned by `NewSorter` will reuse buffers created during `Sort()` calls. This is not thread safe.
 Buffers are grown as needed at a 25% exponential growth rate.  This means if you sort a slice of size `n`,
 subsequent calls with slices up to `n * 1.25` in length will not cause another buffer allocation. This does not apply
 to the first allocation, which will make a buffer of the same size as the requested slice. This way, if the slices being
 sorted do not grow in size, there is no unused buffer space.
 
 ```go
-import "github.com/shawnsmithdev/zermelo"
+import "github.com/shawnsmithdev/zermelo/v2"
 
 func foo(bar [][]uint64) {
-    sorter := zermelo.NewIntSorter[uint64]()
+    sorter := zermelo.NewSorter[uint64]()
     for _, x := range(bar) {
         sorter.Sort(x)
     }
 }
 
-```
+```
+
+Float Subpackage
+================
+`SortFloats` and `NewFloatSorter`, provided in the `floats` subpackage, support constraints.Float slices,
+specifically `[]float32` and `[]float64` and derived types.
+This uses the unsafe package to treat floats as though they were unsigned integers.
+
+```go
+import "github.com/shawnsmithdev/zermelo/v2/floats"
+
+func foo(bar [][]float64) {
+    sorter := floats.NewFloatSorter[float64]()
+    for _, x := range(bar) {
+        sorter.Sort(x)
+    }
+}
+```
diff --git a/bench_test.go b/bench_test.go
@@ -1,10 +1,12 @@
 package zermelo
 
 import (
+	"github.com/shawnsmithdev/zermelo/v2/internal"
 	"golang.org/x/exp/constraints"
 	"golang.org/x/exp/slices"
 	"math/rand"
 	"runtime"
+	"sort"
 	"sync"
 	"testing"
 	"time"
@@ -16,102 +18,86 @@ const testSmallSize = compSortCutoff64
 const testMediumSize = 1024   // ~1k * 64bit = 8 KB
 const testLargeSize = 1 << 20 // ~1M * 64bit = 8 MB
 
-// []uint64
-func BenchmarkZSortUint64T(b *testing.B) {
-	sortFunc := func(x []uint64) {
-		SortIntegersBYOB[uint64](x, make([]uint64, testTinySize))
-	}
-	testIntSortBencher[uint64](b, testTinySize, sortFunc)
-}
-func BenchmarkZSorterUint64T(b *testing.B) {
-	s := newIntSorter[uint64]()
-	s.setCutoff(0)
-	testIntSortBencher[uint64](b, testTinySize, s.Sort)
-}
-func BenchmarkGoSortUint64T(b *testing.B) {
-	testIntSortBencher[uint64](b, testTinySize, slices.Sort[uint64])
+// tiny32
+func BenchmarkSortSortInt32T(b *testing.B) {
+	testSortBencher[int32](b, testTinySize, sortSort[int32])
 }
-func BenchmarkZSortUint64S(b *testing.B) {
-	testIntSortBencher[uint64](b, testSmallSize, SortIntegers[uint64])
+func BenchmarkSlicesSortInt32T(b *testing.B) {
+	testSortBencher[int32](b, testTinySize, slices.Sort[int32])
 }
-func BenchmarkZSorterUint64S(b *testing.B) {
-	testIntSortBencher[uint64](b, testSmallSize, NewIntSorter[uint64]().Sort)
+func BenchmarkZSortInt32T(b *testing.B) {
+	testSortBencher[int32](b, testTinySize, Sort[int32])
 }
-func BenchmarkGoSortUint64S(b *testing.B) {
-	testIntSortBencher[uint64](b, testSmallSize, slices.Sort[uint64])
+func BenchmarkZSorterInt32T(b *testing.B) {
+	testSortBencher[int32](b, testTinySize, newSorter[int32]().withCutoff(0).Sort)
 }
-func BenchmarkZSortUint64M(b *testing.B) {
-	testIntSortBencher[uint64](b, testMediumSize, SortIntegers[uint64])
-}
-func BenchmarkZSorterUint64M(b *testing.B) {
-	testIntSortBencher[uint64](b, testMediumSize, NewIntSorter[uint64]().Sort)
-}
-func BenchmarkGoSortUint64M(b *testing.B) {
-	testIntSortBencher[uint64](b, testMediumSize, slices.Sort[uint64])
+
+// tiny
+func BenchmarkSortSortUint64T(b *testing.B) {
+	testSortBencher[uint64](b, testTinySize, sortSort[uint64])
 }
-func BenchmarkZSortUint64L(b *testing.B) {
-	testIntSortBencher[uint64](b, testLargeSize, SortIntegers[uint64])
+func BenchmarkSlicesSortUint64T(b *testing.B) {
+	testSortBencher[uint64](b, testTinySize, slices.Sort[uint64])
 }
-func BenchmarkZSorterUint64L(b *testing.B) {
-	testIntSortBencher[uint64](b, testLargeSize, NewIntSorter[uint64]().Sort)
+func BenchmarkZSortUint64T(b *testing.B) {
+	testSortBencher[uint64](b, testTinySize, Sort[uint64])
 }
-func BenchmarkGoSortUint64L(b *testing.B) {
-	testIntSortBencher[uint64](b, testLargeSize, slices.Sort[uint64])
+func BenchmarkZSorterUint64T(b *testing.B) {
+	testSortBencher[uint64](b, testTinySize, newSorter[uint64]().withCutoff(0).Sort)
 }
 
-// []float64
-func BenchmarkZSortFloat64T(b *testing.B) {
-	sortFunc := func(x []float64) {
-		SortFloatsBYOB(x, make([]float64, testTinySize))
-	}
-	testFloatSortBencher(b, testTinySize, sortFunc)
+// small
+func BenchmarkSortSortUint64S(b *testing.B) {
+	testSortBencher[uint64](b, testSmallSize, sortSort[uint64])
 }
-func BenchmarkZSorterFloat64T(b *testing.B) {
-	s := newFloatSorter[float64]()
-	s.setCutoff(0)
-	testFloatSortBencher(b, testTinySize, s.Sort)
+func BenchmarkSlicesSortUint64S(b *testing.B) {
+	testSortBencher[uint64](b, testSmallSize, slices.Sort[uint64])
 }
-func BenchmarkGoSortFloat64T(b *testing.B) {
-	testFloatSortBencher(b, testTinySize, slices.Sort[float64])
+func BenchmarkZSortUint64S(b *testing.B) {
+	testSortBencher[uint64](b, testSmallSize, Sort[uint64])
 }
-func BenchmarkZSortFloat64S(b *testing.B) {
-	testFloatSortBencher(b, testSmallSize, SortFloats[float64])
+func BenchmarkZSorterUint64S(b *testing.B) {
+	testSortBencher[uint64](b, testSmallSize, newSorter[uint64]().withCutoff(0).Sort)
 }
-func BenchmarkZSorterFloat64S(b *testing.B) {
-	testFloatSortBencher(b, testSmallSize, NewFloatSorter[float64]().Sort)
+
+// medium
+func BenchmarkSortSortUint64M(b *testing.B) {
+	testSortBencher[uint64](b, testMediumSize, sortSort[uint64])
 }
-func BenchmarkGoSortFloat64S(b *testing.B) {
-	testFloatSortBencher(b, testSmallSize, slices.Sort[float64])
+func BenchmarkSlicesSortUint64M(b *testing.B) {
+	testSortBencher[uint64](b, testMediumSize, slices.Sort[uint64])
 }
-func BenchmarkZSortFloat64M(b *testing.B) {
-	testFloatSortBencher(b, testMediumSize, SortFloats[float64])
+func BenchmarkZSortUint64M(b *testing.B) {
+	testSortBencher[uint64](b, testMediumSize, Sort[uint64])
 }
-func BenchmarkZSorterFloat64M(b *testing.B) {
-	testFloatSortBencher(b, testMediumSize, NewFloatSorter[float64]().Sort)
+func BenchmarkZSorterUint64M(b *testing.B) {
+	testSortBencher[uint64](b, testMediumSize, newSorter[uint64]().withCutoff(0).Sort)
 }
-func BenchmarkGoSortFloat64M(b *testing.B) {
-	testFloatSortBencher(b, testMediumSize, slices.Sort[float64])
+
+// large
+func BenchmarkSortSortUint64L(b *testing.B) {
+	testSortBencher[uint64](b, testLargeSize, sortSort[uint64])
 }
-func BenchmarkZSortFloat64L(b *testing.B) {
-	testFloatSortBencher(b, testLargeSize, SortFloats[float64])
+func BenchmarkSlicesSortUint64L(b *testing.B) {
+	testSortBencher[uint64](b, testLargeSize, slices.Sort[uint64])
 }
-func BenchmarkZSorterFloat64L(b *testing.B) {
-	testFloatSortBencher(b, testLargeSize, NewFloatSorter[float64]().Sort)
+func BenchmarkZSortUint64L(b *testing.B) {
+	testSortBencher[uint64](b, testLargeSize, Sort[uint64])
 }
-func BenchmarkGoSortFloat64L(b *testing.B) {
-	testFloatSortBencher(b, testLargeSize, slices.Sort[float64])
+func BenchmarkZSorterUint64L(b *testing.B) {
+	testSortBencher[uint64](b, testLargeSize, newSorter[uint64]().withCutoff(0).Sort)
 }
 
 func sortedTestData[T constraints.Integer](size int) func(int) [][]T {
 	return func(n int) [][]T {
-		result := testDataFromRng[T](randInteger[T](), size)(n)
+		result := testDataFromRng[T](internal.RandInteger[T](), size)(n)
 		var wg sync.WaitGroup
 		cpus := runtime.NumCPU()
 		for cpu := 0; cpu < cpus; cpu++ {
 			wg.Add(1)
 			go func(c int) {
 				defer wg.Done()
-				presorter := NewIntSorter[T]()
+				presorter := NewSorter[T]()
 				for i := c; i < len(result); i += cpus {
 					presorter.Sort(result[i])
 				}
@@ -123,42 +109,38 @@ func sortedTestData[T constraints.Integer](size int) func(int) [][]T {
 }
 
 // presorted
+func BenchmarkSortSortSorted(b *testing.B) {
+	testBencher[uint64](b, sortSort[uint64], sortedTestData[uint64](testSmallSize))
+}
+func BenchmarkSlicesSortSorted(b *testing.B) {
+	testBencher[uint64](b, slices.Sort[uint64], sortedTestData[uint64](testSmallSize))
+}
 func BenchmarkZSortSorted(b *testing.B) {
-	testBencher[uint64](b, SortIntegers[uint64], sortedTestData[uint64](testSmallSize))
+	testBencher[uint64](b, Sort[uint64], sortedTestData[uint64](testSmallSize))
 }
 func BenchmarkZSorterSorted(b *testing.B) {
-	testBencher[uint64](b, NewIntSorter[uint64]().Sort, sortedTestData[uint64](testSmallSize))
+	testBencher[uint64](b, NewSorter[uint64]().Sort, sortedTestData[uint64](testSmallSize))
 }
-func BenchmarkGoSortSorted(b *testing.B) {
-	testBencher[uint64](b, slices.Sort[uint64], sortedTestData[uint64](testSmallSize))
-}
-
-type sorter[T any] func([]T)
 
 func testDataFromRng[T any](rng func() T, size int) func(int) [][]T {
 	return func(n int) [][]T {
 		result := make([][]T, n)
 		for i := 0; i < n; i++ {
 			result[i] = make([]T, size)
-			fillSlice(result[i], rng)
+			internal.FillSlice(result[i], rng)
 		}
 		return result
 	}
 }
 
-func testIntSortBencher[T constraints.Integer](b *testing.B, size int, s sorter[T]) {
+func testSortBencher[T constraints.Integer](b *testing.B, size int, s func([]T)) {
 	rand.Seed(time.Now().UnixNano())
-	rng := randInteger[T]()
+	rng := internal.RandInteger[T]()
 	testBencher(b, s, testDataFromRng[T](rng, size))
 }
 
-func testFloatSortBencher(b *testing.B, size int, s sorter[float64]) {
-	rand.Seed(time.Now().UnixNano())
-	testBencher(b, s, testDataFromRng[float64](randFloat64(false), size))
-}
-
 // for bench b, tests s by copying rnd to x and sorting x repeatedly
-func testBencher[T constraints.Ordered](b *testing.B, s sorter[T], getTestData func(n int) [][]T) {
+func testBencher[T constraints.Ordered](b *testing.B, s func([]T), getTestData func(n int) [][]T) {
 	b.StopTimer()
 	rnd := getTestData(b.N)
 	b.ResetTimer()
@@ -167,3 +149,13 @@ func testBencher[T constraints.Ordered](b *testing.B, s sorter[T], getTestData f
 		s(rnd[i])
 	}
 }
+
+type sortable[I constraints.Integer] []I
+
+func (s sortable[I]) Len() int           { return len(s) }
+func (s sortable[I]) Less(i, j int) bool { return s[i] < s[j] }
+func (s sortable[I]) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
+
+func sortSort[I constraints.Integer](x []I) {
+	sort.Sort(sortable[I](x))
+}
diff --git a/floats/README.md b/floats/README.md
@@ -0,0 +1,25 @@
+zermelo/floats
+==============
+This subpackage handles sorting float slices. 
+
+Example
+-------
+
+```go
+package main
+
+import (
+	"github.com/shawnsmithdev/zermelo/v2/floats"
+	"something"
+)
+
+func main() {
+	var x []float64
+	x = something.GetFloatData()
+	floats.SortFloats(x)
+}
+```
+
+Sorter Example
+--------------
+todo
diff --git a/floats/float_sorter.go b/floats/float_sorter.go
@@ -0,0 +1,63 @@
+package floats
+
+import (
+	"github.com/shawnsmithdev/zermelo/v2"
+	"golang.org/x/exp/constraints"
+	"golang.org/x/exp/slices"
+)
+
+// cutoffSorter is a Sorter with adjustable comparison sort cutoff, for testing.
+type cutoffSorter[F constraints.Float] interface {
+	zermelo.Sorter[F]
+	withCutoff(int) cutoffSorter[F]
+}
+
+type floatSorter[F constraints.Float, U constraints.Unsigned] struct {
+	uintSorter     zermelo.Sorter[U]
+	compSortCutoff int
+	topBit         U
+}
+
+func (s *floatSorter[F, U]) Sort(x []F) {
+	x = sortNaNs(x)
+	if len(x) < 2 {
+		return
+	}
+	if len(x) < s.compSortCutoff {
+		slices.Sort(x)
+		return
+	}
+
+	y := unsafeSliceConvert[F, U](x)
+	floatFlip[U](y, s.topBit)
+	s.uintSorter.Sort(y)
+	floatUnflip[U](y, s.topBit)
+}
+
+func (s *floatSorter[F, U]) withCutoff(cutoff int) cutoffSorter[F] {
+	s.compSortCutoff = cutoff
+	return s
+}
+
+// NewFloatSorter creates a new Sorter for float slices that uses radix sort on large slices and reuses buffers.
+// The first sort creates a buffer the same size as the slice being sorted and keeps it for future use.
+// Later sorts may grow this buffer as needed. The Sorter returned is not thread safe.
+// Using this sorter can be much faster than repeated calls to SortFloats.
+func NewFloatSorter[F constraints.Float]() zermelo.Sorter[F] {
+	return newFloatSorter[F]()
+}
+
+func newFloatSorter[F constraints.Float]() cutoffSorter[F] {
+	if isFloat32[F]() {
+		return &floatSorter[F, uint32]{
+			uintSorter:     zermelo.NewSorter[uint32](),
+			compSortCutoff: compSortCutoffFloat32,
+			topBit:         uint32(1) << 31,
+		}
+	}
+	return &floatSorter[F, uint64]{
+		uintSorter:     zermelo.NewSorter[uint64](),
+		compSortCutoff: compSortCutoffFloat64,
+		topBit:         uint64(1) << 63,
+	}
+}