Skip to content

Commit

Permalink
Version 2 Initial Commit
Browse files Browse the repository at this point in the history
This commit drops all the non-generic, reflection-based code, including all previous subpackages.
It also renames several functions, and moves the float code into a new subpackage.
This represents a large change to the API and thus is a new major version.
  • Loading branch information
shawnsmithdev committed May 23, 2022
1 parent e5b8c98 commit d9207db
Show file tree
Hide file tree
Showing 32 changed files with 804 additions and 1,284 deletions.
35 changes: 25 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
zermelo
zermelo v2
=========
[![go.dev reference](https://img.shields.io/badge/go.dev-reference-007d9c?logo=go&logoColor=white&style=flat-square)](https://pkg.go.dev/github.com/shawnsmithdev/zermelo)
[![license](http://img.shields.io/badge/license-MIT-red.svg?style=flat)](https://raw.githubusercontent.com/shawnsmithdev/zermelo/master/LICENSE)
[![Go Report Card](https://goreportcard.com/badge/github.com/shawnsmithdev/zermelo)](https://goreportcard.com/report/github.com/shawnsmithdev/zermelo)

A radix sorting library for Go. Trade memory for speed!
A radix sorting library for Go. Trade memory for speed! Now with more generics!

```go
import "github.com/shawnsmithdev/zermelo"
import "github.com/shawnsmithdev/zermelo/v2"

func foo(large []uint64) {
zermelo.Sort(large)
Expand Down Expand Up @@ -35,29 +35,44 @@ Zermelo is named after [Ernst Zermelo](http://en.wikipedia.org/wiki/Ernst_Zermel

Supported Types
===============
`SortIntegers` and `IntSorter` support constraints.Integer slices,
`Sort` and `NewSorter` support constraints.Integer slices,
that is `[]int`, `[]uint64`, `[]byte`, etc, and derived types.

`SortFloats` and `FloatSorter` support constraints.Float slices,
specifically `[]float32` and `[]float64` and derived types.

Sorter
======

An `IntSorter` or `FloatSorter` will reuse buffers created during `Sort()` calls. This is not thread safe.
A `Sorter` returned by `NewSorter` will reuse buffers created during `Sort()` calls. This is not thread safe.
Buffers are grown as needed at a 25% exponential growth rate. This means if you sort a slice of size `n`,
subsequent calls with slices up to `n * 1.25` in length will not cause another buffer allocation. This does not apply
to the first allocation, which will make a buffer of the same size as the requested slice. This way, if the slices being
sorted do not grow in size, there is no unused buffer space.

```go
import "github.com/shawnsmithdev/zermelo"
import "github.com/shawnsmithdev/zermelo/v2"

func foo(bar [][]uint64) {
sorter := zermelo.NewIntSorter[uint64]()
sorter := zermelo.NewSorter[uint64]()
for _, x := range(bar) {
sorter.Sort(x)
}
}

```
```

Float Subpackage
================
`SortFloats` and `FloatSorter` provided in the `floats` subpackage support constraints.Float slices,
specifically `[]float32` and `[]float64` and derived types.
This uses the unsafe package to treat floats as though they were unsigned integers.

```go
import "github.com/shawnsmithdev/zermelo/v2/floats"

func foo(bar [][]float64) {
sorter := floats.NewFloatSorter[float64]()
for _, x := range(bar) {
sorter.Sort(x)
}
}
```
156 changes: 74 additions & 82 deletions bench_test.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package zermelo

import (
"github.com/shawnsmithdev/zermelo/v2/internal"
"golang.org/x/exp/constraints"
"golang.org/x/exp/slices"
"math/rand"
"runtime"
"sort"
"sync"
"testing"
"time"
Expand All @@ -16,102 +18,86 @@ const testSmallSize = compSortCutoff64
const testMediumSize = 1024 // ~1k * 64bit = 8 KB
const testLargeSize = 1 << 20 // ~1M * 64bit = 8 MB

// []uint64
func BenchmarkZSortUint64T(b *testing.B) {
sortFunc := func(x []uint64) {
SortIntegersBYOB[uint64](x, make([]uint64, testTinySize))
}
testIntSortBencher[uint64](b, testTinySize, sortFunc)
}
func BenchmarkZSorterUint64T(b *testing.B) {
s := newIntSorter[uint64]()
s.setCutoff(0)
testIntSortBencher[uint64](b, testTinySize, s.Sort)
}
func BenchmarkGoSortUint64T(b *testing.B) {
testIntSortBencher[uint64](b, testTinySize, slices.Sort[uint64])
// tiny32
func BenchmarkSortSortInt32T(b *testing.B) {
testSortBencher[int32](b, testTinySize, sortSort[int32])
}
func BenchmarkZSortUint64S(b *testing.B) {
testIntSortBencher[uint64](b, testSmallSize, SortIntegers[uint64])
func BenchmarkSlicesSortInt32T(b *testing.B) {
testSortBencher[int32](b, testTinySize, slices.Sort[int32])
}
func BenchmarkZSorterUint64S(b *testing.B) {
testIntSortBencher[uint64](b, testSmallSize, NewIntSorter[uint64]().Sort)
func BenchmarkZSortInt32T(b *testing.B) {
testSortBencher[int32](b, testTinySize, Sort[int32])
}
func BenchmarkGoSortUint64S(b *testing.B) {
testIntSortBencher[uint64](b, testSmallSize, slices.Sort[uint64])
func BenchmarkZSorterInt32T(b *testing.B) {
testSortBencher[int32](b, testTinySize, newSorter[int32]().withCutoff(0).Sort)
}
func BenchmarkZSortUint64M(b *testing.B) {
testIntSortBencher[uint64](b, testMediumSize, SortIntegers[uint64])
}
func BenchmarkZSorterUint64M(b *testing.B) {
testIntSortBencher[uint64](b, testMediumSize, NewIntSorter[uint64]().Sort)
}
func BenchmarkGoSortUint64M(b *testing.B) {
testIntSortBencher[uint64](b, testMediumSize, slices.Sort[uint64])

// tiny
func BenchmarkSortSortUint64T(b *testing.B) {
testSortBencher[uint64](b, testTinySize, sortSort[uint64])
}
func BenchmarkZSortUint64L(b *testing.B) {
testIntSortBencher[uint64](b, testLargeSize, SortIntegers[uint64])
func BenchmarkSlicesSortUint64T(b *testing.B) {
testSortBencher[uint64](b, testTinySize, slices.Sort[uint64])
}
func BenchmarkZSorterUint64L(b *testing.B) {
testIntSortBencher[uint64](b, testLargeSize, NewIntSorter[uint64]().Sort)
func BenchmarkZSortUint64T(b *testing.B) {
testSortBencher[uint64](b, testTinySize, Sort[uint64])
}
func BenchmarkGoSortUint64L(b *testing.B) {
testIntSortBencher[uint64](b, testLargeSize, slices.Sort[uint64])
func BenchmarkZSorterUint64T(b *testing.B) {
testSortBencher[uint64](b, testTinySize, newSorter[uint64]().withCutoff(0).Sort)
}

// []float64
func BenchmarkZSortFloat64T(b *testing.B) {
sortFunc := func(x []float64) {
SortFloatsBYOB(x, make([]float64, testTinySize))
}
testFloatSortBencher(b, testTinySize, sortFunc)
// small
func BenchmarkSortSortUint64S(b *testing.B) {
testSortBencher[uint64](b, testSmallSize, sortSort[uint64])
}
func BenchmarkZSorterFloat64T(b *testing.B) {
s := newFloatSorter[float64]()
s.setCutoff(0)
testFloatSortBencher(b, testTinySize, s.Sort)
func BenchmarkSlicesSortUint64S(b *testing.B) {
testSortBencher[uint64](b, testSmallSize, slices.Sort[uint64])
}
func BenchmarkGoSortFloat64T(b *testing.B) {
testFloatSortBencher(b, testTinySize, slices.Sort[float64])
func BenchmarkZSortUint64S(b *testing.B) {
testSortBencher[uint64](b, testSmallSize, Sort[uint64])
}
func BenchmarkZSortFloat64S(b *testing.B) {
testFloatSortBencher(b, testSmallSize, SortFloats[float64])
func BenchmarkZSorterUint64S(b *testing.B) {
testSortBencher[uint64](b, testSmallSize, newSorter[uint64]().withCutoff(0).Sort)
}
func BenchmarkZSorterFloat64S(b *testing.B) {
testFloatSortBencher(b, testSmallSize, NewFloatSorter[float64]().Sort)

// medium
func BenchmarkSortSortUint64M(b *testing.B) {
testSortBencher[uint64](b, testMediumSize, sortSort[uint64])
}
func BenchmarkGoSortFloat64S(b *testing.B) {
testFloatSortBencher(b, testSmallSize, slices.Sort[float64])
func BenchmarkSlicesSortUint64M(b *testing.B) {
testSortBencher[uint64](b, testMediumSize, slices.Sort[uint64])
}
func BenchmarkZSortFloat64M(b *testing.B) {
testFloatSortBencher(b, testMediumSize, SortFloats[float64])
func BenchmarkZSortUint64M(b *testing.B) {
testSortBencher[uint64](b, testMediumSize, Sort[uint64])
}
func BenchmarkZSorterFloat64M(b *testing.B) {
testFloatSortBencher(b, testMediumSize, NewFloatSorter[float64]().Sort)
func BenchmarkZSorterUint64M(b *testing.B) {
testSortBencher[uint64](b, testMediumSize, newSorter[uint64]().withCutoff(0).Sort)
}
func BenchmarkGoSortFloat64M(b *testing.B) {
testFloatSortBencher(b, testMediumSize, slices.Sort[float64])

// large
func BenchmarkSortSortUint64L(b *testing.B) {
testSortBencher[uint64](b, testLargeSize, sortSort[uint64])
}
func BenchmarkZSortFloat64L(b *testing.B) {
testFloatSortBencher(b, testLargeSize, SortFloats[float64])
func BenchmarkSlicesSortUint64L(b *testing.B) {
testSortBencher[uint64](b, testLargeSize, slices.Sort[uint64])
}
func BenchmarkZSorterFloat64L(b *testing.B) {
testFloatSortBencher(b, testLargeSize, NewFloatSorter[float64]().Sort)
func BenchmarkZSortUint64L(b *testing.B) {
testSortBencher[uint64](b, testLargeSize, Sort[uint64])
}
func BenchmarkGoSortFloat64L(b *testing.B) {
testFloatSortBencher(b, testLargeSize, slices.Sort[float64])
func BenchmarkZSorterUint64L(b *testing.B) {
testSortBencher[uint64](b, testLargeSize, newSorter[uint64]().withCutoff(0).Sort)
}

func sortedTestData[T constraints.Integer](size int) func(int) [][]T {
return func(n int) [][]T {
result := testDataFromRng[T](randInteger[T](), size)(n)
result := testDataFromRng[T](internal.RandInteger[T](), size)(n)
var wg sync.WaitGroup
cpus := runtime.NumCPU()
for cpu := 0; cpu < cpus; cpu++ {
wg.Add(1)
go func(c int) {
defer wg.Done()
presorter := NewIntSorter[T]()
presorter := NewSorter[T]()
for i := c; i < len(result); i += cpus {
presorter.Sort(result[i])
}
Expand All @@ -123,42 +109,38 @@ func sortedTestData[T constraints.Integer](size int) func(int) [][]T {
}

// presorted
func BenchmarkSortSortSorted(b *testing.B) {
testBencher[uint64](b, sortSort[uint64], sortedTestData[uint64](testSmallSize))
}
func BenchmarkSlicesSortSorted(b *testing.B) {
testBencher[uint64](b, slices.Sort[uint64], sortedTestData[uint64](testSmallSize))
}
func BenchmarkZSortSorted(b *testing.B) {
testBencher[uint64](b, SortIntegers[uint64], sortedTestData[uint64](testSmallSize))
testBencher[uint64](b, Sort[uint64], sortedTestData[uint64](testSmallSize))
}
func BenchmarkZSorterSorted(b *testing.B) {
testBencher[uint64](b, NewIntSorter[uint64]().Sort, sortedTestData[uint64](testSmallSize))
testBencher[uint64](b, NewSorter[uint64]().Sort, sortedTestData[uint64](testSmallSize))
}
func BenchmarkGoSortSorted(b *testing.B) {
testBencher[uint64](b, slices.Sort[uint64], sortedTestData[uint64](testSmallSize))
}

type sorter[T any] func([]T)

func testDataFromRng[T any](rng func() T, size int) func(int) [][]T {
return func(n int) [][]T {
result := make([][]T, n)
for i := 0; i < n; i++ {
result[i] = make([]T, size)
fillSlice(result[i], rng)
internal.FillSlice(result[i], rng)
}
return result
}
}

func testIntSortBencher[T constraints.Integer](b *testing.B, size int, s sorter[T]) {
func testSortBencher[T constraints.Integer](b *testing.B, size int, s func([]T)) {
rand.Seed(time.Now().UnixNano())
rng := randInteger[T]()
rng := internal.RandInteger[T]()
testBencher(b, s, testDataFromRng[T](rng, size))
}

func testFloatSortBencher(b *testing.B, size int, s sorter[float64]) {
rand.Seed(time.Now().UnixNano())
testBencher(b, s, testDataFromRng[float64](randFloat64(false), size))
}

// for bench b, tests s by copying rnd to x and sorting x repeatedly
func testBencher[T constraints.Ordered](b *testing.B, s sorter[T], getTestData func(n int) [][]T) {
func testBencher[T constraints.Ordered](b *testing.B, s func([]T), getTestData func(n int) [][]T) {
b.StopTimer()
rnd := getTestData(b.N)
b.ResetTimer()
Expand All @@ -167,3 +149,13 @@ func testBencher[T constraints.Ordered](b *testing.B, s sorter[T], getTestData f
s(rnd[i])
}
}

// sortable wraps an integer slice so that it satisfies sort.Interface,
// letting the benchmarks compare against the standard library's sort.Sort.
type sortable[I constraints.Integer] []I

// Len reports the number of elements in the slice.
func (xs sortable[I]) Len() int {
	return len(xs)
}

// Less reports whether the element at index a orders before the element at index b.
func (xs sortable[I]) Less(a, b int) bool {
	return xs[a] < xs[b]
}

// Swap exchanges the elements at indexes a and b.
func (xs sortable[I]) Swap(a, b int) {
	xs[b], xs[a] = xs[a], xs[b]
}

// sortSort sorts x in place by handing it to sort.Sort through the sortable adapter.
func sortSort[I constraints.Integer](x []I) {
	var data sort.Interface = sortable[I](x)
	sort.Sort(data)
}
25 changes: 25 additions & 0 deletions floats/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
zermelo/floats
==============
This subpackage handles sorting float slices.

Example
-------

```go
package main

import (
"github.com/shawnsmithdev/zermelo/v2/floats"
"something"
)

func main() {
var x []float64
x = something.GetFloatData()
floats.SortFloats(x)
}
```

Sorter Example
--------------
todo
63 changes: 63 additions & 0 deletions floats/float_sorter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package floats

import (
"github.com/shawnsmithdev/zermelo/v2"
"golang.org/x/exp/constraints"
"golang.org/x/exp/slices"
)

// cutoffSorter is a Sorter with adjustable comparison sort cutoff, for testing.
// It embeds zermelo.Sorter[F], so any cutoffSorter can be returned wherever a
// zermelo.Sorter[F] is expected.
type cutoffSorter[F constraints.Float] interface {
zermelo.Sorter[F]
// withCutoff sets the comparison sort cutoff and returns a cutoffSorter,
// which allows the call to be chained directly into Sort.
withCutoff(int) cutoffSorter[F]
}

// floatSorter sorts slices of float type F by viewing them as slices of the
// unsigned integer type U and delegating the radix sort to a zermelo.Sorter[U].
type floatSorter[F constraints.Float, U constraints.Unsigned] struct {
// uintSorter sorts the unsigned integer view of the float slice.
uintSorter zermelo.Sorter[U]
// compSortCutoff is the slice length below which Sort falls back to slices.Sort.
compSortCutoff int
// topBit is the mask handed to floatFlip and floatUnflip; newFloatSorter
// sets it to the highest bit of U.
topBit U
}

// Sort sorts x in place. Short inputs are handed to slices.Sort; longer ones
// are reinterpreted as unsigned integers and sorted by the wrapped uintSorter.
func (s *floatSorter[F, U]) Sort(x []F) {
	// Only the slice returned by sortNaNs is processed further.
	// NOTE(review): presumably this is the NaN-free portion of x; confirm against sortNaNs.
	x = sortNaNs(x)

	switch n := len(x); {
	case n < 2:
		// Nothing left to order.
		return
	case n < s.compSortCutoff:
		// Below the cutoff the comparison sort is used instead of radix sort.
		slices.Sort(x)
		return
	}

	// View the floats as unsigned integers, transform them with floatFlip,
	// sort the integers, then undo the transform with floatUnflip.
	bits := unsafeSliceConvert[F, U](x)
	floatFlip[U](bits, s.topBit)
	s.uintSorter.Sort(bits)
	floatUnflip[U](bits, s.topBit)
}

// withCutoff overwrites the comparison sort cutoff used by Sort and returns the
// same sorter, so the call can be chained. It mutates the receiver rather than
// making a copy.
func (s *floatSorter[F, U]) withCutoff(cutoff int) cutoffSorter[F] {
s.compSortCutoff = cutoff
return s
}

// NewFloatSorter returns a zermelo.Sorter for float slices. Large slices are
// sorted with radix sort, and the sorter keeps its buffer between calls: the
// first sort allocates a buffer as large as the slice being sorted, and later
// sorts grow it only when needed. The returned sorter is not thread safe.
// Reusing one sorter can be much faster than calling SortFloats repeatedly.
func NewFloatSorter[F constraints.Float]() zermelo.Sorter[F] {
	var sorter zermelo.Sorter[F] = newFloatSorter[F]()
	return sorter
}

// newFloatSorter builds the concrete floatSorter for F and returns it as a
// cutoffSorter so that tests in this package can adjust the cutoff.
// The unsigned integer type, default cutoff and top-bit mask are chosen
// according to what isFloat32 reports for F.
func newFloatSorter[F constraints.Float]() cutoffSorter[F] {
	if !isFloat32[F]() {
		// Not float32: sort through a uint64 view, whose highest bit is bit 63.
		return &floatSorter[F, uint64]{
			topBit:         uint64(1) << 63,
			compSortCutoff: compSortCutoffFloat64,
			uintSorter:     zermelo.NewSorter[uint64](),
		}
	}

	// float32: sort through a uint32 view, whose highest bit is bit 31.
	return &floatSorter[F, uint32]{
		topBit:         uint32(1) << 31,
		compSortCutoff: compSortCutoffFloat32,
		uintSorter:     zermelo.NewSorter[uint32](),
	}
}
Loading

0 comments on commit d9207db

Please sign in to comment.