summaryrefslogtreecommitdiff
path: root/src/sort/sort.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/sort/sort.go')
-rw-r--r--src/sort/sort.go474
1 files changed, 474 insertions, 0 deletions
diff --git a/src/sort/sort.go b/src/sort/sort.go
new file mode 100644
index 000000000..e980c295c
--- /dev/null
+++ b/src/sort/sort.go
@@ -0,0 +1,474 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package sort provides primitives for sorting slices and user-defined
+// collections.
+package sort
+
+// A type, typically a collection, that satisfies sort.Interface can be
+// sorted by the routines in this package. The methods require that the
+// elements of the collection be enumerated by an integer index.
+type Interface interface {
+ // Len is the number of elements in the collection.
+ Len() int
+ // Less reports whether the element with
+ // index i should sort before the element with index j.
+ Less(i, j int) bool
+ // Swap swaps the elements with indexes i and j.
+ Swap(i, j int)
+}
+
+func min(a, b int) int {
+ if a < b {
+ return a
+ }
+ return b
+}
+
+// Insertion sort
+func insertionSort(data Interface, a, b int) {
+ for i := a + 1; i < b; i++ {
+ for j := i; j > a && data.Less(j, j-1); j-- {
+ data.Swap(j, j-1)
+ }
+ }
+}
+
+// siftDown implements the heap property on data[lo, hi).
+// first is an offset into the array where the root of the heap lies.
+func siftDown(data Interface, lo, hi, first int) {
+ root := lo
+ for {
+ child := 2*root + 1
+ if child >= hi {
+ break
+ }
+ if child+1 < hi && data.Less(first+child, first+child+1) {
+ child++
+ }
+ if !data.Less(first+root, first+child) {
+ return
+ }
+ data.Swap(first+root, first+child)
+ root = child
+ }
+}
+
+func heapSort(data Interface, a, b int) {
+ first := a
+ lo := 0
+ hi := b - a
+
+ // Build heap with greatest element at top.
+ for i := (hi - 1) / 2; i >= 0; i-- {
+ siftDown(data, i, hi, first)
+ }
+
+ // Pop elements, largest first, into end of data.
+ for i := hi - 1; i >= 0; i-- {
+ data.Swap(first, first+i)
+ siftDown(data, lo, i, first)
+ }
+}
+
+// Quicksort, following Bentley and McIlroy,
+// ``Engineering a Sort Function,'' SP&E November 1993.
+
+// medianOfThree moves the median of the three values data[a], data[b], data[c] into data[a].
+func medianOfThree(data Interface, a, b, c int) {
+ m0 := b
+ m1 := a
+ m2 := c
+ // bubble sort on 3 elements
+ if data.Less(m1, m0) {
+ data.Swap(m1, m0)
+ }
+ if data.Less(m2, m1) {
+ data.Swap(m2, m1)
+ }
+ if data.Less(m1, m0) {
+ data.Swap(m1, m0)
+ }
+ // now data[m0] <= data[m1] <= data[m2]
+}
+
+func swapRange(data Interface, a, b, n int) {
+ for i := 0; i < n; i++ {
+ data.Swap(a+i, b+i)
+ }
+}
+
+func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
+ m := lo + (hi-lo)/2 // Written like this to avoid integer overflow.
+ if hi-lo > 40 {
+ // Tukey's ``Ninther,'' median of three medians of three.
+ s := (hi - lo) / 8
+ medianOfThree(data, lo, lo+s, lo+2*s)
+ medianOfThree(data, m, m-s, m+s)
+ medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
+ }
+ medianOfThree(data, lo, m, hi-1)
+
+ // Invariants are:
+ // data[lo] = pivot (set up by ChoosePivot)
+ // data[lo <= i < a] = pivot
+ // data[a <= i < b] < pivot
+ // data[b <= i < c] is unexamined
+ // data[c <= i < d] > pivot
+ // data[d <= i < hi] = pivot
+ //
+ // Once b meets c, can swap the "= pivot" sections
+ // into the middle of the slice.
+ pivot := lo
+ a, b, c, d := lo+1, lo+1, hi, hi
+ for {
+ for b < c {
+ if data.Less(b, pivot) { // data[b] < pivot
+ b++
+ } else if !data.Less(pivot, b) { // data[b] = pivot
+ data.Swap(a, b)
+ a++
+ b++
+ } else {
+ break
+ }
+ }
+ for b < c {
+ if data.Less(pivot, c-1) { // data[c-1] > pivot
+ c--
+ } else if !data.Less(c-1, pivot) { // data[c-1] = pivot
+ data.Swap(c-1, d-1)
+ c--
+ d--
+ } else {
+ break
+ }
+ }
+ if b >= c {
+ break
+ }
+ // data[b] > pivot; data[c-1] < pivot
+ data.Swap(b, c-1)
+ b++
+ c--
+ }
+
+ n := min(b-a, a-lo)
+ swapRange(data, lo, b-n, n)
+
+ n = min(hi-d, d-c)
+ swapRange(data, c, hi-n, n)
+
+ return lo + b - a, hi - (d - c)
+}
+
+func quickSort(data Interface, a, b, maxDepth int) {
+ for b-a > 7 {
+ if maxDepth == 0 {
+ heapSort(data, a, b)
+ return
+ }
+ maxDepth--
+ mlo, mhi := doPivot(data, a, b)
+ // Avoiding recursion on the larger subproblem guarantees
+ // a stack depth of at most lg(b-a).
+ if mlo-a < b-mhi {
+ quickSort(data, a, mlo, maxDepth)
+ a = mhi // i.e., quickSort(data, mhi, b)
+ } else {
+ quickSort(data, mhi, b, maxDepth)
+ b = mlo // i.e., quickSort(data, a, mlo)
+ }
+ }
+ if b-a > 1 {
+ insertionSort(data, a, b)
+ }
+}
+
+// Sort sorts data.
+// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
+// data.Less and data.Swap. The sort is not guaranteed to be stable.
+func Sort(data Interface) {
+ // Switch to heapsort if depth of 2*ceil(lg(n+1)) is reached.
+ n := data.Len()
+ maxDepth := 0
+ for i := n; i > 0; i >>= 1 {
+ maxDepth++
+ }
+ maxDepth *= 2
+ quickSort(data, 0, n, maxDepth)
+}
+
+type reverse struct {
+ // This embedded Interface permits Reverse to use the methods of
+ // another Interface implementation.
+ Interface
+}
+
+// Less returns the opposite of the embedded implementation's Less method.
+func (r reverse) Less(i, j int) bool {
+ return r.Interface.Less(j, i)
+}
+
+// Reverse returns the reverse order for data.
+func Reverse(data Interface) Interface {
+ return &reverse{data}
+}
+
+// IsSorted reports whether data is sorted.
+func IsSorted(data Interface) bool {
+ n := data.Len()
+ for i := n - 1; i > 0; i-- {
+ if data.Less(i, i-1) {
+ return false
+ }
+ }
+ return true
+}
+
+// Convenience types for common cases
+
+// IntSlice attaches the methods of Interface to []int, sorting in increasing order.
+type IntSlice []int
+
+func (p IntSlice) Len() int { return len(p) }
+func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] }
+func (p IntSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
+
+// Sort is a convenience method.
+func (p IntSlice) Sort() { Sort(p) }
+
+// Float64Slice attaches the methods of Interface to []float64, sorting in increasing order.
+type Float64Slice []float64
+
+func (p Float64Slice) Len() int { return len(p) }
+func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) }
+func (p Float64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
+
+// isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
+func isNaN(f float64) bool {
+ return f != f
+}
+
+// Sort is a convenience method.
+func (p Float64Slice) Sort() { Sort(p) }
+
+// StringSlice attaches the methods of Interface to []string, sorting in increasing order.
+type StringSlice []string
+
+func (p StringSlice) Len() int { return len(p) }
+func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] }
+func (p StringSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
+
+// Sort is a convenience method.
+func (p StringSlice) Sort() { Sort(p) }
+
+// Convenience wrappers for common cases
+
+// Ints sorts a slice of ints in increasing order.
+func Ints(a []int) { Sort(IntSlice(a)) }
+
+// Float64s sorts a slice of float64s in increasing order.
+func Float64s(a []float64) { Sort(Float64Slice(a)) }
+
+// Strings sorts a slice of strings in increasing order.
+func Strings(a []string) { Sort(StringSlice(a)) }
+
+// IntsAreSorted tests whether a slice of ints is sorted in increasing order.
+func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) }
+
+// Float64sAreSorted tests whether a slice of float64s is sorted in increasing order.
+func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) }
+
+// StringsAreSorted tests whether a slice of strings is sorted in increasing order.
+func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) }
+
+// Notes on stable sorting:
+// The used algorithms are simple and provable correct on all input and use
+// only logarithmic additional stack space. They perform well if compared
+// experimentally to other stable in-place sorting algorithms.
+//
+// Remarks on other algorithms evaluated:
+// - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
+// Not faster.
+// - GCC's __rotate for block rotations: Not faster.
+// - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen
+// and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
+// The given algorithms are in-place, number of Swap and Assignments
+// grow as n log n but the algorithm is not stable.
+// - "Fast Stable In-Plcae Sorting with O(n) Data Moves" J.I. Munro and
+// V. Raman in Algorithmica (1996) 16, 115-160:
+// This algorithm either needs additional 2n bits or works only if there
+// are enough different elements available to encode some permutations
+// which have to be undone later (so not stable an any input).
+// - All the optimal in-place sorting/merging algorithms I found are either
+// unstable or rely on enough different elements in each step to encode the
+// performed block rearrangements. See also "In-Place Merging Algorithms",
+// Denham Coates-Evely, Department of Computer Science, Kings College,
+// January 2004 and the reverences in there.
+// - Often "optimal" algorithms are optimal in the number of assignments
+// but Interface has only Swap as operation.
+
+// Stable sorts data while keeping the original order of equal elements.
+//
+// It makes one call to data.Len to determine n, O(n*log(n)) calls to
+// data.Less and O(n*log(n)*log(n)) calls to data.Swap.
+func Stable(data Interface) {
+ n := data.Len()
+ blockSize := 20
+ a, b := 0, blockSize
+ for b <= n {
+ insertionSort(data, a, b)
+ a = b
+ b += blockSize
+ }
+ insertionSort(data, a, n)
+
+ for blockSize < n {
+ a, b = 0, 2*blockSize
+ for b <= n {
+ symMerge(data, a, a+blockSize, b)
+ a = b
+ b += 2 * blockSize
+ }
+ symMerge(data, a, a+blockSize, n)
+ blockSize *= 2
+ }
+}
+
+// SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using
+// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
+// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
+// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
+// Computer Science, pages 714-723. Springer, 2004.
+//
+// Let M = m-a and N = b-n. Wolog M < N.
+// The recursion depth is bound by ceil(log(N+M)).
+// The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
+// The algorithm needs O((M+N)*log(M)) calls to data.Swap.
+//
+// The paper gives O((M+N)*log(M)) as the number of assignments assuming a
+// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
+// in the paper carries through for Swap operations, especially as the block
+// swapping rotate uses only O(M+N) Swaps.
+func symMerge(data Interface, a, m, b int) {
+ if a >= m || m >= b {
+ return
+ }
+
+ mid := a + (b-a)/2
+ n := mid + m
+ start := 0
+ if m > mid {
+ start = n - b
+ r, p := mid, n-1
+ for start < r {
+ c := start + (r-start)/2
+ if !data.Less(p-c, c) {
+ start = c + 1
+ } else {
+ r = c
+ }
+ }
+ } else {
+ start = a
+ r, p := m, n-1
+ for start < r {
+ c := start + (r-start)/2
+ if !data.Less(p-c, c) {
+ start = c + 1
+ } else {
+ r = c
+ }
+ }
+ }
+ end := n - start
+ rotate(data, start, m, end)
+ symMerge(data, a, start, mid)
+ symMerge(data, mid, end, b)
+}
+
+// Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data:
+// Data of the form 'x u v y' is changed to 'x v u y'.
+// Rotate performs at most b-a many calls to data.Swap.
+func rotate(data Interface, a, m, b int) {
+ i := m - a
+ if i == 0 {
+ return
+ }
+ j := b - m
+ if j == 0 {
+ return
+ }
+
+ if i == j {
+ swapRange(data, a, m, i)
+ return
+ }
+
+ p := a + i
+ for i != j {
+ if i > j {
+ swapRange(data, p-i, p, j)
+ i -= j
+ } else {
+ swapRange(data, p-i, p+j-i, i)
+ j -= i
+ }
+ }
+ swapRange(data, p-i, p, i)
+}
+
+/*
+Complexity of Stable Sorting
+
+
+Complexity of block swapping rotation
+
+Each Swap puts one new element into its correct, final position.
+Elements which reach their final position are no longer moved.
+Thus block swapping rotation needs |u|+|v| calls to Swaps.
+This is best possible as each element might need a move.
+
+Pay attention when comparing to other optimal algorithms which
+typically count the number of assignments instead of swaps:
+E.g. the optimal algorithm of Dudzinski and Dydek for in-place
+rotations uses O(u + v + gcd(u,v)) assignments which is
+better than our O(3 * (u+v)) as gcd(u,v) <= u.
+
+
+Stable sorting by SymMerge and BlockSwap rotations
+
+SymMerg complexity for same size input M = N:
+Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N)
+Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
+
+(The following argument does not fuzz over a missing -1 or
+other stuff which does not impact the final result).
+
+Let n = data.Len(). Assume n = 2^k.
+
+Plain merge sort performs log(n) = k iterations.
+On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
+
+Thus iteration i of merge sort performs:
+Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
+Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
+
+In total k = log(n) iterations are performed; so in total:
+Calls to Less O(log(n) * n)
+Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
+ = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
+
+
+Above results should generalize to arbitrary n = 2^k + p
+and should not be influenced by the initial insertion sort phase:
+Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
+size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort.
+Merge sort iterations start at i = log(bs). With t = log(bs) constant:
+Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
+ = O(n * log(n))
+Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
+
+*/