Skip to content

Commit

Permalink
pkg/list: reimplement sort
Browse files Browse the repository at this point in the history
The reimplemenation replaces uses the high-level
API usage with using the low-level, making use
of the fact that each Vertex used for comparison uses the
same outline and the same conjuncts for "less".
This significantly reduces the number of allocations
and other redundant work.

Before: Benchmark/sort.txtar-12   5  202090158 ns/op
After:  Benchmark/sort.txtar-12  28   43326277 ns/op

Which is about an 80% reduction in running time.

It is possible to reduce runtime further if the type
indicated for x and y are idempotent. There are several
strategies to analyze this. A TODO has been added for this.

This change achieves the lion share of possible performance
improvements for Sort. We leave it open, though, to verify the
result with the various stakeholders.

Issue #745

Signed-off-by: Marcel van Lohuizen <mpvl@gmail.com>
Change-Id: Idd6ac1925f5dd786313ea450218d4d17eb590581
Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/549087
Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
Reviewed-by: Roger Peppe <rogpeppe@gmail.com>
Unity-Result: CUEcueckoo <cueckoo@cuelang.org>
TryBot-Result: CUEcueckoo <cueckoo@cuelang.org>
  • Loading branch information
mpvl committed Feb 1, 2023
1 parent 5d0a6f2 commit 2e70ac7
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 9 deletions.
105 changes: 96 additions & 9 deletions pkg/list/sort.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,20 @@ import (
"sort"

"cuelang.org/go/cue"
"cuelang.org/go/internal/core/adt"
"cuelang.org/go/internal/types"
)

// valueSorter defines a sort.Interface; implemented in cue/builtinutil.go.
type valueSorter struct {
ctx *adt.OpContext
a []cue.Value
cmp cue.Value
err error

cmp *adt.Vertex
less *adt.Vertex
x *adt.Vertex
y *adt.Vertex
}

func (s *valueSorter) ret() ([]cue.Value, error) {
Expand All @@ -42,16 +49,89 @@ func (s *valueSorter) ret() ([]cue.Value, error) {
func (s *valueSorter) Len() int { return len(s.a) }
func (s *valueSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] }
func (s *valueSorter) Less(i, j int) bool {
v := s.cmp.Fill(s.a[i], "x")
v = v.Fill(s.a[j], "y")
isLess, err := v.Lookup("less").Bool()
if err != nil && s.err == nil {
s.err = err
if s.err != nil {
return false
}
var x, y types.Value
s.a[i].Core(&x)
s.a[j].Core(&y)

// Save the state of all relevant arcs and restore later for the
// next comparison.
saveCmp := *s.cmp
saveLess := *s.less
saveX := *s.x
saveY := *s.y

for _, c := range x.V.Conjuncts {
s.x.AddConjunct(c)
}
for _, c := range y.V.Conjuncts {
s.y.AddConjunct(c)
}

// TODO(perf): if we can determine that the comparator values for
// x and y are idempotent (no arcs and a basevalue being top or
// a struct or list marker), then we do not need to reevaluate the input.
// In that case, we can use the below code instead of the above two loops
// setting the conjuncts. This may improve performance significantly.
//
// s.x.BaseValue = x.V.BaseValue
// s.x.Arcs = x.V.Arcs
// s.y.BaseValue = y.V.BaseValue
// s.y.Arcs = y.V.Arcs

s.less.Finalize(s.ctx)
isLess := s.ctx.BoolValue(s.less)
if b := s.less.Err(s.ctx, adt.Finalized); b != nil && s.err == nil {
s.err = b.Err
return true
}

*s.less = saveLess
*s.cmp = saveCmp
*s.x = saveX
*s.y = saveY

return isLess
}

var less = cue.ParsePath("less")

func makeValueSorter(list []cue.Value, cmp cue.Value) (s valueSorter) {
if v := cmp.LookupPath(less); !v.Exists() {
return valueSorter{err: v.Err()}
}

var v types.Value
cmp.Core(&v)
ctx := adt.NewContext(v.R, v.V)

n := &adt.Vertex{
Label: v.V.Label,
Parent: v.V.Parent,
Conjuncts: v.V.Conjuncts,
}
ctx.Unify(n, adt.Conjuncts)

s = valueSorter{
a: list,
ctx: ctx,
cmp: n,
less: getArc(ctx, n, "less"),
x: getArc(ctx, n, "x"),
y: getArc(ctx, n, "y"),
}

// TODO(perf): see comment in the Less method. If we can determine
// the conjuncts for x and y are idempotent, we can pre finalize here and
// ignore the values in the Less method.
// s.x.UpdateStatus(adt.Finalized)
// s.y.UpdateStatus(adt.Finalized)

return s
}

// Sort sorts data while keeping the original order of equal elements.
// It does O(n*log(n)) comparisons.
//
Expand All @@ -64,15 +144,22 @@ func (s *valueSorter) Less(i, j int) bool {
//
// Sort([{a: 2}, {a: 3}, {a: 1}], {x: {}, y: {}, less: x.a < y.a})
func Sort(list []cue.Value, cmp cue.Value) (sorted []cue.Value, err error) {
s := valueSorter{list, cmp, nil}
s := makeValueSorter(list, cmp)

// The input slice is already a copy and that we can modify it safely.
sort.Stable(&s)
return s.ret()
}

func getArc(ctx *adt.OpContext, v *adt.Vertex, s string) *adt.Vertex {
f := ctx.StringLabel(s)
arc, _ := v.GetArc(ctx, f, 0)
return arc
}

// Deprecated: use Sort, which is always stable
func SortStable(list []cue.Value, cmp cue.Value) (sorted []cue.Value, err error) {
s := valueSorter{list, cmp, nil}
s := makeValueSorter(list, cmp)
sort.Stable(&s)
return s.ret()
}
Expand All @@ -87,7 +174,7 @@ func SortStrings(a []string) []string {
//
// See Sort for an example comparator.
func IsSorted(list []cue.Value, cmp cue.Value) bool {
s := valueSorter{list, cmp, nil}
s := makeValueSorter(list, cmp)
return sort.IsSorted(&s)
}

Expand Down
2 changes: 2 additions & 0 deletions pkg/list/testdata/gen.txtar
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ t36: error in call to list.Slice: slice bounds out of range:
./in.cue:38:6
t40: error in call to list.Sort: 2 errors in empty disjunction::
./in.cue:46:6
list:13:17
t42: invalid list element 0 in argument 0 to call: cannot use value 1 (int) as string:
./in.cue:48:6
./in.cue:48:24
Expand All @@ -109,6 +110,7 @@ t49: error in call to list.Take: negative index:
./in.cue:55:6
t54: error in call to list.Sort: 2 errors in empty disjunction::
./in.cue:60:6
list:13:17

Result:
t1: 2.5
Expand Down

0 comments on commit 2e70ac7

Please sign in to comment.