diff --git a/config.nims b/config.nims index c3b9fc0..824ce34 100644 --- a/config.nims +++ b/config.nims @@ -1,5 +1,5 @@ ---threads: on ---d: release ---opt: speed ---passC: -flto ---passL: -flto \ No newline at end of file +# --threads: on +# --d: release +# --opt: speed +# --passC: -flto +# --passL: -flto \ No newline at end of file diff --git a/fixedseq.nim b/fixedseq.nim index 4bdc88c..9961f2b 100644 --- a/fixedseq.nim +++ b/fixedseq.nim @@ -124,7 +124,7 @@ proc shuffle*(s: var FixedSeq, r: var Rand) = proc moveSubstack*(src, dst: var FixedSeq; start: Natural) = - var count: typeof(src.last) = 0 # have to track this separately apparently + var count: FixedSeq.Pointer = 0 # have to track this separately apparently for idx in start .. src.last: swap(src.data[idx], dst.data[dst.last + 1 + count]) inc count @@ -133,7 +133,7 @@ proc moveSubstack*(src, dst: var FixedSeq; start: Natural) = proc moveSubstackPre*(src, dst: var FixedSeq; start: Natural) = - let ssLen = typeof(src.last)(src.last - start + 1) # length of substack + let ssLen = FixedSeq.Pointer(src.last - start + 1) # length of substack for i in countdown(dst.last, 0): swap(dst.data[i], dst.data[i + ssLen]) @@ -144,3 +144,5 @@ proc moveSubstackPre*(src, dst: var FixedSeq; start: Natural) = dst.last += ssLen src.last -= ssLen + +include shiftstack diff --git a/game.nim b/game.nim index f6e8d8a..598f665 100644 --- a/game.nim +++ b/game.nim @@ -33,7 +33,7 @@ proc abbrev*(c: Color): char = result = colorAbbrevs[c] -proc `$`*(s: ColorStack): string = +proc `$`*[T](s: FixedSeq[T, Color, int8]): string = result.add("St@[") for i, color in s: result.add($color) diff --git a/shiftstack.nim b/shiftstack.nim new file mode 100644 index 0000000..7be8b57 --- /dev/null +++ b/shiftstack.nim @@ -0,0 +1,182 @@ +# optimized bit-shifting versions of the FixedSequence substack operations +import bitops, macros +import game + + +macro show(expr: untyped) = + let node = expr.toStrLit + quote do: + echo `node`, " => ", `expr` + + +proc getMasks(): (array[9, uint64], array[9, uint64]) = + # on little-endian architectures, casting an array[8, Color] + # to uint64 effectively reverses it. So we switch these masks + # so that we can refer to them consistently. + let + left = [ + 0'u64, + 0xff_00_00_00_00_00_00_00'u64, + 0xff_ff_00_00_00_00_00_00'u64, + 0xff_ff_ff_00_00_00_00_00'u64, + 0xff_ff_ff_ff_00_00_00_00'u64, + 0xff_ff_ff_ff_ff_00_00_00'u64, + 0xff_ff_ff_ff_ff_ff_00_00'u64, + 0xff_ff_ff_ff_ff_ff_ff_00'u64, + 0xff_ff_ff_ff_ff_ff_ff_ff'u64, + ] + right = [ + 0'u64, + 0x00_00_00_00_00_00_00_ff'u64, + 0x00_00_00_00_00_00_ff_ff'u64, + 0x00_00_00_00_00_ff_ff_ff'u64, + 0x00_00_00_00_ff_ff_ff_ff'u64, + 0x00_00_00_ff_ff_ff_ff_ff'u64, + 0x00_00_ff_ff_ff_ff_ff_ff'u64, + 0x00_ff_ff_ff_ff_ff_ff_ff'u64, + 0xff_ff_ff_ff_ff_ff_ff_ff'u64, + ] + when cpuEndian == bigEndian: + result = (left, right) + when cpuEndian == littleEndian: + result = (right, left) + + +type ShiftStack = FixedSeq[8, Color, int8] +const (masksLeft, masksRight) = getMasks() + + +template `shl`(a: array[8, Color], offset: Natural): array[8, Color] = + when cpuEndian == bigEndian: + cast[array[8, Color]](cast[uint64](a) shl (offset * 8)) + when cpuEndian == littleEndian: # direction is reversed + cast[array[8, Color]](cast[uint64](a) shr (offset * 8)) + + +template `shr`(a: array[8, Color], offset: Natural): array[8, Color] = + when cpuEndian == bigEndian: + cast[array[8, Color]](cast[uint64](a) shr (offset * 8)) + when cpuEndian == littleEndian: + cast[array[8, Color]](cast[uint64](a) shl (offset * 8)) + + +template `and`(a: array[8, Color], mask: uint64): array[8, Color] = + cast[array[8, Color]](cast[uint64](a) and mask) + + +template `or`(a: array[8, Color], mask: uint64): array[8, Color] = + cast[array[8, Color]](cast[uint64](a) or mask) + + +template `or`(a: array[8, Color], mask: array[8, Color]): array[8, Color] = + cast[array[8, Color]](cast[uint64](a) or cast[uint64](mask)) + + +import strutils # remove later +proc moveSubstack*(src, dst: var ShiftStack; start: Natural) = + # shift the source stack to position the substack above its final resting place + # offset is the length of the destination stack, minus the number of items NOT being moved + # number of items not being moved is the same as the start index + var substack: array[8, Color] + # if dst.len == start, no shift necessary, otherwise: + if dst.len > start: + substack = src.data shr (dst.len - start) + elif dst.len < start: + substack = src.data shl (start) + # next, mask the source data to present only the items being moved + # dst.len of 0 corresponds to last mask in masksRight, aka masksRight[^1] + substack = substack and masksRight[^(dst.len + 1)] + # then combine + dst.data = dst.data or substack + # then git rid of the moved items from the source stack + src.data = src.data and masksLeft[start] + # a little bookkeeping + let ssLen = int8(src.len - start) + src.last -= ssLen + dst.last += ssLen + + +proc moveSubstackPre*(src, dst: var ShiftStack; start: Natural) = + let ssLen = int8(src.len - start) + # shift the destination stack to make room for the new items + dst.data = dst.data shr ssLen + # shift source stack to line up the substack with its final resting place + let substack = src.data shl start + # combine + dst.data = dst.data or substack + # get rid of the moved items + src.data = src.data and masksLeft[start] + # more bookkeeping + src.last -= ssLen + dst.last += ssLen + + +proc `==`[T1: FixedSeq, T2: FixedSeq](a: T1, b: T2): bool = + if a.len != b.len: + return false + for i in 0 ..< a.len: + if a.data[i] != b.data[i]: + return false + return true + + +proc testMove[T1, T2: FixedSeq](a1, a2: var T1; b1, b2: var T2; i: Natural): bool = + let (orig_a1, orig_a2) = (a1, a2) + let (orig_b1, orig_b2) = (b1, b2) + a1.moveSubstackPre(a2, i) + b1.moveSubstackPre(b2, i) + if a1 != b1 or a2 != b2: + echo "Failed!" + show orig_b1 + show orig_b2 + echo "<< move ", i, ">>" + show b1 + show b2 + return false + return true + + +when isMainModule: + var c1 = initFixedSeq(5, Color, int8) + var c2 = initFixedSeq(5, Color, int8) + + var s1: ShiftStack + s1.initFixedSeq + var s2: ShiftStack + s2.initFixedSeq + + # s1.add(cPurple) + # s1.add(cRed) + # s1.add(cYellow) + # s1.add(cBlue) + # s1.add(cGreen) + + # show s1 + # show s2 + # echo "<>" + # s1.moveSubstack(s2, 4) + # show s1 + # show s2 + + import random + randomize() + var r = initRand(rand(int64)) + var success = true + + for n in 1 .. 10000: + if c1.len > 0: + let i = r.rand(c1.high) + if not testMove(c1, c2, s1, s2, i): + success = false + echo "Failed after ", n, " iterations." + break + + if c2.len > 0: + let j = r.rand(c2.high) + if not testMove(c2, c1, s2, s1, j): + success = false + echo "Failed after ", n, " iterations." + break + + if success: + echo "Success."