switch to fixed-width storage for word list
All checks were successful
continuous-integration/drone/tag Build is passing

This commit is contained in:
Joseph Montanaro 2022-01-10 11:20:30 -08:00
parent 183e2fe7ac
commit e8d2f16c9e
3 changed files with 35 additions and 53 deletions

View File

@@ -33,6 +33,13 @@ task(dictionary, "Generate dictionary from BNC XML files"):
echo output.strip().splitlines()[^1] echo output.strip().splitlines()[^1]
task(pack, "Pack dictionary into fixed-width file and compress"):
echo "Packing dictionary"
echo runCmd("nim c --run src/dictionary.nim data/dictionary.txt data/dictionary.pack")
before(build): before(build):
if not fileExists("data/dictionary.txt"): if not fileExists("data/dictionary.txt"):
dictionaryTask() dictionaryTask()
if not fileExists("data/dictionary.pack"):
packTask()

View File

@@ -5,82 +5,58 @@ import zippy
type type
Dictionary* = object Dictionary* = object
words: string words: string
offsets: seq[uint32] width: uint32
proc `[]`*(d: Dictionary, i: Natural): string = proc `[]`*(d: Dictionary, i: Natural): string =
# last word has no following start index, so we have to fake it let start = i.uint32 * d.width
# also strings are indexed with ints d.words[start ..< start + d.width]
let slice =
if i == d.offsets.high:
d.offsets[i].int .. d.words.high
else:
d.offsets[i].int ..< d.offsets[i + 1].int
result = d.words[slice]
proc len*(d: Dictionary): int = proc len*(d: Dictionary): int =
result = d.offsets.len result = d.words.len div d.width.int
proc addU32(s: var seq[uint8], i: uint32) = proc addU32(s: var string, i: uint32) =
for offset in 0..3: for offset in 0..3:
let b = cast[uint8](i shr (offset * 8)) let b = cast[char](i shr (offset * 8))
s.add(b) s.add(b)
proc getU32(s: seq[uint8]): uint32 = proc getU32(s: string): uint32 =
for offset in 0..3: for offset in 0..3:
result = result or (cast[uint32](s[offset]) shl (offset * 8)) result = result or (cast[uint32](s[offset]) shl (offset * 8))
proc pack*(d: Dictionary): seq[uint8] = proc pack*(d: Dictionary): string =
let compressed = cast[seq[uint8]](d.words) var data: string
let lenWords = compressed.len data.addU32(d.width)
let lenOffsets = d.offsets.len * 4 data.add(d.words)
# 8 extra bytes for the length specifiers data.compress(dataFormat = dfGzip)
var buff = newSeqOfCap[uint8](lenWords + lenOffsets + 8)
buff.addU32(lenWords.uint32)
buff.add(compressed)
buff.addU32(lenOffsets.uint32)
for offset in d.offsets:
buff.addU32(offset)
result = buff.compress(dataFormat = dfGzip)
proc unpack*[T: seq[uint8]|string](p: T): Dictionary = proc unpack*(p: string): Dictionary =
when T is string: let data = p.uncompress(dataFormat = dfGzip)
let buff = cast[seq[uint8]](p.uncompress(dataFormat = dfGzip)) result.width = data.getU32()
else: result.words = data[4..^1]
let buff = p.uncompress(dataFormat = dfGzip)
let lenWords = buff.getU32()
let nextSection = lenWords + 4
let words = cast[string](buff[4 ..< nextSection])
let numOffsets = buff[nextSection ..< (nextSection + 4)].getU32() div 4
var offsets = newSeqOfCap[uint32](numOffsets)
for i in 0 ..< numOffsets:
let idx = nextSection + 4 + (i * 4)
let offset = buff[idx ..< idx + 4].getU32()
offsets.add(offset)
result = Dictionary(words: words, offsets: offsets)
proc loadWords*(): Dictionary = proc loadWords*(path: string): Dictionary =
var i: uint32 result.width = 25
for word in readFile("../data/dictionary.txt").strip().splitLines(): for word in readFile(path).strip().splitLines():
if word.len > 25:
continue
result.words.add(word) result.words.add(word)
result.offsets.add(i) for _ in 0..<(25 - word.len):
i += word.len.uint32 result.words.add(' ')
when isMainModule: when isMainModule:
import std/os
echo "Loading words..." echo "Loading words..."
let dictionary = loadWords() let dictionary = loadWords(paramStr(1))
echo "Packing dictionary..." echo "Packing dictionary..."
let packed = dictionary.pack() let packed = dictionary.pack()
discard open("../data/dictionary.pack", fmWrite).writeBytes(packed, 0, packed.len) writeFile(paramStr(2), packed)
echo "Dictionary packed." echo "Dictionary packed."

View File

@@ -13,7 +13,7 @@ proc genPassphrase(dict: Dictionary, length, dictSize: int): string =
var words: seq[string] var words: seq[string]
for r in rands: for r in rands:
let i = r mod dictSize.uint64 let i = r mod dictSize.uint64
words.add(dict[i]) words.add(dict[i].strip())
result = words.join(" ") result = words.join(" ")
@@ -52,6 +52,5 @@ proc parseInput(dictLen: int): (int, int) =
when isMainModule: when isMainModule:
let dict = packed.unpack() let dict = packed.unpack()
echo dict.len
let (length, dictSize) = parseInput(dict.len) let (length, dictSize) = parseInput(dict.len)
echo genPassphrase(dict, length, dictSize) echo genPassphrase(dict, length, dictSize)