switch to fixed-width storage for word list
All checks were successful
continuous-integration/drone/tag Build is passing
Parent commit: 183e2fe7ac
This commit: e8d2f16c9e
@ -33,6 +33,13 @@ task(dictionary, "Generate dictionary from BNC XML files"):
|
||||
echo output.strip().splitlines()[^1]
|
||||
|
||||
|
||||
task(pack, "Pack dictionary into fixed-width file and compress"):
  # Compiles and runs the packer, passing the plain-text word list as
  # input and the packed file as output, then prints whatever it reports.
  const packCommand =
    "nim c --run src/dictionary.nim " &
    "data/dictionary.txt data/dictionary.pack"
  echo "Packing dictionary"
  echo runCmd(packCommand)
|
||||
|
||||
|
||||
before(build):
  # Make sure both data files exist before building. The word list is
  # checked first because the pack step reads it.
  if not fileExists("data/dictionary.txt"):
    dictionaryTask()
  if not fileExists("data/dictionary.pack"):
    packTask()
|
||||
|
@ -5,82 +5,58 @@ import zippy
|
||||
type
  Dictionary* = object
    ## Word list stored as fixed-width records.
    ##
    ## Entry `i` lives at bytes `i * width ..< (i + 1) * width` of `words`,
    ## so no per-word offset table is needed.
    words: string  ## all entries back to back, each exactly `width` bytes
    width: uint32  ## size of a single entry in bytes
|
||||
|
||||
|
||||
proc `[]`*(d: Dictionary, i: Natural): string =
  ## Returns the `i`-th entry of the dictionary.
  ##
  ## Entries are fixed-width records laid end to end in `d.words`, so
  ## entry `i` occupies the `d.width` bytes starting at `i * d.width`.
  ## The record is returned exactly as stored, including any padding.
  ## An out-of-range `i` fails the string slice's bounds check.
  # Do the arithmetic in `int`, the native string index type, so the
  # product cannot wrap around the way a 32-bit unsigned multiply would.
  let
    recordLen = d.width.int
    first = i.int * recordLen
  d.words[first ..< first + recordLen]
|
||||
|
||||
|
||||
proc len*(d: Dictionary): int =
  ## Number of entries in the dictionary.
  ##
  ## The count is the total size of the word data divided by the record
  ## width. A dictionary whose `width` is 0 (for example a
  ## default-initialised one) holds no entries, so 0 is returned instead
  ## of dividing by zero.
  if d.width == 0:
    0
  else:
    d.words.len div d.width.int
|
||||
|
||||
|
||||
proc addU32(s: var string, i: uint32) =
  ## Appends `i` to `s` as four bytes, least significant byte first.
  for offset in 0..3:
    # Mask down to one byte and convert, rather than `cast`-ing a 32-bit
    # value to an 8-bit `char` (a size-mismatched reinterpretation).
    s.add(char((i shr (offset * 8)) and 0xFF))
|
||||
|
||||
|
||||
proc getU32(s: string): uint32 =
  ## Decodes the first four bytes of `s` as a `uint32`, least significant
  ## byte first. Bytes after the fourth are ignored.
  ##
  ## `s` must hold at least four bytes; a shorter string fails the index
  ## bounds check.
  for offset in 0..3:
    # `ord` plus a conversion widens the byte explicitly, instead of
    # `cast`-ing an 8-bit `char` to a 32-bit integer.
    result = result or (s[offset].ord.uint32 shl (offset * 8))
|
||||
|
||||
|
||||
proc pack*(d: Dictionary): string =
  ## Serialises `d` into a gzip-compressed blob.
  ##
  ## Uncompressed layout: the record width as a 4-byte header (written by
  ## `addU32`, least significant byte first), followed by the raw
  ## fixed-width word data. No offset table is stored; entry positions
  ## are recomputed from the width.
  const headerLen = 4
  # The final size is known up front, so reserve it once.
  var data = newStringOfCap(headerLen + d.words.len)
  data.addU32(d.width)
  data.add(d.words)
  data.compress(dataFormat = dfGzip)
|
||||
|
||||
|
||||
proc unpack*(p: string): Dictionary =
  ## Rebuilds a `Dictionary` from a blob produced by `pack`.
  ##
  ## After gzip decompression the data must start with the 4-byte record
  ## width, followed by the word data.
  ##
  ## Raises `ValueError` when the decompressed data is too short to hold
  ## the header, or when the word data is not a whole number of records.
  const headerLen = 4
  let data = p.uncompress(dataFormat = dfGzip)
  if data.len < headerLen:
    raise newException(ValueError,
      "packed dictionary is too short to contain a width header")

  let
    width = data.getU32()
    payloadLen = data.len - headerLen
  # An empty payload is accepted with any width (it is what packing an
  # empty dictionary produces). Otherwise the payload must divide evenly
  # into records, which also rules out a zero width.
  if payloadLen > 0 and
      (width == 0 or payloadLen.uint64 mod width.uint64 != 0):
    raise newException(ValueError,
      "packed dictionary data is not a multiple of its record width")

  result.width = width
  result.words = data[headerLen .. ^1]
|
||||
|
||||
|
||||
proc loadWords*(path: string): Dictionary =
  ## Builds a fixed-width `Dictionary` from the word list at `path`,
  ## which is read as one word per line.
  ##
  ## Every kept word is right-padded with spaces to the record width.
  ## Words longer than the record width are skipped, as are blank lines,
  ## which would otherwise turn into entries made only of padding.
  const recordWidth = 25
  result.width = uint32(recordWidth)
  for word in readFile(path).strip().splitLines():
    if word.len == 0 or word.len > recordWidth:
      continue
    # `alignLeft` pads on the right with spaces up to `recordWidth`.
    result.words.add(word.alignLeft(recordWidth))
|
||||
|
||||
|
||||
when isMainModule:
  import std/os

  # Command-line packer: the first argument is the plain-text word list
  # to read, the second is the packed file to write.
  if paramCount() != 2:
    # Fail with a usage message rather than an index error from paramStr.
    quit("usage: dictionary <dictionary.txt> <dictionary.pack>",
         QuitFailure)

  echo "Loading words..."
  let dictionary = loadWords(paramStr(1))
  echo "Packing dictionary..."
  let packed = dictionary.pack()
  writeFile(paramStr(2), packed)
  echo "Dictionary packed."
|
||||
|
@ -13,7 +13,7 @@ proc genPassphrase(dict: Dictionary, length, dictSize: int): string =
|
||||
var words: seq[string]
|
||||
for r in rands:
|
||||
let i = r mod dictSize.uint64
|
||||
words.add(dict[i])
|
||||
words.add(dict[i].strip())
|
||||
result = words.join(" ")
|
||||
|
||||
|
||||
@ -52,6 +52,5 @@ proc parseInput(dictLen: int): (int, int) =
|
||||
|
||||
# Program entry point: unpack the dictionary, read the requested
# passphrase parameters, and print one generated passphrase.
when isMainModule:
|
||||
let dict = packed.unpack()
|
||||
# NOTE(review): the hunk header (-52,6 +52,5) says this commit removes
# one line from this block; presumably it is this debug print - confirm.
echo dict.len
|
||||
# The dictionary size is passed to parseInput, which returns the
# (length, dictSize) pair used below.
let (length, dictSize) = parseInput(dict.len)
|
||||
echo genPassphrase(dict, length, dictSize)
|
||||
|
Loading…
x
Reference in New Issue
Block a user