switch to fixed-width storage for word list
All checks were successful
continuous-integration/drone/tag Build is passing
All checks were successful
continuous-integration/drone/tag Build is passing
This commit is contained in:
parent
183e2fe7ac
commit
e8d2f16c9e
@ -33,6 +33,13 @@ task(dictionary, "Generate dictionary from BNC XML files"):
|
|||||||
echo output.strip().splitlines()[^1]
|
echo output.strip().splitlines()[^1]
|
||||||
|
|
||||||
|
|
||||||
|
task(pack, "Pack dictionary into fixed-width file and compress"):
|
||||||
|
echo "Packing dictionary"
|
||||||
|
echo runCmd("nim c --run src/dictionary.nim data/dictionary.txt data/dictionary.pack")
|
||||||
|
|
||||||
|
|
||||||
before(build):
|
before(build):
|
||||||
if not fileExists("data/dictionary.txt"):
|
if not fileExists("data/dictionary.txt"):
|
||||||
dictionaryTask()
|
dictionaryTask()
|
||||||
|
if not fileExists("data/dictionary.pack"):
|
||||||
|
packTask()
|
||||||
|
@ -5,82 +5,58 @@ import zippy
|
|||||||
type
|
type
|
||||||
Dictionary* = object
|
Dictionary* = object
|
||||||
words: string
|
words: string
|
||||||
offsets: seq[uint32]
|
width: uint32
|
||||||
|
|
||||||
|
|
||||||
proc `[]`*(d: Dictionary, i: Natural): string =
|
proc `[]`*(d: Dictionary, i: Natural): string =
|
||||||
# last word has no following start index, so we have to fake it
|
let start = i.uint32 * d.width
|
||||||
# also strings are indexed with ints
|
d.words[start ..< start + d.width]
|
||||||
let slice =
|
|
||||||
if i == d.offsets.high:
|
|
||||||
d.offsets[i].int .. d.words.high
|
|
||||||
else:
|
|
||||||
d.offsets[i].int ..< d.offsets[i + 1].int
|
|
||||||
|
|
||||||
result = d.words[slice]
|
|
||||||
|
|
||||||
|
|
||||||
proc len*(d: Dictionary): int =
|
proc len*(d: Dictionary): int =
|
||||||
result = d.offsets.len
|
result = d.words.len div d.width.int
|
||||||
|
|
||||||
|
|
||||||
proc addU32(s: var seq[uint8], i: uint32) =
|
proc addU32(s: var string, i: uint32) =
|
||||||
for offset in 0..3:
|
for offset in 0..3:
|
||||||
let b = cast[uint8](i shr (offset * 8))
|
let b = cast[char](i shr (offset * 8))
|
||||||
s.add(b)
|
s.add(b)
|
||||||
|
|
||||||
|
|
||||||
proc getU32(s: seq[uint8]): uint32 =
|
proc getU32(s: string): uint32 =
|
||||||
for offset in 0..3:
|
for offset in 0..3:
|
||||||
result = result or (cast[uint32](s[offset]) shl (offset * 8))
|
result = result or (cast[uint32](s[offset]) shl (offset * 8))
|
||||||
|
|
||||||
|
|
||||||
proc pack*(d: Dictionary): seq[uint8] =
|
proc pack*(d: Dictionary): string =
|
||||||
let compressed = cast[seq[uint8]](d.words)
|
var data: string
|
||||||
let lenWords = compressed.len
|
data.addU32(d.width)
|
||||||
let lenOffsets = d.offsets.len * 4
|
data.add(d.words)
|
||||||
# 8 extra bytes for the length specifiers
|
data.compress(dataFormat = dfGzip)
|
||||||
var buff = newSeqOfCap[uint8](lenWords + lenOffsets + 8)
|
|
||||||
buff.addU32(lenWords.uint32)
|
|
||||||
buff.add(compressed)
|
|
||||||
buff.addU32(lenOffsets.uint32)
|
|
||||||
for offset in d.offsets:
|
|
||||||
buff.addU32(offset)
|
|
||||||
result = buff.compress(dataFormat = dfGzip)
|
|
||||||
|
|
||||||
|
|
||||||
proc unpack*[T: seq[uint8]|string](p: T): Dictionary =
|
proc unpack*(p: string): Dictionary =
|
||||||
when T is string:
|
let data = p.uncompress(dataFormat = dfGzip)
|
||||||
let buff = cast[seq[uint8]](p.uncompress(dataFormat = dfGzip))
|
result.width = data.getU32()
|
||||||
else:
|
result.words = data[4..^1]
|
||||||
let buff = p.uncompress(dataFormat = dfGzip)
|
|
||||||
|
|
||||||
let lenWords = buff.getU32()
|
|
||||||
let nextSection = lenWords + 4
|
|
||||||
let words = cast[string](buff[4 ..< nextSection])
|
|
||||||
|
|
||||||
let numOffsets = buff[nextSection ..< (nextSection + 4)].getU32() div 4
|
|
||||||
var offsets = newSeqOfCap[uint32](numOffsets)
|
|
||||||
for i in 0 ..< numOffsets:
|
|
||||||
let idx = nextSection + 4 + (i * 4)
|
|
||||||
let offset = buff[idx ..< idx + 4].getU32()
|
|
||||||
offsets.add(offset)
|
|
||||||
|
|
||||||
result = Dictionary(words: words, offsets: offsets)
|
|
||||||
|
|
||||||
|
|
||||||
proc loadWords*(): Dictionary =
|
proc loadWords*(path: string): Dictionary =
|
||||||
var i: uint32
|
result.width = 25
|
||||||
for word in readFile("../data/dictionary.txt").strip().splitLines():
|
for word in readFile(path).strip().splitLines():
|
||||||
|
if word.len > 25:
|
||||||
|
continue
|
||||||
|
|
||||||
result.words.add(word)
|
result.words.add(word)
|
||||||
result.offsets.add(i)
|
for _ in 0..<(25 - word.len):
|
||||||
i += word.len.uint32
|
result.words.add(' ')
|
||||||
|
|
||||||
|
|
||||||
when isMainModule:
|
when isMainModule:
|
||||||
|
import std/os
|
||||||
echo "Loading words..."
|
echo "Loading words..."
|
||||||
let dictionary = loadWords()
|
let dictionary = loadWords(paramStr(1))
|
||||||
echo "Packing dictionary..."
|
echo "Packing dictionary..."
|
||||||
let packed = dictionary.pack()
|
let packed = dictionary.pack()
|
||||||
discard open("../data/dictionary.pack", fmWrite).writeBytes(packed, 0, packed.len)
|
writeFile(paramStr(2), packed)
|
||||||
echo "Dictionary packed."
|
echo "Dictionary packed."
|
||||||
|
@ -13,7 +13,7 @@ proc genPassphrase(dict: Dictionary, length, dictSize: int): string =
|
|||||||
var words: seq[string]
|
var words: seq[string]
|
||||||
for r in rands:
|
for r in rands:
|
||||||
let i = r mod dictSize.uint64
|
let i = r mod dictSize.uint64
|
||||||
words.add(dict[i])
|
words.add(dict[i].strip())
|
||||||
result = words.join(" ")
|
result = words.join(" ")
|
||||||
|
|
||||||
|
|
||||||
@ -52,6 +52,5 @@ proc parseInput(dictLen: int): (int, int) =
|
|||||||
|
|
||||||
when isMainModule:
|
when isMainModule:
|
||||||
let dict = packed.unpack()
|
let dict = packed.unpack()
|
||||||
echo dict.len
|
|
||||||
let (length, dictSize) = parseInput(dict.len)
|
let (length, dictSize) = parseInput(dict.len)
|
||||||
echo genPassphrase(dict, length, dictSize)
|
echo genPassphrase(dict, length, dictSize)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user