store dictionary compressed

This commit is contained in:
Joseph Montanaro 2022-01-10 10:31:05 -08:00
parent 8657f3c13d
commit 183e2fe7ac
4 changed files with 101 additions and 35 deletions

2
.gitignore vendored
View File

@ -1,2 +1,4 @@
*.exe *.exe
data/BNC/* data/BNC/*
data/dictionary*
!data/dictionary.txt

View File

@ -12,6 +12,7 @@ bin = @["passphrase"]
requires "nim >= 1.0" requires "nim >= 1.0"
requires "nimcrypto >= 0.4.8" requires "nimcrypto >= 0.4.8"
requires "zippy >= 0.7.3"
# Tasks n scripts # Tasks n scripts

86
src/dictionary.nim Normal file
View File

@ -0,0 +1,86 @@
import std/strutils
import zippy
type Dictionary* = object
  ## A word list stored as a single string blob plus per-word start indices.
  words: string        ## every word concatenated back to back, no separators
  offsets: seq[uint32] ## start index in `words` of each word, in word order
proc `[]`*(d: Dictionary, i: Natural): string =
  ## Returns the `i`-th word of the dictionary.
  ##
  ## A word spans from its own offset up to (but not including) the offset of
  ## the word after it.
  # Offsets are uint32, but strings are indexed with ints.
  let first = d.offsets[i].int
  # The final word has no following offset, so it runs to the end of the blob.
  let last =
    if i == d.offsets.high: d.words.high
    else: d.offsets[i + 1].int - 1
  d.words[first .. last]
proc len*(d: Dictionary): int =
  ## Number of entries in the offsets table, i.e. the number of words.
  d.offsets.len
proc addU32(s: var seq[uint8], i: uint32) =
  ## Appends `i` to `s` as four bytes, least-significant byte first.
  var remaining = i
  for _ in 1..4:
    # Emit the current low byte, then shift the next byte into position.
    s.add(uint8(remaining and 0xFF'u32))
    remaining = remaining shr 8
proc getU32(s: openArray[uint8]): uint32 =
  ## Decodes the first four bytes of `s` as a `uint32`, least-significant
  ## byte first (the inverse of `addU32`).
  ##
  ## Accepts any contiguous byte container (seq, array, or a `toOpenArray`
  ## view), so callers do not need to allocate a temporary seq just to decode
  ## a value. Bytes past the fourth are ignored. Indexing fails in the usual
  ## way if `s` holds fewer than four bytes.
  for offset in 0..3:
    result = result or (uint32(s[offset]) shl (offset * 8))
proc pack*(d: Dictionary): seq[uint8] =
  ## Serializes `d` into a byte buffer and returns it gzip-compressed.
  ##
  ## Layout of the buffer before compression (each integer is written by
  ## `addU32`):
  ##   1. byte length of the word blob
  ##   2. the word blob itself
  ##   3. byte length of the offsets table (4 bytes per offset)
  ##   4. every offset, in order
  let lenWords = d.words.len
  let lenOffsets = d.offsets.len * 4
  # 8 extra bytes for the two length specifiers
  var buff = newSeqOfCap[uint8](lenWords + lenOffsets + 8)
  buff.addU32(lenWords.uint32)
  # Copy the characters explicitly instead of reinterpreting the string's
  # memory as a seq[uint8] through `cast`.
  for ch in d.words:
    buff.add(uint8(ch))
  buff.addU32(lenOffsets.uint32)
  for offset in d.offsets:
    buff.addU32(offset)
  result = buff.compress(dataFormat = dfGzip)
proc unpack*[T: seq[uint8]|string](p: T): Dictionary =
  ## Rebuilds a `Dictionary` from gzip-compressed data, given either as a byte
  ## sequence or as a string.
  ##
  ## Expected layout after decompression (integers decoded with `getU32`):
  ##   1. byte length of the word blob
  ##   2. the word blob
  ##   3. byte length of the offsets table (4 bytes per offset)
  ##   4. the offsets
  ##
  ## Raises `ValueError` when the decompressed payload is shorter than its
  ## own length fields require.
  when T is string:
    # Copy into a byte seq explicitly rather than reinterpreting the string's
    # memory through `cast`.
    let inflated = p.uncompress(dataFormat = dfGzip)
    var buff = newSeq[uint8](inflated.len)
    for pos, ch in inflated:
      buff[pos] = uint8(ch)
  else:
    let buff = p.uncompress(dataFormat = dfGzip)

  if buff.len < 4:
    raise newException(ValueError, "packed dictionary is truncated (no header)")

  # All index arithmetic is done in `int` so that a corrupt length field
  # cannot silently wrap around the way uint32 arithmetic would.
  let lenWords = buff.getU32().int
  let nextSection = lenWords + 4
  if buff.len < nextSection + 4:
    raise newException(ValueError, "packed dictionary is truncated (words)")

  var words = newString(lenWords)
  for pos in 0 ..< lenWords:
    words[pos] = char(buff[4 + pos])

  # The stored value is a byte count; every offset occupies four bytes.
  let numOffsets = buff[nextSection ..< nextSection + 4].getU32().int div 4
  let offsetsStart = nextSection + 4
  if buff.len < offsetsStart + numOffsets * 4:
    raise newException(ValueError, "packed dictionary is truncated (offsets)")

  var offsets = newSeqOfCap[uint32](numOffsets)
  for n in 0 ..< numOffsets:
    let idx = offsetsStart + n * 4
    offsets.add(buff[idx ..< idx + 4].getU32())
  result = Dictionary(words: words, offsets: offsets)
proc loadWords*(path = "../data/dictionary.txt"): Dictionary =
  ## Builds a `Dictionary` from a text file containing one word per line.
  ##
  ## `path` is handed to `readFile` unchanged and defaults to the project's
  ## word list, so existing `loadWords()` calls behave exactly as before.
  ## Surrounding whitespace of the whole file is stripped before splitting
  ## into lines; the words are then concatenated without separators.
  for word in readFile(path).strip().splitLines():
    # A word starts where the blob currently ends, so the blob's length is
    # its offset and no separate running counter is needed.
    result.offsets.add(result.words.len.uint32)
    result.words.add(word)
when isMainModule:
  # Build step: turn the plain-text word list into the packed dictionary file.
  echo "Loading words..."
  let dictionary = loadWords()
  echo "Packing dictionary..."
  let packed = dictionary.pack()
  let outFile = open("../data/dictionary.pack", fmWrite)
  # `defer` is not available at module top level, hence try/finally to make
  # sure the handle is flushed and closed even when the write fails.
  try:
    let written = outFile.writeBytes(packed, 0, packed.len)
    if written != packed.len:
      raise newException(IOError,
        "wrote only " & $written & " of " & $packed.len & " bytes")
  finally:
    outFile.close()
  echo "Dictionary packed."

View File

@ -1,38 +1,12 @@
import std/[os, strutils] import std/[os, strutils]
import nimcrypto/sysrand import nimcrypto/sysrand
import dictionary
type Dictionary = object const packed = staticRead("../data/dictionary.pack")
words: string
offsets: seq[uint32]
proc `[]`(d: Dictionary, i: Natural): string = proc genPassphrase(dict: Dictionary, length, dictSize: int): string =
# last word has no following start index, so we have to fake it
# also strings are indexed with ints
let slice = if i == d.offsets.high:
d.offsets[i].int .. d.words.high
else:
d.offsets[i].int ..< d.offsets[i + 1].int
result = d.words[slice]
proc len(d: Dictionary): int =
result = d.offsets.len
proc loadWords(): Dictionary =
for word in staticRead("../data/dictionary.txt").strip().splitLines():
let startIdx = result.words.len.uint32
result.offsets.add(startIdx)
result.words.add(word)
const dict = loadWords()
proc genPassphrase(length, dictSize: int): string =
var rands = newSeq[uint64](length) var rands = newSeq[uint64](length)
discard randomBytes(rands) discard randomBytes(rands)
@ -48,7 +22,8 @@ const help = """Usage:
Defaults to length of 4 and dictionary size of 25,000.""" Defaults to length of 4 and dictionary size of 25,000."""
proc parseInput(): (int, int) =
proc parseInput(dictLen: int): (int, int) =
let params = commandLineParams() let params = commandLineParams()
if "-h" in params or "--help" in params: if "-h" in params or "--help" in params:
echo help echo help
@ -67,8 +42,8 @@ proc parseInput(): (int, int) =
if params.len > 1: if params.len > 1:
try: try:
dictSize = parseInt(params[1]) dictSize = parseInt(params[1])
if dictSize < 100 or dictSize > dict.len: if dictSize < 100 or dictSize > dictLen:
quit("Dictionary size must be between 100 and " & $dict.len, 1) quit("Dictionary size must be between 100 and " & $dictLen, 1)
except ValueError: except ValueError:
quit('"' & params[1] & "\" is not a valid dictionary size.", 1) quit('"' & params[1] & "\" is not a valid dictionary size.", 1)
@ -76,5 +51,7 @@ proc parseInput(): (int, int) =
when isMainModule: when isMainModule:
let (length, dictSize) = parseInput() let dict = packed.unpack()
echo genPassphrase(length, dictSize) echo dict.len
let (length, dictSize) = parseInput(dict.len)
echo genPassphrase(dict, length, dictSize)