Compare commits

..

2 Commits

Author SHA1 Message Date
e8d2f16c9e switch to fixed-width storage for word list
All checks were successful
continuous-integration/drone/tag Build is passing
2022-01-10 11:20:30 -08:00
183e2fe7ac store dictionary compressed 2022-01-10 10:31:05 -08:00
4 changed files with 84 additions and 36 deletions

4
.gitignore vendored
View File

@ -1,2 +1,4 @@
*.exe *.exe
data/BNC/* data/BNC/*
data/dictionary*
!data/dictionary.txt

View File

@ -12,6 +12,7 @@ bin = @["passphrase"]
requires "nim >= 1.0" requires "nim >= 1.0"
requires "nimcrypto >= 0.4.8" requires "nimcrypto >= 0.4.8"
requires "zippy >= 0.7.3"
# Tasks n scripts # Tasks n scripts
@ -32,6 +33,13 @@ task(dictionary, "Generate dictionary from BNC XML files"):
echo output.strip().splitlines()[^1] echo output.strip().splitlines()[^1]
task(pack, "Pack dictionary into fixed-width file and compress"):
  # Compiles and runs src/dictionary.nim with the text word list as input
  # and the packed file as output, then prints whatever runCmd returns.
  const packCmd =
    "nim c --run src/dictionary.nim data/dictionary.txt data/dictionary.pack"
  echo "Packing dictionary"
  let packOutput = runCmd(packCmd)
  echo packOutput
before(build): before(build):
if not fileExists("data/dictionary.txt"): if not fileExists("data/dictionary.txt"):
dictionaryTask() dictionaryTask()
if not fileExists("data/dictionary.pack"):
packTask()

62
src/dictionary.nim Normal file
View File

@ -0,0 +1,62 @@
import std/strutils
import zippy
type
  Dictionary* = object
    ## Word list held as fixed-width records inside one contiguous string.
    ## Record `i` occupies bytes `i * width ..< (i + 1) * width` of `words`.
    words: string  # all records back to back; `loadWords` pads with spaces
    width: uint32  # size of one record in bytes
proc `[]`*(d: Dictionary, i: Natural): string =
  ## Returns the `i`-th record of `d`, exactly `d.width` bytes long.
  ## Any padding stored in the record is returned as-is; callers that want
  ## the bare word must strip it themselves.
  ##
  ## The offset is computed with `int` arithmetic so that an overflow is
  ## trapped instead of silently wrapping (as unsigned math would) and
  ## returning the wrong record. An index past the end of the data fails in
  ## the string slice below.
  let
    recordLen = d.width.int
    first = i * recordLen
  d.words[first ..< first + recordLen]
proc len*(d: Dictionary): int =
  ## Number of complete records stored in `d`.
  ##
  ## A `Dictionary` whose `width` is zero (for example a default-initialised
  ## value) holds no records, so 0 is returned instead of dividing by zero.
  if d.width == 0: 0
  else: d.words.len div d.width.int
proc addU32(s: var string, i: uint32) =
  ## Appends `i` to `s` as four bytes, least significant byte first.
  for byteIdx in 0..3:
    # Mask to a single byte and convert explicitly rather than relying on a
    # narrowing `cast`.
    let lowByte = (i shr (byteIdx * 8)) and 0xFF'u32
    s.add(chr(int(lowByte)))
proc getU32(s: string): uint32 =
  ## Decodes the first four bytes of `s` as an unsigned 32-bit integer,
  ## least significant byte first. Any bytes after the fourth are ignored.
  ##
  ## Raises `ValueError` when `s` holds fewer than four bytes, instead of
  ## indexing past the end of the string.
  if s.len < 4:
    raise newException(ValueError,
      "need at least 4 bytes to read a uint32, got " & $s.len)
  for byteIdx in 0..3:
    # `ord` gives the byte value 0..255; widen it explicitly before shifting.
    result = result or (uint32(ord(s[byteIdx])) shl (byteIdx * 8))
proc pack*(d: Dictionary): string =
  ## Serialises `d` into a gzip-compressed string.
  ## The uncompressed layout is the record width written by `addU32`,
  ## followed by the raw record data.
  var payload = ""
  addU32(payload, d.width)
  payload &= d.words
  result = compress(payload, dataFormat = dfGzip)
proc unpack*(p: string): Dictionary =
  ## Rebuilds a `Dictionary` from a gzip-compressed string: the first four
  ## decompressed bytes are the record width, the remainder is the record
  ## data.
  ##
  ## Raises `ValueError` when the decompressed payload is too short to hold
  ## the width header; errors from the decompressor itself propagate
  ## unchanged.
  const headerLen = 4
  let data = p.uncompress(dataFormat = dfGzip)
  if data.len < headerLen:
    raise newException(ValueError,
      "packed dictionary is truncated: missing width header")
  result.width = data.getU32()
  # When only the header is present this slice is empty.
  result.words = data[headerLen..^1]
proc loadWords*(path: string, width: Positive = 25): Dictionary =
  ## Reads a newline-separated word list from `path` and stores every word
  ## as a record of exactly `width` bytes, padded on the right with spaces.
  ##
  ## Words longer than `width` bytes are skipped. Empty lines are skipped
  ## too, so that a blank line (or an empty file, which still splits into
  ## one empty line) never becomes an all-space record.
  ##
  ## `width` defaults to 25. Errors from `readFile` propagate to the caller.
  result.width = width.uint32
  for word in readFile(path).strip().splitLines():
    if word.len == 0 or word.len > width:
      continue
    # alignLeft pads with spaces up to `width`; the word is never longer here.
    result.words.add(word.alignLeft(width))
when isMainModule:
  import std/os

  # Command-line entry point: argument 1 is the text word list to read,
  # argument 2 is the packed file to write.
  if paramCount() < 2:
    # Fail with a usage message instead of an index error from paramStr.
    quit("Usage: dictionary <word-list.txt> <output.pack>", 1)

  echo "Loading words..."
  let dictionary = loadWords(paramStr(1))
  echo "Packing dictionary..."
  let packed = dictionary.pack()
  writeFile(paramStr(2), packed)
  echo "Dictionary packed."

View File

@ -1,45 +1,19 @@
import std/[os, strutils] import std/[os, strutils]
import nimcrypto/sysrand import nimcrypto/sysrand
import dictionary
type Dictionary = object const packed = staticRead("../data/dictionary.pack")
words: string
offsets: seq[uint32]
proc `[]`(d: Dictionary, i: Natural): string = proc genPassphrase(dict: Dictionary, length, dictSize: int): string =
# last word has no following start index, so we have to fake it
# also strings are indexed with ints
let slice = if i == d.offsets.high:
d.offsets[i].int .. d.words.high
else:
d.offsets[i].int ..< d.offsets[i + 1].int
result = d.words[slice]
proc len(d: Dictionary): int =
result = d.offsets.len
proc loadWords(): Dictionary =
for word in staticRead("../data/dictionary.txt").strip().splitLines():
let startIdx = result.words.len.uint32
result.offsets.add(startIdx)
result.words.add(word)
const dict = loadWords()
proc genPassphrase(length, dictSize: int): string =
var rands = newSeq[uint64](length) var rands = newSeq[uint64](length)
discard randomBytes(rands) discard randomBytes(rands)
var words: seq[string] var words: seq[string]
for r in rands: for r in rands:
let i = r mod dictSize.uint64 let i = r mod dictSize.uint64
words.add(dict[i]) words.add(dict[i].strip())
result = words.join(" ") result = words.join(" ")
@ -48,7 +22,8 @@ const help = """Usage:
Defaults to length of 4 and dictionary size of 25,000.""" Defaults to length of 4 and dictionary size of 25,000."""
proc parseInput(): (int, int) =
proc parseInput(dictLen: int): (int, int) =
let params = commandLineParams() let params = commandLineParams()
if "-h" in params or "--help" in params: if "-h" in params or "--help" in params:
echo help echo help
@ -67,8 +42,8 @@ proc parseInput(): (int, int) =
if params.len > 1: if params.len > 1:
try: try:
dictSize = parseInt(params[1]) dictSize = parseInt(params[1])
if dictSize < 100 or dictSize > dict.len: if dictSize < 100 or dictSize > dictLen:
quit("Dictionary size must be between 100 and " & $dict.len, 1) quit("Dictionary size must be between 100 and " & $dictLen, 1)
except ValueError: except ValueError:
quit('"' & params[1] & "\" is not a valid dictionary size.", 1) quit('"' & params[1] & "\" is not a valid dictionary size.", 1)
@ -76,5 +51,6 @@ proc parseInput(): (int, int) =
when isMainModule: when isMainModule:
let (length, dictSize) = parseInput() let dict = packed.unpack()
echo genPassphrase(length, dictSize) let (length, dictSize) = parseInput(dict.len)
echo genPassphrase(dict, length, dictSize)