store dictionary compressed
This commit is contained in:
parent
8657f3c13d
commit
183e2fe7ac
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,2 +1,4 @@
|
||||
*.exe
|
||||
data/BNC/*
|
||||
data/BNC/*
|
||||
data/dictionary*
|
||||
!data/dictionary.txt
|
@ -12,6 +12,7 @@ bin = @["passphrase"]
|
||||
|
||||
requires "nim >= 1.0"
|
||||
requires "nimcrypto >= 0.4.8"
|
||||
requires "zippy >= 0.7.3"
|
||||
|
||||
|
||||
# Tasks n scripts
|
||||
|
86
src/dictionary.nim
Normal file
86
src/dictionary.nim
Normal file
@ -0,0 +1,86 @@
|
||||
import std/strutils
|
||||
import zippy
|
||||
|
||||
|
||||
type
  Dictionary* = object
    ## Word list stored as one concatenated string plus the start index of
    ## every word inside that string.
    words: string         # all words back to back, with no separators
    offsets: seq[uint32]  # offsets[i] = index in `words` where word i starts
|
||||
|
||||
|
||||
proc `[]`*(d: Dictionary, i: Natural): string =
  ## Returns the `i`-th word of the dictionary.
  ##
  ## `d.offsets[i]` is where word `i` starts inside `d.words`; the word ends
  ## where the next one starts.
  let first = d.offsets[i].int  # strings are indexed with ints, not uint32
  # The last word has no successor to mark its end, so it runs to the end of
  # the concatenated string instead.
  let stop =
    if i < d.offsets.high: d.offsets[i + 1].int
    else: d.words.len
  result = d.words[first ..< stop]
|
||||
|
||||
|
||||
proc len*(d: Dictionary): int =
  ## Number of words in the dictionary (one offset is stored per word).
  d.offsets.len
|
||||
|
||||
|
||||
proc addU32(s: var seq[uint8], i: uint32) =
  ## Appends `i` to `s` as four bytes, least significant byte first.
  var remaining = i
  for _ in 0 ..< 4:
    # emit the current low byte, then shift the next one into place
    s.add(uint8(remaining and 0xFF'u32))
    remaining = remaining shr 8
|
||||
|
||||
|
||||
func getU32(s: openArray[uint8]): uint32 =
  ## Decodes the first four bytes of `s` as a little-endian `uint32`.
  ##
  ## Accepts an `openArray` so that seqs, arrays and `toOpenArray` views can
  ## all be passed without first copying the bytes into a new seq.
  ## `s` must hold at least four bytes.
  for offset in 0 .. 3:
    # byte `offset` contributes bits offset*8 .. offset*8+7
    result = result or (uint32(s[offset]) shl (offset * 8))
|
||||
|
||||
|
||||
proc pack*(d: Dictionary): seq[uint8] =
  ## Serialises `d` into a byte buffer and gzip-compresses it.
  ##
  ## Buffer layout before compression (every integer is written by `addU32`):
  ## byte length of the words, the word bytes, byte length of the offset
  ## table, then each offset.
  let wordBytes = d.words.len
  let offsetBytes = d.offsets.len * 4
  # the two length headers take 4 bytes each on top of the payload
  var raw = newSeqOfCap[uint8](wordBytes + offsetBytes + 8)

  raw.addU32(wordBytes.uint32)
  for ch in d.words:
    raw.add(uint8(ch))

  raw.addU32(offsetBytes.uint32)
  for start in d.offsets:
    raw.addU32(start)

  result = raw.compress(dataFormat = dfGzip)
|
||||
|
||||
|
||||
proc unpack*[T: seq[uint8]|string](p: T): Dictionary =
  ## Rebuilds a `Dictionary` from gzip data laid out the way `pack` writes it.
  ##
  ## After decompression the data must hold, in order: a little-endian
  ## `uint32` byte length of the words, the word bytes, a little-endian
  ## `uint32` byte length of the offset table, and one little-endian `uint32`
  ## per offset.
  ##
  ## Raises `ValueError` when the decompressed data is shorter than the
  ## lengths it declares, or the offset table is not a whole number of
  ## entries.
  let raw = p.uncompress(dataFormat = dfGzip)

  # Integers are decoded in place: this works for both string and seq[uint8]
  # data without casting between them, and allocates no temporary slices.
  template u32At(pos: int): uint32 =
    (uint32(raw[pos]) or (uint32(raw[pos + 1]) shl 8) or
      (uint32(raw[pos + 2]) shl 16) or (uint32(raw[pos + 3]) shl 24))

  if raw.len < 4:
    raise newException(ValueError, "dictionary pack: missing words length")
  let lenWords = int(u32At(0))

  # the offset table's length header sits right after the word bytes
  let tableHeader = 4 + lenWords
  if tableHeader + 4 > raw.len:
    raise newException(ValueError, "dictionary pack: words section truncated")
  let lenOffsets = int(u32At(tableHeader))

  let tableStart = tableHeader + 4
  if lenOffsets mod 4 != 0 or tableStart + lenOffsets > raw.len:
    raise newException(ValueError, "dictionary pack: offset table truncated")

  # Copy into a real string rather than reinterpreting a seq as one.
  var words = newString(lenWords)
  for i in 0 ..< lenWords:
    words[i] = char(raw[4 + i])

  var offsets = newSeqOfCap[uint32](lenOffsets div 4)
  for pos in countup(tableStart, tableStart + lenOffsets - 4, 4):
    offsets.add(u32At(pos))

  result = Dictionary(words: words, offsets: offsets)
|
||||
|
||||
|
||||
proc loadWords*(): Dictionary =
  ## Builds a `Dictionary` from `../data/dictionary.txt`, one word per line.
  ##
  ## Surrounding whitespace of the whole file is stripped before splitting,
  ## and the words are concatenated without separators.
  let text = readFile("../data/dictionary.txt").strip()
  var cursor = 0'u32  # start index of the next word inside `result.words`
  for entry in text.splitLines():
    result.offsets.add(cursor)
    result.words.add(entry)
    cursor += uint32(entry.len)
|
||||
|
||||
|
||||
when isMainModule:
  # Build step: read the plain-text word list, pack it, and write the result
  # to ../data/dictionary.pack.
  echo "Loading words..."
  let dictionary = loadWords()
  echo "Packing dictionary..."
  let packed = dictionary.pack()
  let packFile = open("../data/dictionary.pack", fmWrite)
  try:
    # writeBytes reports how many bytes it wrote; a short write would leave a
    # corrupt pack on disk, so it is treated as an error, not ignored.
    let written = packFile.writeBytes(packed, 0, packed.len)
    if written != packed.len:
      raise newException(IOError,
        "wrote " & $written & " of " & $packed.len & " bytes")
  finally:
    # always release the handle so buffered data is flushed
    packFile.close()
  echo "Dictionary packed."
|
@ -1,38 +1,12 @@
|
||||
import std/[os, strutils]
|
||||
import nimcrypto/sysrand
|
||||
import dictionary
|
||||
|
||||
|
||||
type Dictionary = object
|
||||
words: string
|
||||
offsets: seq[uint32]
|
||||
const packed = staticRead("../data/dictionary.pack")
|
||||
|
||||
|
||||
proc `[]`(d: Dictionary, i: Natural): string =
|
||||
# last word has no following start index, so we have to fake it
|
||||
# also strings are indexed with ints
|
||||
let slice = if i == d.offsets.high:
|
||||
d.offsets[i].int .. d.words.high
|
||||
else:
|
||||
d.offsets[i].int ..< d.offsets[i + 1].int
|
||||
|
||||
result = d.words[slice]
|
||||
|
||||
|
||||
proc len(d: Dictionary): int =
|
||||
result = d.offsets.len
|
||||
|
||||
|
||||
proc loadWords(): Dictionary =
|
||||
for word in staticRead("../data/dictionary.txt").strip().splitLines():
|
||||
let startIdx = result.words.len.uint32
|
||||
result.offsets.add(startIdx)
|
||||
result.words.add(word)
|
||||
|
||||
|
||||
const dict = loadWords()
|
||||
|
||||
|
||||
proc genPassphrase(length, dictSize: int): string =
|
||||
proc genPassphrase(dict: Dictionary, length, dictSize: int): string =
|
||||
var rands = newSeq[uint64](length)
|
||||
discard randomBytes(rands)
|
||||
|
||||
@ -48,7 +22,8 @@ const help = """Usage:
|
||||
|
||||
Defaults to length of 4 and dictionary size of 25,000."""
|
||||
|
||||
proc parseInput(): (int, int) =
|
||||
|
||||
proc parseInput(dictLen: int): (int, int) =
|
||||
let params = commandLineParams()
|
||||
if "-h" in params or "--help" in params:
|
||||
echo help
|
||||
@ -67,8 +42,8 @@ proc parseInput(): (int, int) =
|
||||
if params.len > 1:
|
||||
try:
|
||||
dictSize = parseInt(params[1])
|
||||
if dictSize < 100 or dictSize > dict.len:
|
||||
quit("Dictionary size must be between 100 and " & $dict.len, 1)
|
||||
if dictSize < 100 or dictSize > dictLen:
|
||||
quit("Dictionary size must be between 100 and " & $dictLen, 1)
|
||||
except ValueError:
|
||||
quit('"' & params[1] & "\" is not a valid dictionary size.", 1)
|
||||
|
||||
@ -76,5 +51,7 @@ proc parseInput(): (int, int) =
|
||||
|
||||
|
||||
when isMainModule:
|
||||
let (length, dictSize) = parseInput()
|
||||
echo genPassphrase(length, dictSize)
|
||||
let dict = packed.unpack()
|
||||
echo dict.len
|
||||
let (length, dictSize) = parseInput(dict.len)
|
||||
echo genPassphrase(dict, length, dictSize)
|
||||
|
Loading…
x
Reference in New Issue
Block a user