9 Commits

Author SHA1 Message Date
e8d2f16c9e switch to fixed-width storage for word list
All checks were successful
continuous-integration/drone/tag Build is passing
2022-01-10 11:20:30 -08:00
183e2fe7ac store dictionary compressed 2022-01-10 10:31:05 -08:00
8657f3c13d add quotes to error messages
All checks were successful
continuous-integration/drone/tag Build is passing
2021-07-28 20:15:40 -07:00
93954add96 remove duplicate error message 2021-07-28 18:23:35 -07:00
5793a18358 fix drone config
All checks were successful
continuous-integration/drone/tag Build is passing
2021-07-28 18:17:21 -07:00
a7d531e4b7 restrict CI to tag events 2021-07-28 17:06:25 -07:00
b30b12962d v0.2.0
All checks were successful
continuous-integration/drone/push Build is passing
2021-07-28 17:03:29 -07:00
2a7de7dab2 add CI config
All checks were successful
continuous-integration/drone/push Build is passing
2021-07-28 17:02:09 -07:00
74795db477 more refactoring 2021-07-28 17:01:49 -07:00
9 changed files with 145 additions and 61 deletions

33
.drone.yml Normal file
View File

@ -0,0 +1,33 @@
# Drone CI pipeline: builds the passphrase binary for Windows and Linux and
# attaches both artifacts to a Gitea release.
kind: pipeline
type: docker
name: main
# Only run for tag events.
trigger:
event: tag
steps:
# Windows build: installs the mingw-w64 toolchain, then builds with -d:mingw.
# NOTE(review): presumably this is what produces passphrase.exe, which the
# release step uploads - confirm.
- name: build-windows
image: nimlang/nim
commands:
- apt update
- apt install -y mingw-w64
- nimble build -y -d:mingw
# Linux build: renames the resulting binary to passphrase_linux, the name the
# release step uploads.
- name: build-linux
image: nimlang/nim
commands:
- nimble build -y
- mv passphrase passphrase_linux
# Release: waits for both build steps, then uploads the two binaries using the
# gitea-release plugin.
- name: release
image: plugins/gitea-release
depends_on:
- build-windows
- build-linux
settings:
base_url: 'https://git.jfmonty2.com'
files:
- passphrase_linux
- passphrase.exe
# API key is read from the gitea_token secret rather than stored in the file.
api_key:
from_secret: gitea_token

4
.gitignore vendored
View File

@ -1,2 +1,4 @@
*.exe
BNC/*
data/BNC/*
data/dictionary*
!data/dictionary.txt

View File

@ -1,6 +1,6 @@
# Package
version = "0.1.0"
version = "0.3.1"
author = "Joseph Montanaro"
description = "Passphrase generator and dictionary builder"
license = "none"
@ -12,11 +12,11 @@ bin = @["passphrase"]
requires "nim >= 1.0"
requires "nimcrypto >= 0.4.8"
requires "zippy >= 0.7.3"
# Tasks n scripts
import strutils
proc runCmd(command: string, input = "", cache = ""): string =
let (output, exitCode) = gorgeEx(command, input, cache)
@ -29,10 +29,17 @@ proc runCmd(command: string, input = "", cache = ""): string =
task(dictionary, "Generate dictionary from BNC XML files"):
echo "Building dictionary"
let output = runCmd("nim c --run --threads:on -d:release -d:lto src/process.nim BNC/2554/download/Texts src/")
let output = runCmd("nim c --run --threads:on -d:release -d:lto src/process.nim data/BNC/2554/download/Texts src/")
echo output.strip().splitlines()[^1]
task pack, "Pack dictionary into fixed-width file and compress":
  # Compiles and runs src/dictionary.nim, which reads data/dictionary.txt and
  # writes data/dictionary.pack, then prints whatever the command output.
  echo "Packing dictionary"
  let packOutput = runCmd(
    "nim c --run src/dictionary.nim data/dictionary.txt data/dictionary.pack")
  echo packOutput
before(build):
if not fileExists("src/dictionary.txt"):
if not fileExists("data/dictionary.txt"):
dictionaryTask()
if not fileExists("data/dictionary.pack"):
packTask()

62
src/dictionary.nim Normal file
View File

@ -0,0 +1,62 @@
import std/strutils
import zippy
type Dictionary* = object
  ## Word list stored as one flat string of fixed-width records.
  words: string  ## all records back to back, `width` bytes each
  width: uint32  ## size of a single record in bytes
proc `[]`*(d: Dictionary, i: Natural): string =
  ## Returns the `i`-th record of `d`: the `width` bytes starting at byte
  ## offset `i * width`. The record is returned as stored, so callers that
  ## want the bare word must remove any padding themselves.
  ##
  ## The offset is computed with `int` arithmetic. Doing it in `uint32`
  ## wraps around silently, so a large enough `i` could land back inside
  ## `words` and return an unrelated record instead of raising.
  let
    recordLen = d.width.int
    first = i * recordLen
  d.words[first ..< first + recordLen]
func len*(d: Dictionary): int =
  ## Number of records in `d`: the byte length of the record storage
  ## integer-divided by the record width.
  d.words.len div int(d.width)
proc addU32(s: var string, i: uint32) =
  ## Appends `i` to `s` as four bytes, least-significant byte first.
  for shift in countup(0, 24, 8):
    # Mask to the low byte before converting so the value always fits a char.
    let lowByte = (i shr shift) and 0xFF'u32
    s.add(chr(int(lowByte)))
proc getU32(s: string): uint32 =
  ## Decodes the first four bytes of `s` as an unsigned 32-bit integer,
  ## least-significant byte first. Bytes past the fourth are ignored.
  for idx in 0 .. 3:
    let byteVal = uint32(ord(s[idx]))
    result = result or (byteVal shl (8 * idx))
proc pack*(d: Dictionary): string =
  ## Serializes `d` for storage: a four-byte record width written by
  ## `addU32`, followed by the raw record bytes, all gzip-compressed.
  var raw = newStringOfCap(4 + d.words.len)
  raw.addU32(d.width)
  raw.add(d.words)
  result = compress(raw, dataFormat = dfGzip)
proc unpack*(p: string): Dictionary =
  ## Inverse of `pack`: gunzips `p`, reads the record width from the first
  ## four bytes and treats everything after them as the record storage.
  let raw = uncompress(p, dataFormat = dfGzip)
  Dictionary(width: getU32(raw), words: raw[4 .. ^1])
proc loadWords*(path: string, width: uint32 = 25): Dictionary =
  ## Builds a `Dictionary` from the text file at `path`, one word per line.
  ##
  ## Each kept word is right-padded with spaces to exactly `width` bytes so
  ## that every record has the same size. Words longer than `width` bytes are
  ## skipped, as are blank lines.
  ##
  ## Raises `ValueError` if `width` is zero, and whatever `readFile` raises
  ## if the file cannot be read.
  if width == 0:
    raise newException(ValueError, "record width must be positive")
  let recordLen = width.int
  result.width = width
  for word in readFile(path).strip().splitLines():
    # A blank line (including the single "" that splitLines yields for an
    # empty file) would become an all-padding record, i.e. an empty word.
    # An overlong word cannot fit in a record. Skip both.
    if word.len == 0 or word.len > recordLen:
      continue
    result.words.add(word.alignLeft(recordLen))
when isMainModule:
  import std/os

  # Command-line entry point: reads a word list and writes the packed
  # dictionary.
  #   argument 1: path of the word list to read
  #   argument 2: path of the packed file to write
  # Without this check a missing argument fails inside paramStr with an
  # index error instead of telling the user what is expected.
  if paramCount() < 2:
    quit("Usage: dictionary <word list> <packed output>", 1)

  let
    srcPath = paramStr(1)
    dstPath = paramStr(2)

  echo "Loading words..."
  let dictionary = loadWords(srcPath)

  echo "Packing dictionary..."
  let packed = dictionary.pack()
  writeFile(dstPath, packed)
  echo "Dictionary packed."

View File

@ -1,48 +1,19 @@
import std/[os, strutils]
import nimcrypto/sysrand
import dictionary
type Dictionary = object
words: string
offsets: seq[uint32]
const packed = staticRead("../data/dictionary.pack")
proc `[]`(d: Dictionary, i: Natural): string =
# last word has no following start index, so we have to fake it
# also strings are indexed with ints
let slice = if i == d.offsets.high:
d.offsets[i].int .. d.words.high
else:
d.offsets[i].int ..< d.offsets[i + 1].int
result = d.words[slice]
proc len(d: Dictionary): int =
result = d.offsets.len
proc loadWords(): Dictionary =
for word in staticRead("dictionary.txt").strip().splitLines():
let startIdx = result.words.len.uint32
result.offsets.add(startIdx)
result.words.add(word)
const dict = loadWords()
proc genPassphrase(length, dictSize: int): string =
if dictSize < 100 or dictSize > dict.len:
quit("Dictionary size must be between 100 and " & $dict.len, 1)
proc genPassphrase(dict: Dictionary, length, dictSize: int): string =
var rands = newSeq[uint64](length)
discard randomBytes(rands)
var words: seq[string]
for r in rands:
let i = r mod dictSize.uint64
words.add(dict[i])
words.add(dict[i].strip())
result = words.join(" ")
@ -51,7 +22,8 @@ const help = """Usage:
Defaults to length of 4 and dictionary size of 25,000."""
proc parseInput(): (int, int) =
proc parseInput(dictLen: int): (int, int) =
let params = commandLineParams()
if "-h" in params or "--help" in params:
echo help
@ -65,19 +37,20 @@ proc parseInput(): (int, int) =
try:
length = parseInt(params[0])
except ValueError:
quit(params[0] & " is not a valid passphrase length.", 1)
quit('"' & params[0] & "\" is not a valid passphrase length.", 1)
if params.len > 1:
try:
dictSize = parseInt(params[1])
if dictSize < 100 or dictSize > dict.len:
quit("Dictionary size must be between 100 and " & $dict.len, 1)
if dictSize < 100 or dictSize > dictLen:
quit("Dictionary size must be between 100 and " & $dictLen, 1)
except ValueError:
quit(params[1] & " is not a valid dictionary size.", 1)
quit('"' & params[1] & "\" is not a valid dictionary size.", 1)
result = (length, dictSize)
when isMainModule:
let (length, dictSize) = parseInput()
echo genPassphrase(length, dictSize)
let dict = packed.unpack()
let (length, dictSize) = parseInput(dict.len)
echo genPassphrase(dict, length, dictSize)

1
src/passphrase.nims Normal file
View File

@ -0,0 +1 @@
--d: release

View File

@ -27,6 +27,24 @@ proc save(wordCounts: CountTable; dictName, countName: string) =
break
type Config = object
  ## Input and output locations for a run.
  srcPath: string  ## directory that is checked for existence before use
  dstPath: string  ## directory the output files are joined onto

proc parseInput(): Config =
  ## Builds the run configuration from the command line. The first argument
  ## overrides the source directory and the second overrides the destination
  ## directory; each falls back to its default when absent. Quits with an
  ## error message if the source directory does not exist.
  let argCount = paramCount()
  result = Config(
    srcPath: (if argCount > 0: paramStr(1)
              else: r"../data/BNC/2554/download/Texts/"),
    dstPath: (if argCount > 1: paramStr(2) else: ".")
  )
  if not dirExists(result.srcPath):
    quit("Could not locate datafiles: directory " & result.srcPath & " does not exist.")
var
threadResults: Channel[CountTable[string]]
progress: Channel[int]
@ -46,17 +64,10 @@ proc processFiles(filenames: seq[string]) =
when isMainModule:
let start = getMonoTime()
let basePath =
if paramCount() > 0:
paramStr(1)
else:
r"../BNC/2554/download/Texts/"
if not dirExists(basePath):
quit("Could not locate datafiles: directory " & basePath & " does not exist.")
let config = parseInput()
var paths: seq[string]
for path in walkDirRec(basePath):
for path in walkDirRec(config.srcPath):
if path.endsWith(".xml"):
paths.add(path)
@ -79,15 +90,10 @@ when isMainModule:
counts.inc(word, count)
counts.sort()
let outPath =
if paramCount() > 1:
paramStr(2)
else:
"."
let
dPath = joinPath(outPath, "dictionary.txt")
cPath = joinPath(outPath, "counts.txt")
dPath = joinPath(config.dstPath, "dictionary.txt")
cPath = joinPath(config.dstPath, "counts.txt")
save(counts, dPath, cPath)
echo "Done. Finished in ", (getMonoTime() - start).inMilliseconds.float / 1000, " seconds."