Move the word lists from src/ to data/ and factor the command-line handling
of src/process.nim into a Config object filled in by parseInput().

Review notes (text before the first "diff --git" line is ignored by
git apply / patch):
  * parseInput() validates result.srcPath; the submitted hunk referenced
    `basePath`, which is not declared inside the new proc.
  * The Config fields no longer carry trailing commas after their types.
  * The `dictionary` task now writes into data/, the directory that
    before(build) checks and that src/passphrase.nim reads via staticRead.
  * NOTE(review): the original line layout was lost. Blank context lines
    were restored so every hunk matches its @@ line counts, and the
    indentation of the `break` and `counts.inc(...)` context lines is
    assumed -- confirm against the working tree before applying.
  * NOTE(review): the `index` blob ids are the submitted ones and will not
    match the corrected passphrase.nimble / src/process.nim contents.

diff --git a/src/counts.txt b/data/counts.txt
similarity index 100%
rename from src/counts.txt
rename to data/counts.txt
diff --git a/src/dictionary.txt b/data/dictionary.txt
similarity index 100%
rename from src/dictionary.txt
rename to data/dictionary.txt
diff --git a/passphrase.nimble b/passphrase.nimble
index 76fbfad..5df45eb 100644
--- a/passphrase.nimble
+++ b/passphrase.nimble
@@ -16,7 +16,6 @@ requires "nimcrypto >= 0.4.8"
 
 # Tasks n scripts
 
-import strutils
 
 proc runCmd(command: string, input = "", cache = ""): string =
   let (output, exitCode) = gorgeEx(command, input, cache)
@@ -29,10 +28,10 @@ proc runCmd(command: string, input = "", cache = ""): string =
 
 task(dictionary, "Generate dictionary from BNC XML files"):
   echo "Building dictionary"
-  let output = runCmd("nim c --run --threads:on -d:release -d:lto src/process.nim BNC/2554/download/Texts src/")
+  let output = runCmd("nim c --run --threads:on -d:release -d:lto src/process.nim data/BNC/2554/download/Texts data/")
   echo output.strip().splitlines()[^1]
 
 
 before(build):
-  if not fileExists("src/dictionary.txt"):
+  if not fileExists("data/dictionary.txt"):
     dictionaryTask()
diff --git a/src/passphrase.nim b/src/passphrase.nim
index d7d2de9..369f9a7 100644
--- a/src/passphrase.nim
+++ b/src/passphrase.nim
@@ -23,7 +23,7 @@ proc len(d: Dictionary): int =
 
 
 proc loadWords(): Dictionary =
-  for word in staticRead("dictionary.txt").strip().splitLines():
+  for word in staticRead("../data/dictionary.txt").strip().splitLines():
     let startIdx = result.words.len.uint32
     result.offsets.add(startIdx)
     result.words.add(word)
diff --git a/src/passphrase.nims b/src/passphrase.nims
new file mode 100644
index 0000000..99f2a9a
--- /dev/null
+++ b/src/passphrase.nims
@@ -0,0 +1 @@
+--d: release
\ No newline at end of file
diff --git a/src/process.nim b/src/process.nim
index c4badb7..ec11254 100644
--- a/src/process.nim
+++ b/src/process.nim
@@ -27,6 +27,24 @@ proc save(wordCounts: CountTable; dictName, countName: string) =
       break
 
 
+type Config = object
+  srcPath: string  ## Directory searched recursively for the .xml inputs.
+  dstPath: string  ## Directory receiving dictionary.txt and counts.txt.
+
+
+proc parseInput(): Config =
+  result.srcPath = r"../data/BNC/2554/download/Texts/"
+  result.dstPath = "."
+
+  if paramCount() > 0:
+    result.srcPath = paramStr(1)
+  if paramCount() > 1:
+    result.dstPath = paramStr(2)
+
+  if not dirExists(result.srcPath):
+    quit("Could not locate datafiles: directory " & result.srcPath & " does not exist.")
+
+
 var
   threadResults: Channel[CountTable[string]]
   progress: Channel[int]
@@ -46,17 +64,10 @@ proc processFiles(filenames: seq[string]) =
 
 when isMainModule:
   let start = getMonoTime()
-  let basePath =
-    if paramCount() > 0:
-      paramStr(1)
-    else:
-      r"../BNC/2554/download/Texts/"
-
-  if not dirExists(basePath):
-    quit("Could not locate datafiles: directory " & basePath & " does not exist.")
+  let config = parseInput()
 
   var paths: seq[string]
-  for path in walkDirRec(basePath):
+  for path in walkDirRec(config.srcPath):
     if path.endsWith(".xml"):
       paths.add(path)
 
@@ -79,15 +90,10 @@ when isMainModule:
       counts.inc(word, count)
 
   counts.sort()
-  let outPath =
-    if paramCount() > 1:
-      paramStr(2)
-    else:
-      "."
 
   let
-    dPath = joinPath(outPath, "dictionary.txt")
-    cPath = joinPath(outPath, "counts.txt")
+    dPath = joinPath(config.dstPath, "dictionary.txt")
+    cPath = joinPath(config.dstPath, "counts.txt")
 
   save(counts, dPath, cPath)
   echo "Done. Finished in ", (getMonoTime() - start).inMilliseconds.float / 1000, " seconds."