From 74795db477c7d5d03faeda159cdc58356f8f53fa Mon Sep 17 00:00:00 2001 From: Joseph Montanaro Date: Wed, 28 Jul 2021 17:01:49 -0700 Subject: [PATCH] more refactoring --- .gitignore | 2 +- {src => data}/counts.txt | 0 {src => data}/dictionary.txt | 0 passphrase.nimble | 5 ++--- src/passphrase.nim | 2 +- src/passphrase.nims | 1 + src/process.nim | 38 +++++++++++++++++++++--------------- 7 files changed, 27 insertions(+), 21 deletions(-) rename {src => data}/counts.txt (100%) rename {src => data}/dictionary.txt (100%) create mode 100644 src/passphrase.nims diff --git a/.gitignore b/.gitignore index b0b6a2a..e222c89 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ *.exe -BNC/* \ No newline at end of file +data/BNC/* \ No newline at end of file diff --git a/src/counts.txt b/data/counts.txt similarity index 100% rename from src/counts.txt rename to data/counts.txt diff --git a/src/dictionary.txt b/data/dictionary.txt similarity index 100% rename from src/dictionary.txt rename to data/dictionary.txt diff --git a/passphrase.nimble b/passphrase.nimble index 76fbfad..5df45eb 100644 --- a/passphrase.nimble +++ b/passphrase.nimble @@ -16,7 +16,6 @@ requires "nimcrypto >= 0.4.8" # Tasks n scripts -import strutils proc runCmd(command: string, input = "", cache = ""): string = let (output, exitCode) = gorgeEx(command, input, cache) @@ -29,10 +28,10 @@ proc runCmd(command: string, input = "", cache = ""): string = task(dictionary, "Generate dictionary from BNC XML files"): echo "Building dictionary" - let output = runCmd("nim c --run --threads:on -d:release -d:lto src/process.nim BNC/2554/download/Texts src/") + let output = runCmd("nim c --run --threads:on -d:release -d:lto src/process.nim data/BNC/2554/download/Texts data/") echo output.strip().splitlines()[^1] before(build): - if not fileExists("src/dictionary.txt"): + if not fileExists("data/dictionary.txt"): dictionaryTask() diff --git a/src/passphrase.nim b/src/passphrase.nim index d7d2de9..369f9a7 100644 --- 
a/src/passphrase.nim +++ b/src/passphrase.nim @@ -23,7 +23,7 @@ proc len(d: Dictionary): int = proc loadWords(): Dictionary = - for word in staticRead("dictionary.txt").strip().splitLines(): + for word in staticRead("../data/dictionary.txt").strip().splitLines(): let startIdx = result.words.len.uint32 result.offsets.add(startIdx) result.words.add(word) diff --git a/src/passphrase.nims b/src/passphrase.nims new file mode 100644 index 0000000..99f2a9a --- /dev/null +++ b/src/passphrase.nims @@ -0,0 +1 @@ +--d: release \ No newline at end of file diff --git a/src/process.nim b/src/process.nim index c4badb7..a638cfd 100644 --- a/src/process.nim +++ b/src/process.nim @@ -27,6 +27,24 @@ proc save(wordCounts: CountTable; dictName, countName: string) = break +type Config = object + srcPath: string + dstPath: string + + +proc parseInput(): Config = + result.srcPath = r"../data/BNC/2554/download/Texts/" + result.dstPath = "." + + if paramCount() > 0: + result.srcPath = paramStr(1) + if paramCount() > 1: + result.dstPath = paramStr(2) + + if not dirExists(result.srcPath): + quit("Could not locate datafiles: directory " & result.srcPath & " does not exist.") + + var threadResults: Channel[CountTable[string]] progress: Channel[int] @@ -46,17 +64,10 @@ proc processFiles(filenames: seq[string]) = when isMainModule: let start = getMonoTime() - let basePath = - if paramCount() > 0: - paramStr(1) - else: - r"../BNC/2554/download/Texts/" - - if not dirExists(basePath): - quit("Could not locate datafiles: directory " & basePath & " does not exist.") + let config = parseInput() var paths: seq[string] - for path in walkDirRec(basePath): + for path in walkDirRec(config.srcPath): if path.endsWith(".xml"): paths.add(path) @@ -79,15 +90,10 @@ when isMainModule: counts.inc(word, count) counts.sort() - let outPath = - if paramCount() > 1: - paramStr(2) - else: - "." 
let - dPath = joinPath(outPath, "dictionary.txt") - cPath = joinPath(outPath, "counts.txt") + dPath = joinPath(config.dstPath, "dictionary.txt") + cPath = joinPath(config.dstPath, "counts.txt") save(counts, dPath, cPath) echo "Done. Finished in ", (getMonoTime() - start).inMilliseconds.float / 1000, " seconds."