more refactoring

This commit is contained in:
Joseph Montanaro 2021-07-28 17:01:49 -07:00
parent bfd2868b87
commit 74795db477
7 changed files with 27 additions and 21 deletions

2
.gitignore vendored
View File

@ -1,2 +1,2 @@
*.exe *.exe
BNC/* data/BNC/*

View File

@ -16,7 +16,6 @@ requires "nimcrypto >= 0.4.8"
# Tasks n scripts # Tasks n scripts
import strutils
proc runCmd(command: string, input = "", cache = ""): string = proc runCmd(command: string, input = "", cache = ""): string =
let (output, exitCode) = gorgeEx(command, input, cache) let (output, exitCode) = gorgeEx(command, input, cache)
@ -29,10 +28,10 @@ proc runCmd(command: string, input = "", cache = ""): string =
task(dictionary, "Generate dictionary from BNC XML files"):
  # Compiles and runs src/process.nim over the BNC texts, then echoes the
  # last line of whatever that run printed.
  echo "Building dictionary"
  # The second positional argument must be data/ (not src/): the build hook
  # below looks for data/dictionary.txt, and loadWords embeds
  # ../data/dictionary.txt via staticRead. Writing to src/ would leave both
  # of them without a file to find.
  # NOTE(review): assumes process.nim treats its second argument as the
  # output directory - confirm against its argument parsing.
  let output = runCmd(
    "nim c --run --threads:on -d:release -d:lto src/process.nim " &
    "data/BNC/2554/download/Texts data/")
  echo output.strip().splitlines()[^1]

before(build):
  # Generate the dictionary only when it is not already present.
  if not fileExists("data/dictionary.txt"):
    dictionaryTask()

View File

@ -23,7 +23,7 @@ proc len(d: Dictionary): int =
proc loadWords(): Dictionary = proc loadWords(): Dictionary =
for word in staticRead("dictionary.txt").strip().splitLines(): for word in staticRead("../data/dictionary.txt").strip().splitLines():
let startIdx = result.words.len.uint32 let startIdx = result.words.len.uint32
result.offsets.add(startIdx) result.offsets.add(startIdx)
result.words.add(word) result.words.add(word)

1
src/passphrase.nims Normal file
View File

@ -0,0 +1 @@
--d: release

View File

@ -27,6 +27,24 @@ proc save(wordCounts: CountTable; dictName, countName: string) =
break break
type
  Config = object
    ## Settings taken from the command line.
    srcPath: string  ## directory that is walked recursively for input files
    dstPath: string  ## directory joined with the output file names

proc parseInput(): Config =
  ## Reads the positional command-line arguments into a `Config`.
  ##
  ## Argument 1, when present, replaces the default source directory;
  ## argument 2, when present, replaces the default destination `"."`.
  ## Calls `quit` with an error message when the source directory does
  ## not exist, so a returned `Config` always has an existing `srcPath`.
  let argCount = paramCount()
  let src =
    if argCount >= 1: paramStr(1)
    else: r"../data/BNC/2554/download/Texts/"
  let dst =
    if argCount >= 2: paramStr(2)
    else: "."
  if not dirExists(src):
    quit("Could not locate datafiles: directory " & src & " does not exist.")
  Config(srcPath: src, dstPath: dst)
var var
threadResults: Channel[CountTable[string]] threadResults: Channel[CountTable[string]]
progress: Channel[int] progress: Channel[int]
@ -46,17 +64,10 @@ proc processFiles(filenames: seq[string]) =
when isMainModule: when isMainModule:
let start = getMonoTime() let start = getMonoTime()
let basePath = let config = parseInput()
if paramCount() > 0:
paramStr(1)
else:
r"../BNC/2554/download/Texts/"
if not dirExists(basePath):
quit("Could not locate datafiles: directory " & basePath & " does not exist.")
var paths: seq[string] var paths: seq[string]
for path in walkDirRec(basePath): for path in walkDirRec(config.srcPath):
if path.endsWith(".xml"): if path.endsWith(".xml"):
paths.add(path) paths.add(path)
@ -79,15 +90,10 @@ when isMainModule:
counts.inc(word, count) counts.inc(word, count)
counts.sort() counts.sort()
let outPath =
if paramCount() > 1:
paramStr(2)
else:
"."
let let
dPath = joinPath(outPath, "dictionary.txt") dPath = joinPath(config.dstPath, "dictionary.txt")
cPath = joinPath(outPath, "counts.txt") cPath = joinPath(config.dstPath, "counts.txt")
save(counts, dPath, cPath) save(counts, dPath, cPath)
echo "Done. Finished in ", (getMonoTime() - start).inMilliseconds.float / 1000, " seconds." echo "Done. Finished in ", (getMonoTime() - start).inMilliseconds.float / 1000, " seconds."