more refactoring
This commit is contained in:
parent
bfd2868b87
commit
74795db477
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,2 +1,2 @@
|
|||||||
*.exe
|
*.exe
|
||||||
BNC/*
|
data/BNC/*
|
@ -16,7 +16,6 @@ requires "nimcrypto >= 0.4.8"
|
|||||||
|
|
||||||
# Tasks n scripts
|
# Tasks n scripts
|
||||||
|
|
||||||
import strutils
|
|
||||||
|
|
||||||
proc runCmd(command: string, input = "", cache = ""): string =
|
proc runCmd(command: string, input = "", cache = ""): string =
|
||||||
let (output, exitCode) = gorgeEx(command, input, cache)
|
let (output, exitCode) = gorgeEx(command, input, cache)
|
||||||
@ -29,10 +28,10 @@ proc runCmd(command: string, input = "", cache = ""): string =
|
|||||||
|
|
||||||
task(dictionary, "Generate dictionary from BNC XML files"):
|
task(dictionary, "Generate dictionary from BNC XML files"):
|
||||||
echo "Building dictionary"
|
echo "Building dictionary"
|
||||||
let output = runCmd("nim c --run --threads:on -d:release -d:lto src/process.nim BNC/2554/download/Texts src/")
|
let output = runCmd("nim c --run --threads:on -d:release -d:lto src/process.nim data/BNC/2554/download/Texts src/")
|
||||||
echo output.strip().splitlines()[^1]
|
echo output.strip().splitlines()[^1]
|
||||||
|
|
||||||
|
|
||||||
before(build):
|
before(build):
|
||||||
if not fileExists("src/dictionary.txt"):
|
if not fileExists("data/dictionary.txt"):
|
||||||
dictionaryTask()
|
dictionaryTask()
|
||||||
|
@ -23,7 +23,7 @@ proc len(d: Dictionary): int =
|
|||||||
|
|
||||||
|
|
||||||
proc loadWords(): Dictionary =
|
proc loadWords(): Dictionary =
|
||||||
for word in staticRead("dictionary.txt").strip().splitLines():
|
for word in staticRead("../data/dictionary.txt").strip().splitLines():
|
||||||
let startIdx = result.words.len.uint32
|
let startIdx = result.words.len.uint32
|
||||||
result.offsets.add(startIdx)
|
result.offsets.add(startIdx)
|
||||||
result.words.add(word)
|
result.words.add(word)
|
||||||
|
1
src/passphrase.nims
Normal file
1
src/passphrase.nims
Normal file
@ -0,0 +1 @@
|
|||||||
|
--d: release
|
@ -27,6 +27,24 @@ proc save(wordCounts: CountTable; dictName, countName: string) =
|
|||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
|
type Config = object
|
||||||
|
srcPath: string
|
||||||
|
dstPath: string
|
||||||
|
|
||||||
|
|
||||||
|
proc parseInput(): Config =
|
||||||
|
result.srcPath = r"../data/BNC/2554/download/Texts/"
|
||||||
|
result.dstPath = "."
|
||||||
|
|
||||||
|
if paramCount() > 0:
|
||||||
|
result.srcPath = paramStr(1)
|
||||||
|
if paramCount() > 1:
|
||||||
|
result.dstPath = paramStr(2)
|
||||||
|
|
||||||
|
if not dirExists(result.srcPath):
|
||||||
|
quit("Could not locate datafiles: directory " & result.srcPath & " does not exist.")
|
||||||
|
|
||||||
|
|
||||||
var
|
var
|
||||||
threadResults: Channel[CountTable[string]]
|
threadResults: Channel[CountTable[string]]
|
||||||
progress: Channel[int]
|
progress: Channel[int]
|
||||||
@ -46,17 +64,10 @@ proc processFiles(filenames: seq[string]) =
|
|||||||
when isMainModule:
|
when isMainModule:
|
||||||
let start = getMonoTime()
|
let start = getMonoTime()
|
||||||
|
|
||||||
let basePath =
|
let config = parseInput()
|
||||||
if paramCount() > 0:
|
|
||||||
paramStr(1)
|
|
||||||
else:
|
|
||||||
r"../BNC/2554/download/Texts/"
|
|
||||||
|
|
||||||
if not dirExists(basePath):
|
|
||||||
quit("Could not locate datafiles: directory " & basePath & " does not exist.")
|
|
||||||
|
|
||||||
var paths: seq[string]
|
var paths: seq[string]
|
||||||
for path in walkDirRec(basePath):
|
for path in walkDirRec(config.srcPath):
|
||||||
if path.endsWith(".xml"):
|
if path.endsWith(".xml"):
|
||||||
paths.add(path)
|
paths.add(path)
|
||||||
|
|
||||||
@ -79,15 +90,10 @@ when isMainModule:
|
|||||||
counts.inc(word, count)
|
counts.inc(word, count)
|
||||||
|
|
||||||
counts.sort()
|
counts.sort()
|
||||||
let outPath =
|
|
||||||
if paramCount() > 1:
|
|
||||||
paramStr(2)
|
|
||||||
else:
|
|
||||||
"."
|
|
||||||
|
|
||||||
let
|
let
|
||||||
dPath = joinPath(outPath, "dictionary.txt")
|
dPath = joinPath(config.dstPath, "dictionary.txt")
|
||||||
cPath = joinPath(outPath, "counts.txt")
|
cPath = joinPath(config.dstPath, "counts.txt")
|
||||||
save(counts, dPath, cPath)
|
save(counts, dPath, cPath)
|
||||||
|
|
||||||
echo "Done. Finished in ", (getMonoTime() - start).inMilliseconds.float / 1000, " seconds."
|
echo "Done. Finished in ", (getMonoTime() - start).inMilliseconds.float / 1000, " seconds."
|
||||||
|
Loading…
x
Reference in New Issue
Block a user