dynamic data path

This commit is contained in:
Joseph Montanaro 2021-07-26 15:50:43 -07:00
parent b388552a27
commit d9b542823b

View File

@ -1,8 +1,21 @@
import std/[os, cpuinfo, times, monotimes]
import std/[streams, strutils, tables]
import std/[streams, sequtils, strutils, tables]
import scanxml
iterator ichunkate(items: seq, numChunks: Natural): (int, seq) =
  ## Splits `items` into `numChunks` consecutive slices and yields each one
  ## paired with its zero-based chunk index.
  ##
  ## Slice lengths differ by at most one: the first
  ## `items.len mod numChunks` slices carry the extra element. When
  ## `numChunks` exceeds `items.len` the trailing slices are empty; when
  ## `numChunks` is 0 nothing is yielded.
  var offset = 0
  var idx = 0
  while idx < numChunks:
    # The division stays inside the loop so numChunks == 0 never divides.
    var size = items.len div numChunks
    if idx < items.len mod numChunks:
      inc size
    yield (idx, items[offset ..< offset + size])
    offset += size
    inc idx
proc save(wordCounts: CountTable; dictName, countName: string) =
let dictFile = openFileStream(dictName, fmWrite)
let countFile = openFileStream(countName, fmWrite)
@ -32,7 +45,12 @@ proc processFiles(filenames: seq[string]) =
when isMainModule:
let start = getMonoTime()
let basePath = r"../BNC/2554/download/Texts/"
let basePath =
if paramCount() > 0:
paramStr(1)
else:
r"../BNC/2554/download/Texts/"
var paths: seq[string]
for path in walkDirRec(basePath):
@ -41,17 +59,8 @@ when isMainModule:
let numThreads = countProcessors()
var threads = newSeq[Thread[seq[string]]](numThreads)
var lastIdx = 0
for i, t in threads.mpairs:
var chunksize = paths.len div numThreads
if i < paths.len mod numThreads:
chunksize += 1
let newIdx = lastIdx + chunksize
let chunk = paths[lastIdx ..< newIdx]
lastIdx = newIdx
createThread(t, processFiles, chunk)
for i, chunk in paths.ichunkate(numThreads):
createThread(threads[i], processFiles, chunk)
var processed = 0
for i in 0 .. paths.high: