From d9b542823bf9d8b85ae91e6361775504f67c94d0 Mon Sep 17 00:00:00 2001
From: Joseph Montanaro
Date: Mon, 26 Jul 2021 15:50:43 -0700
Subject: [PATCH] dynamic data path

Read the corpus directory from the first command-line argument and fall
back to the previous hard-coded BNC path when no argument is given.

Move the per-thread chunk arithmetic out of the main block into an
`ichunkate` iterator that yields (index, chunk) pairs; the first
`items.len mod numChunks` chunks each receive one extra item.

---
 src/process.nim | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/src/process.nim b/src/process.nim
index 5b51e0e..4b1bffb 100644
--- a/src/process.nim
+++ b/src/process.nim
@@ -1,8 +1,21 @@
 import std/[os, cpuinfo, times, monotimes]
-import std/[streams, strutils, tables]
+import std/[streams, sequtils, strutils, tables]
 import scanxml
 
 
+iterator ichunkate(items: seq, numChunks: Natural): (int, seq) =
+  var chunkStart = 0
+  for i in 0 ..< numChunks:
+    let chunkLen =
+      if i < items.len mod numChunks:
+        (items.len div numChunks) + 1
+      else:
+        items.len div numChunks
+    let chunkEnd = chunkStart + chunkLen
+    yield (i, items[chunkStart ..< chunkEnd])
+    chunkStart = chunkEnd
+
+
 proc save(wordCounts: CountTable; dictName, countName: string) =
   let dictFile = openFileStream(dictName, fmWrite)
   let countFile = openFileStream(countName, fmWrite)
@@ -32,7 +45,12 @@ proc processFiles(filenames: seq[string]) =
 
 when isMainModule:
   let start = getMonoTime()
-  let basePath = r"../BNC/2554/download/Texts/"
+
+  let basePath =
+    if paramCount() > 0:
+      paramStr(1)
+    else:
+      r"../BNC/2554/download/Texts/"
 
   var paths: seq[string]
   for path in walkDirRec(basePath):
@@ -41,17 +59,8 @@ when isMainModule:
   let numThreads = countProcessors()
   var threads = newSeq[Thread[seq[string]]](numThreads)
 
-  var lastIdx = 0
-  for i, t in threads.mpairs:
-    var chunksize = paths.len div numThreads
-    if i < paths.len mod numThreads:
-      chunksize += 1
-
-    let newIdx = lastIdx + chunksize
-    let chunk = paths[lastIdx ..< newIdx]
-    lastIdx = newIdx
-
-    createThread(t, processFiles, chunk)
+  for i, chunk in paths.ichunkate(numThreads):
+    createThread(threads[i], processFiles, chunk)
 
   var processed = 0
   for i in 0 .. paths.high: