properly terminate created subprocesses

This fixes a memory / resource leak that manifests when computing
stats over large sets of repositories. It was eating more than 8 GB of
memory for ~15 git repositories.

Signed-off-by: Heikki Hokkanen <hoxu@users.sf.net>
Sylvain Joyeux, 11 years ago
parent commit e56e7b6f91
1 changed file (gitstats) with 14 additions and 6 deletions

@@ -56,12 +56,14 @@ def getpipeoutput(cmds, quiet = False):
 	if not quiet and ON_LINUX and os.isatty(1):
 		print '>> ' + ' | '.join(cmds),
 		sys.stdout.flush()
-	p0 = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
-	p = p0
+	p = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
+	processes=[p]
 	for x in cmds[1:]:
-		p = subprocess.Popen(x, stdin = p0.stdout, stdout = subprocess.PIPE, shell = True)
-		p0 = p
+		p = subprocess.Popen(x, stdin = p.stdout, stdout = subprocess.PIPE, shell = True)
+		processes.append(p)
 	output = p.communicate()[0]
+	for p in processes:
+		p.wait()
 	end = time.time()
 	if not quiet:
 		if ON_LINUX and os.isatty(1):
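
What this hunk fixes: communicate() waits only for the last stage of the
pipeline, so the earlier Popen handles were simply dropped and their
child processes left to implicit cleanup, letting process and pipe
resources pile up across the thousands of git invocations a stats run
makes. Keeping every handle in a list and wait()ing on each stage reaps
them all. Below is a minimal standalone sketch of the same pattern,
ported to Python 3 (gitstats itself is Python 2); run_pipeline and the
example commands are illustrative, not part of gitstats, and the
commands assume a POSIX shell.

import subprocess

def run_pipeline(cmds):
    # Chain shell commands into a pipeline, keeping a handle on every
    # stage so each child can be reaped, not just the last one.
    processes = []
    prev_stdout = None
    for cmd in cmds:
        p = subprocess.Popen(cmd, shell=True, stdin=prev_stdout,
                             stdout=subprocess.PIPE)
        if prev_stdout is not None:
            # Close the parent's copy of the upstream pipe end so the
            # upstream stage sees SIGPIPE if the downstream stage exits.
            prev_stdout.close()
        prev_stdout = p.stdout
        processes.append(p)
    # Read the final stage's output to completion...
    output = processes[-1].communicate()[0]
    # ...then reap every stage, which the original one-liner never did.
    for p in processes:
        p.wait()
    return output

if __name__ == '__main__':
    print(run_pipeline(['printf "b\\na\\n"', 'sort']).decode())
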
@@ -449,7 +451,10 @@ class GitDataCollector(DataCollector):
 				revs_to_read.append((time,rev))
 
 		#Read revisions from repo
-		time_rev_count = Pool(processes=conf['processes']).map(getnumoffilesfromrev, revs_to_read)
+		pool = Pool(processes=conf['processes'])
+		time_rev_count = pool.map(getnumoffilesfromrev, revs_to_read)
+		pool.terminate()
+		pool.join()
 
 		#Update cache with new revisions and append then to general list
 		for (time, rev, count) in time_rev_count:
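
This Pool hunk, and the identical one that follows, fix the same kind
of leak: Pool(...).map(...) as a one-liner drops the only reference to
the pool, but dropping the reference does not tear the workers down, so
each call could leave a set of live worker processes behind for the
rest of the run. Binding the pool and calling terminate() and join()
reaps the workers deterministically; close() + join() would be the
graceful variant, and terminate() is safe here because map() has
already returned all results. A minimal sketch of that lifecycle, with
an illustrative square worker that is not part of gitstats:

from multiprocessing import Pool

def square(n):
    return n * n

if __name__ == '__main__':
    pool = Pool(processes=4)
    try:
        results = pool.map(square, range(10))
    finally:
        pool.terminate()  # stop workers now; results already collected
        pool.join()       # reap the worker processes so none linger
    print(results)
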
@@ -507,7 +512,10 @@ class GitDataCollector(DataCollector):
 				blobs_to_read.append((ext,blob_id))
 
 		#Get info abount line count for new blob's that wasn't found in cache
-		ext_blob_linecount = Pool(processes=conf['processes']).map(getnumoflinesinblob, blobs_to_read)
+		pool = Pool(processes=conf['processes'])
+		ext_blob_linecount = pool.map(getnumoflinesinblob, blobs_to_read)
+		pool.terminate()
+		pool.join()
 
 		#Update cache and write down info about number of number of lines
 		for (ext, blob_id, linecount) in ext_blob_linecount: