gitstats 48KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502
  1. #!/usr/bin/env python
  2. # Copyright (c) 2007-2014 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/AUTHOR)
  3. # GPLv2 / GPLv3
  4. import datetime
  5. import getopt
  6. import glob
  7. import os
  8. import pickle
  9. import platform
  10. import re
  11. import shutil
  12. import subprocess
  13. import sys
  14. import time
  15. import zlib
# gitstats requires Python 2.6+ (multiprocessing, with-statement, etc.).
if sys.version_info < (2, 6):
    print >> sys.stderr, "Python 2.6 or higher is required for gitstats"
    sys.exit(1)

from multiprocessing import Pool

# Force the C locale so the output of the git commands we parse is stable.
os.environ['LC_ALL'] = 'C'

# Common gnuplot preamble shared by the generated plot scripts.
GNUPLOT_COMMON = 'set terminal png transparent size 640,240\nset size 1.0,1.0\n'
ON_LINUX = (platform.system() == 'Linux')
WEEKDAYS = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')

# Wall-clock bookkeeping: time spent in this process vs. in external commands.
exectime_internal = 0.0
exectime_external = 0.0
time_start = time.time()

# By default, gnuplot is searched from path, but can be overridden with the
# environment variable "GNUPLOT"
gnuplot_cmd = 'gnuplot'
if 'GNUPLOT' in os.environ:
    gnuplot_cmd = os.environ['GNUPLOT']

# Default configuration values.
conf = {
    'max_domains': 10,          # max e-mail domains shown in domain stats
    'max_ext_length': 10,       # file extensions longer than this are ignored
    'style': 'gitstats.css',
    'max_authors': 20,
    'authors_top': 5,
    'commit_begin': '',         # empty = from the beginning of history
    'commit_end': 'HEAD',
    'linear_linestats': 1,      # 1 = compute line stats on a linear history
    'project_name': '',         # empty = derive from repository directory name
    'merge_authors': {},        # alias -> canonical author name
    'processes': 8,             # worker pool size for repository reads
    'start_date': ''            # empty = no --since limit
}
def getpipeoutput(cmds, quiet = False):
    """Run a shell pipeline and return its stdout.

    cmds is a list of shell command strings; each command's stdout is fed
    into the next command's stdin.  Unless quiet, the pipeline and its
    elapsed time are echoed to the terminal.  The total wall-clock time is
    accumulated into the global exectime_external.  Returns the final
    command's stdout with trailing newlines stripped.
    """
    global exectime_external
    start = time.time()
    if not quiet and ON_LINUX and os.isatty(1):
        print '>> ' + ' | '.join(cmds),
        sys.stdout.flush()
    # NOTE(review): shell=True with interpolated command strings -- acceptable
    # only because callers build these commands locally, never from untrusted
    # input.
    p = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
    processes=[p]
    for x in cmds[1:]:
        p = subprocess.Popen(x, stdin = p.stdout, stdout = subprocess.PIPE, shell = True)
        processes.append(p)
    output = p.communicate()[0]
    # Reap every stage of the pipeline, not just the last one.
    for p in processes:
        p.wait()
    end = time.time()
    if not quiet:
        if ON_LINUX and os.isatty(1):
            print '\r',
        print '[%.5f] >> %s' % (end - start, ' | '.join(cmds))
    exectime_external += (end - start)
    return output.rstrip('\n')
  67. def getlogrange(defaultrange = 'HEAD', end_only = True):
  68. commit_range = getcommitrange(defaultrange, end_only)
  69. if len(conf['start_date']) > 0:
  70. return '--since=%s %s' % (conf['start_date'], commit_range)
  71. return commit_range
  72. def getcommitrange(defaultrange = 'HEAD', end_only = False):
  73. if len(conf['commit_end']) > 0:
  74. if end_only or len(conf['commit_begin']) == 0:
  75. return conf['commit_end']
  76. return '%s..%s' % (conf['commit_begin'], conf['commit_end'])
  77. return defaultrange
  78. def getkeyssortedbyvalues(dict):
  79. return map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items())))
  80. # dict['author'] = { 'commits': 512 } - ...key(dict, 'commits')
  81. def getkeyssortedbyvaluekey(d, key):
  82. return map(lambda el : el[1], sorted(map(lambda el : (d[el][key], el), d.keys())))
  83. def getstatsummarycounts(line):
  84. numbers = re.findall('\d+', line)
  85. if len(numbers) == 1:
  86. # neither insertions nor deletions: may probably only happen for "0 files changed"
  87. numbers.append(0);
  88. numbers.append(0);
  89. elif len(numbers) == 2 and line.find('(+)') != -1:
  90. numbers.append(0); # only insertions were printed on line
  91. elif len(numbers) == 2 and line.find('(-)') != -1:
  92. numbers.insert(1, 0); # only deletions were printed on line
  93. return numbers
  94. VERSION = 0
  95. def getversion():
  96. global VERSION
  97. if VERSION == 0:
  98. gitstats_repo = os.path.dirname(os.path.abspath(__file__))
  99. VERSION = getpipeoutput(["git --git-dir=%s/.git --work-tree=%s rev-parse --short %s" %
  100. (gitstats_repo, gitstats_repo, getcommitrange('HEAD').split('\n')[0])])
  101. return VERSION
  102. def getgitversion():
  103. return getpipeoutput(['git --version']).split('\n')[0]
  104. def getgnuplotversion():
  105. return getpipeoutput(['%s --version' % gnuplot_cmd]).split('\n')[0]
  106. def getnumoffilesfromrev(time_rev):
  107. """
  108. Get number of files changed in commit
  109. """
  110. time, rev = time_rev
  111. return (int(time), rev, int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0]))
  112. def getnumoflinesinblob(ext_blob):
  113. """
  114. Get number of lines in blob
  115. """
  116. ext, blob_id = ext_blob
  117. return (ext, blob_id, int(getpipeoutput(['git cat-file blob %s' % blob_id, 'wc -l']).split()[0]))
  118. class DataCollector:
  119. """Manages data collection from a revision control repository."""
  120. def __init__(self):
  121. self.stamp_created = time.time()
  122. self.cache = {}
  123. self.total_authors = 0
  124. self.activity_by_hour_of_day = {} # hour -> commits
  125. self.activity_by_day_of_week = {} # day -> commits
  126. self.activity_by_month_of_year = {} # month [1-12] -> commits
  127. self.activity_by_hour_of_week = {} # weekday -> hour -> commits
  128. self.activity_by_hour_of_day_busiest = 0
  129. self.activity_by_hour_of_week_busiest = 0
  130. self.activity_by_year_week = {} # yy_wNN -> commits
  131. self.activity_by_year_week_peak = 0
  132. self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed}
  133. self.total_commits = 0
  134. self.total_files = 0
  135. self.authors_by_commits = 0
  136. # domains
  137. self.domains = {} # domain -> commits
  138. # author of the month
  139. self.author_of_month = {} # month -> author -> commits
  140. self.author_of_year = {} # year -> author -> commits
  141. self.commits_by_month = {} # month -> commits
  142. self.commits_by_year = {} # year -> commits
  143. self.lines_added_by_month = {} # month -> lines added
  144. self.lines_added_by_year = {} # year -> lines added
  145. self.lines_removed_by_month = {} # month -> lines removed
  146. self.lines_removed_by_year = {} # year -> lines removed
  147. self.first_commit_stamp = 0
  148. self.last_commit_stamp = 0
  149. self.last_active_day = None
  150. self.active_days = set()
  151. # lines
  152. self.total_lines = 0
  153. self.total_lines_added = 0
  154. self.total_lines_removed = 0
  155. # size
  156. self.total_size = 0
  157. # timezone
  158. self.commits_by_timezone = {} # timezone -> commits
  159. # tags
  160. self.tags = {}
  161. self.files_by_stamp = {} # stamp -> files
  162. # extensions
  163. self.extensions = {} # extension -> files, lines
  164. # line statistics
  165. self.changes_by_date = {} # stamp -> { files, ins, del }
  166. ##
  167. # This should be the main function to extract data from the repository.
  168. def collect(self, dir):
  169. self.dir = dir
  170. if len(conf['project_name']) == 0:
  171. self.projectname = os.path.basename(os.path.abspath(dir))
  172. else:
  173. self.projectname = conf['project_name']
  174. ##
  175. # Load cacheable data
  176. def loadCache(self, cachefile):
  177. if not os.path.exists(cachefile):
  178. return
  179. print 'Loading cache...'
  180. f = open(cachefile, 'rb')
  181. try:
  182. self.cache = pickle.loads(zlib.decompress(f.read()))
  183. except:
  184. # temporary hack to upgrade non-compressed caches
  185. f.seek(0)
  186. self.cache = pickle.load(f)
  187. f.close()
  188. ##
  189. # Produce any additional statistics from the extracted data.
  190. def refine(self):
  191. pass
  192. ##
  193. # : get a dictionary of author
  194. def getAuthorInfo(self, author):
  195. return None
  196. def getActivityByDayOfWeek(self):
  197. return {}
  198. def getActivityByHourOfDay(self):
  199. return {}
  200. # : get a dictionary of domains
  201. def getDomainInfo(self, domain):
  202. return None
  203. ##
  204. # Get a list of authors
  205. def getAuthors(self):
  206. return []
  207. def getFirstCommitDate(self):
  208. return datetime.datetime.now()
  209. def getLastCommitDate(self):
  210. return datetime.datetime.now()
  211. def getStampCreated(self):
  212. return self.stamp_created
  213. def getTags(self):
  214. return []
  215. def getTotalAuthors(self):
  216. return -1
  217. def getTotalCommits(self):
  218. return -1
  219. def getTotalFiles(self):
  220. return -1
  221. def getTotalLOC(self):
  222. return -1
  223. ##
  224. # Save cacheable data
  225. def saveCache(self, cachefile):
  226. print 'Saving cache...'
  227. tempfile = cachefile + '.tmp'
  228. f = open(tempfile, 'wb')
  229. #pickle.dump(self.cache, f)
  230. data = zlib.compress(pickle.dumps(self.cache))
  231. f.write(data)
  232. f.close()
  233. try:
  234. os.remove(cachefile)
  235. except OSError:
  236. pass
  237. os.rename(tempfile, cachefile)
class GitDataCollector(DataCollector):
    """DataCollector implementation that extracts everything from a git
    repository by parsing the output of git command pipelines."""

    def collect(self, dir):
        """Collect all statistics from the git repository in *dir*.

        Fills in the dictionaries declared by DataCollector by running a
        series of git commands: shortlog (author count), show-ref (tags),
        rev-list (per-commit activity), ls-tree (files/extensions/sizes)
        and log --shortstat (line change counts).
        """
        DataCollector.collect(self, dir)

        self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getlogrange(), 'wc -l']))
        #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))

        # tags: map each tag to the stamp/date/author info of its commit
        lines = getpipeoutput(['git show-ref --tags']).split('\n')
        for line in lines:
            if len(line) == 0:
                continue
            (hash, tag) = line.split(' ')

            tag = tag.replace('refs/tags/', '')
            output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % hash])
            if len(output) > 0:
                parts = output.split(' ')
                stamp = 0
                try:
                    stamp = int(parts[0])
                except ValueError:
                    stamp = 0
                self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} }

        # collect info on tags, starting from latest
        tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), self.tags.items()))))
        prev = None
        for tag in reversed(tags_sorted_by_date_desc):
            # count commits reachable from this tag but not from the
            # previously processed tag ("^prev" excludes its history)
            cmd = 'git shortlog -s "%s"' % tag
            if prev != None:
                cmd += ' "^%s"' % prev
            output = getpipeoutput([cmd])
            if len(output) == 0:
                continue
            prev = tag
            for line in output.split('\n'):
                # shortlog -s lines look like "   14\tAuthor Name";
                # parts[0] is the leading empty field, [1] count, [2] author
                parts = re.split('\s+', line, 2)
                commits = int(parts[1])
                author = parts[2]
                if author in conf['merge_authors']:
                    author = conf['merge_authors'][author]
                self.tags[tag]['commits'] += commits
                self.tags[tag]['authors'][author] = commits

        # Collect revision statistics
        # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
        lines = getpipeoutput(['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split('\n')
        for line in lines:
            parts = line.split(' ', 4)
            author = ''
            try:
                stamp = int(parts[0])
            except ValueError:
                stamp = 0
            timezone = parts[3]
            author, mail = parts[4].split('<', 1)
            author = author.rstrip()
            if author in conf['merge_authors']:
                author = conf['merge_authors'][author]
            mail = mail.rstrip('>')
            domain = '?'
            if mail.find('@') != -1:
                domain = mail.rsplit('@', 1)[1]
            date = datetime.datetime.fromtimestamp(float(stamp))

            # First and last commit stamp (may be in any order because of cherry-picking and patches)
            if stamp > self.last_commit_stamp:
                self.last_commit_stamp = stamp
            if self.first_commit_stamp == 0 or stamp < self.first_commit_stamp:
                self.first_commit_stamp = stamp

            # activity
            # hour
            hour = date.hour
            self.activity_by_hour_of_day[hour] = self.activity_by_hour_of_day.get(hour, 0) + 1
            # most active hour?
            if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
                self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]

            # day of week
            day = date.weekday()
            self.activity_by_day_of_week[day] = self.activity_by_day_of_week.get(day, 0) + 1

            # domain stats
            if domain not in self.domains:
                self.domains[domain] = {}
            # commits
            self.domains[domain]['commits'] = self.domains[domain].get('commits', 0) + 1

            # hour of week
            if day not in self.activity_by_hour_of_week:
                self.activity_by_hour_of_week[day] = {}
            self.activity_by_hour_of_week[day][hour] = self.activity_by_hour_of_week[day].get(hour, 0) + 1
            # most active hour?
            if self.activity_by_hour_of_week[day][hour] > self.activity_by_hour_of_week_busiest:
                self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[day][hour]

            # month of year
            month = date.month
            self.activity_by_month_of_year[month] = self.activity_by_month_of_year.get(month, 0) + 1

            # yearly/weekly activity
            yyw = date.strftime('%Y-%W')
            self.activity_by_year_week[yyw] = self.activity_by_year_week.get(yyw, 0) + 1
            if self.activity_by_year_week_peak < self.activity_by_year_week[yyw]:
                self.activity_by_year_week_peak = self.activity_by_year_week[yyw]

            # author stats
            if author not in self.authors:
                self.authors[author] = {}
            # commits, note again that commits may be in any date order because of cherry-picking and patches
            if 'last_commit_stamp' not in self.authors[author]:
                self.authors[author]['last_commit_stamp'] = stamp
            if stamp > self.authors[author]['last_commit_stamp']:
                self.authors[author]['last_commit_stamp'] = stamp
            if 'first_commit_stamp' not in self.authors[author]:
                self.authors[author]['first_commit_stamp'] = stamp
            if stamp < self.authors[author]['first_commit_stamp']:
                self.authors[author]['first_commit_stamp'] = stamp

            # author of the month/year
            yymm = date.strftime('%Y-%m')
            if yymm in self.author_of_month:
                self.author_of_month[yymm][author] = self.author_of_month[yymm].get(author, 0) + 1
            else:
                self.author_of_month[yymm] = {}
                self.author_of_month[yymm][author] = 1
            self.commits_by_month[yymm] = self.commits_by_month.get(yymm, 0) + 1

            yy = date.year
            if yy in self.author_of_year:
                self.author_of_year[yy][author] = self.author_of_year[yy].get(author, 0) + 1
            else:
                self.author_of_year[yy] = {}
                self.author_of_year[yy][author] = 1
            self.commits_by_year[yy] = self.commits_by_year.get(yy, 0) + 1

            # authors: active days
            yymmdd = date.strftime('%Y-%m-%d')
            if 'last_active_day' not in self.authors[author]:
                self.authors[author]['last_active_day'] = yymmdd
                self.authors[author]['active_days'] = set([yymmdd])
            elif yymmdd != self.authors[author]['last_active_day']:
                self.authors[author]['last_active_day'] = yymmdd
                self.authors[author]['active_days'].add(yymmdd)

            # project: active days
            if yymmdd != self.last_active_day:
                self.last_active_day = yymmdd
                self.active_days.add(yymmdd)

            # timezone
            self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1

        # outputs "<stamp> <files>" for each revision
        revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
        lines = []
        revs_to_read = []
        time_rev_count = []
        #Look up rev in cache and take info from cache if found
        #If not append rev to list of rev to read from repo
        for revline in revlines:
            time, rev = revline.split(' ')
            #if cache empty then add time and rev to list of new rev's
            #otherwise try to read needed info from cache
            if 'files_in_tree' not in self.cache.keys():
                revs_to_read.append((time,rev))
                continue
            if rev in self.cache['files_in_tree'].keys():
                lines.append('%d %d' % (int(time), self.cache['files_in_tree'][rev]))
            else:
                revs_to_read.append((time,rev))

        #Read revisions from repo (in parallel; getnumoffilesfromrev must be
        #a module-level function so Pool can pickle it)
        pool = Pool(processes=conf['processes'])
        time_rev_count = pool.map(getnumoffilesfromrev, revs_to_read)
        pool.terminate()
        pool.join()

        #Update cache with new revisions and append then to general list
        for (time, rev, count) in time_rev_count:
            if 'files_in_tree' not in self.cache:
                self.cache['files_in_tree'] = {}
            self.cache['files_in_tree'][rev] = count
            lines.append('%d %d' % (int(time), count))

        self.total_commits += len(lines)
        for line in lines:
            parts = line.split(' ')
            if len(parts) != 2:
                continue
            (stamp, files) = parts[0:2]
            try:
                self.files_by_stamp[int(stamp)] = int(files)
            except ValueError:
                print 'Warning: failed to parse line "%s"' % line

        # extensions and size of files
        lines = getpipeoutput(['git ls-tree -r -l -z %s' % getcommitrange('HEAD', end_only = True)]).split('\000')
        blobs_to_read = []
        for line in lines:
            if len(line) == 0:
                continue
            parts = re.split('\s+', line, 5)
            if parts[0] == '160000' and parts[3] == '-':
                # skip submodules
                continue
            blob_id = parts[2]
            size = int(parts[3])
            fullpath = parts[4]

            self.total_size += size
            self.total_files += 1

            filename = fullpath.split('/')[-1] # strip directories
            if filename.find('.') == -1 or filename.rfind('.') == 0:
                ext = ''
            else:
                ext = filename[(filename.rfind('.') + 1):]
            if len(ext) > conf['max_ext_length']:
                ext = ''
            if ext not in self.extensions:
                self.extensions[ext] = {'files': 0, 'lines': 0}
            self.extensions[ext]['files'] += 1
            #if cache empty then add ext and blob id to list of new blob's
            #otherwise try to read needed info from cache
            if 'lines_in_blob' not in self.cache.keys():
                blobs_to_read.append((ext,blob_id))
                continue
            if blob_id in self.cache['lines_in_blob'].keys():
                self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
            else:
                blobs_to_read.append((ext,blob_id))

        #Get info abount line count for new blob's that wasn't found in cache
        pool = Pool(processes=conf['processes'])
        ext_blob_linecount = pool.map(getnumoflinesinblob, blobs_to_read)
        pool.terminate()
        pool.join()

        #Update cache and write down info about number of number of lines
        for (ext, blob_id, linecount) in ext_blob_linecount:
            if 'lines_in_blob' not in self.cache:
                self.cache['lines_in_blob'] = {}
            self.cache['lines_in_blob'][blob_id] = linecount
            self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]

        # line statistics
        # outputs:
        #  N files changed, N insertions (+), N deletions(-)
        # <stamp> <author>
        self.changes_by_date = {} # stamp -> { files, ins, del }
        # computation of lines of code by date is better done
        # on a linear history.
        extra = ''
        if conf['linear_linestats']:
            extra = '--first-parent -m'
        lines = getpipeoutput(['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getlogrange('HEAD'))]).split('\n')
        lines.reverse()
        files = 0; inserted = 0; deleted = 0; total_lines = 0
        author = None
        # Walk oldest->newest.  A shortstat line updates the running
        # files/inserted/deleted counters; the following "<stamp> <author>"
        # line commits them to the per-date/per-month/per-year totals.
        for line in lines:
            if len(line) == 0:
                continue

            # <stamp> <author>
            if re.search('files? changed', line) == None:
                pos = line.find(' ')
                if pos != -1:
                    try:
                        (stamp, author) = (int(line[:pos]), line[pos+1:])
                        if author in conf['merge_authors']:
                            author = conf['merge_authors'][author]
                        self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }

                        date = datetime.datetime.fromtimestamp(stamp)
                        yymm = date.strftime('%Y-%m')
                        self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
                        self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted

                        yy = date.year
                        self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy,0) + inserted
                        self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted

                        files, inserted, deleted = 0, 0, 0
                    except ValueError:
                        print 'Warning: unexpected line "%s"' % line
                else:
                    print 'Warning: unexpected line "%s"' % line
            else:
                numbers = getstatsummarycounts(line)

                if len(numbers) == 3:
                    (files, inserted, deleted) = map(lambda el : int(el), numbers)
                    total_lines += inserted
                    total_lines -= deleted
                    self.total_lines_added += inserted
                    self.total_lines_removed += deleted

                else:
                    print 'Warning: failed to handle line "%s"' % line
                    (files, inserted, deleted) = (0, 0, 0)
                #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
        self.total_lines += total_lines

        # Per-author statistics

        # defined for stamp, author only if author commited at this timestamp.
        self.changes_by_date_by_author = {} # stamp -> author -> lines_added

        # Similar to the above, but never use --first-parent
        # (we need to walk through every commit to know who
        # committed what, not just through mainline)
        lines = getpipeoutput(['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getlogrange('HEAD'))]).split('\n')
        lines.reverse()
        files = 0; inserted = 0; deleted = 0
        author = None
        stamp = 0
        for line in lines:
            if len(line) == 0:
                continue

            # <stamp> <author>
            if re.search('files? changed', line) == None:
                pos = line.find(' ')
                if pos != -1:
                    try:
                        oldstamp = stamp
                        (stamp, author) = (int(line[:pos]), line[pos+1:])
                        if author in conf['merge_authors']:
                            author = conf['merge_authors'][author]
                        if oldstamp > stamp:
                            # clock skew, keep old timestamp to avoid having ugly graph
                            stamp = oldstamp
                        if author not in self.authors:
                            self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
                        self.authors[author]['commits'] = self.authors[author].get('commits', 0) + 1
                        self.authors[author]['lines_added'] = self.authors[author].get('lines_added', 0) + inserted
                        self.authors[author]['lines_removed'] = self.authors[author].get('lines_removed', 0) + deleted
                        if stamp not in self.changes_by_date_by_author:
                            self.changes_by_date_by_author[stamp] = {}
                        if author not in self.changes_by_date_by_author[stamp]:
                            self.changes_by_date_by_author[stamp][author] = {}
                        self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author]['lines_added']
                        self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits']
                        files, inserted, deleted = 0, 0, 0
                    except ValueError:
                        print 'Warning: unexpected line "%s"' % line
                else:
                    print 'Warning: unexpected line "%s"' % line
            else:
                numbers = getstatsummarycounts(line);

                if len(numbers) == 3:
                    (files, inserted, deleted) = map(lambda el : int(el), numbers)
                else:
                    print 'Warning: failed to handle line "%s"' % line
                    (files, inserted, deleted) = (0, 0, 0)

    def refine(self):
        """Derive per-author secondary statistics after collect() has run."""
        # authors
        # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
        self.authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
        self.authors_by_commits.reverse() # most first
        for i, name in enumerate(self.authors_by_commits):
            self.authors[name]['place_by_commits'] = i + 1

        for name in self.authors.keys():
            a = self.authors[name]
            a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
            date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
            date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
            delta = date_last - date_first
            a['date_first'] = date_first.strftime('%Y-%m-%d')
            a['date_last'] = date_last.strftime('%Y-%m-%d')
            a['timedelta'] = delta
            if 'lines_added' not in a: a['lines_added'] = 0
            if 'lines_removed' not in a: a['lines_removed'] = 0

    def getActiveDays(self):
        return self.active_days

    def getActivityByDayOfWeek(self):
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        return self.authors[author]

    def getAuthors(self, limit = None):
        """Return author names sorted by commit count, most commits first."""
        res = getkeyssortedbyvaluekey(self.authors, 'commits')
        res.reverse()
        return res[:limit]

    def getCommitDeltaDays(self):
        """Calendar days between first and last commit, inclusive."""
        return (self.last_commit_stamp / 86400 - self.first_commit_stamp / 86400) + 1

    def getDomainInfo(self, domain):
        return self.domains[domain]

    def getDomains(self):
        return self.domains.keys()

    def getFirstCommitDate(self):
        return datetime.datetime.fromtimestamp(self.first_commit_stamp)

    def getLastCommitDate(self):
        return datetime.datetime.fromtimestamp(self.last_commit_stamp)

    def getTags(self):
        lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
        return lines.split('\n')

    def getTagDate(self, tag):
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        return self.total_authors

    def getTotalCommits(self):
        return self.total_commits

    def getTotalFiles(self):
        return self.total_files

    def getTotalLOC(self):
        return self.total_lines

    def getTotalSize(self):
        return self.total_size

    def revToDate(self, rev):
        """Return the author date of *rev* formatted as YYYY-MM-DD."""
        stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
  616. class ReportCreator:
  617. """Creates the actual report based on given data."""
  618. def __init__(self):
  619. pass
  620. def create(self, data, path):
  621. self.data = data
  622. self.path = path
  623. def html_linkify(text):
  624. return text.lower().replace(' ', '_')
  625. def html_header(level, text):
  626. name = html_linkify(text)
  627. return '\n<h%d><a href="#%s" name="%s">%s</a></h%d>\n\n' % (level, name, name, text, level)
class HTMLReportCreator(ReportCreator):
    """Renders the collected statistics as a set of static XHTML pages plus
    gnuplot data (.dat) and script (.plot) files, then runs gnuplot."""

    def create(self, data, path):
        """Write all report pages (index, activity, authors, files, lines,
        tags) into `path`, emit the .dat/.plot files used by the graphs,
        and finally call createGraphs().

        data -- populated data collector (constructed as GitDataCollector
                in GitStats.run)
        path -- existing output directory
        """
        ReportCreator.create(self, data, path)
        self.title = data.projectname
        # copy static files. Looks in the binary directory, ../share/gitstats and /usr/share/gitstats
        binarypath = os.path.dirname(os.path.abspath(__file__))
        secondarypath = os.path.join(binarypath, '..', 'share', 'gitstats')
        basedirs = [binarypath, secondarypath, '/usr/share/gitstats']
        for file in ('gitstats.css', 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
            for base in basedirs:
                src = base + '/' + file
                if os.path.exists(src):
                    shutil.copyfile(src, path + '/' + file)
                    break
            else:
                # for..else: no candidate directory contained the file
                print 'Warning: "%s" not found, so not copied (searched: %s)' % (file, basedirs)
        f = open(path + "/index.html", 'w')
        format = '%Y-%m-%d %H:%M:%S'  # NOTE(review): shadows the 'format' builtin
        self.printHeader(f)
        f.write('<h1>GitStats - %s</h1>' % data.projectname)
        self.printNav(f)
        f.write('<dl>')
        f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
        f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
        f.write('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s), %s, %s</dd>' % (getversion(), getgitversion(), getgnuplotversion()))
        f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format)))
        f.write('<dt>Age</dt><dd>%d days, %d active days (%3.2f%%)</dd>' % (data.getCommitDeltaDays(), len(data.getActiveDays()), (100.0 * len(data.getActiveDays()) / data.getCommitDeltaDays())))
        f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
        f.write('<dt>Total Lines of Code</dt><dd>%s (%d added, %d removed)</dd>' % (data.getTotalLOC(), data.total_lines_added, data.total_lines_removed))
        f.write('<dt>Total Commits</dt><dd>%s (average %.1f commits per active day, %.1f per all days)</dd>' % (data.getTotalCommits(), float(data.getTotalCommits()) / len(data.getActiveDays()), float(data.getTotalCommits()) / data.getCommitDeltaDays()))
        f.write('<dt>Authors</dt><dd>%s (average %.1f commits per author)</dd>' % (data.getTotalAuthors(), (1.0 * data.getTotalCommits()) / data.getTotalAuthors()))
        f.write('</dl>')
        f.write('</body>\n</html>')
        f.close()
        ###
        # Activity
        f = open(path + '/activity.html', 'w')
        self.printHeader(f)
        f.write('<h1>Activity</h1>')
        self.printNav(f)
        #f.write('<h2>Last 30 days</h2>')
        #f.write('<h2>Last 12 months</h2>')
        # Weekly activity
        WEEKS = 32
        f.write(html_header(2, 'Weekly activity'))
        f.write('<p>Last %d weeks</p>' % WEEKS)
        # generate weeks to show (previous N weeks from now)
        now = datetime.datetime.now()
        deltaweek = datetime.timedelta(7)
        weeks = []
        stampcur = now
        for i in range(0, WEEKS):
            weeks.insert(0, stampcur.strftime('%Y-%W'))
            stampcur -= deltaweek
        # top row: commits & bar
        f.write('<table class="noborders"><tr>')
        for i in range(0, WEEKS):
            commits = 0
            if weeks[i] in data.activity_by_year_week:
                commits = data.activity_by_year_week[weeks[i]]
            percentage = 0
            if weeks[i] in data.activity_by_year_week:
                percentage = float(data.activity_by_year_week[weeks[i]]) / data.activity_by_year_week_peak
            # bar height scaled against the busiest week; min 1px so it's visible
            height = max(1, int(200 * percentage))
            f.write('<td style="text-align: center; vertical-align: bottom">%d<div style="display: block; background-color: red; width: 20px; height: %dpx"></div></td>' % (commits, height))
        # bottom row: year/week
        # NOTE(review): this actually prints a "weeks ago" countdown
        # (WEEKS - i), not the year/week labels the comment suggests.
        f.write('</tr><tr>')
        for i in range(0, WEEKS):
            f.write('<td>%s</td>' % (WEEKS - i))
        f.write('</tr></table>')
        # Hour of Day
        f.write(html_header(2, 'Hour of Day'))
        hour_of_day = data.getActivityByHourOfDay()
        f.write('<table><tr><th>Hour</th>')
        for i in range(0, 24):
            f.write('<th>%d</th>' % i)
        f.write('</tr>\n<tr><th>Commits</th>')
        fp = open(path + '/hour_of_day.dat', 'w')
        for i in range(0, 24):
            if i in hour_of_day:
                # shade from mid-red (quiet) to bright red (busiest hour)
                r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
                f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
                fp.write('%d %d\n' % (i, hour_of_day[i]))
            else:
                f.write('<td>0</td>')
                fp.write('%d 0\n' % i)
        fp.close()
        f.write('</tr>\n<tr><th>%</th>')
        totalcommits = data.getTotalCommits()
        for i in range(0, 24):
            if i in hour_of_day:
                r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
                f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, (100.0 * hour_of_day[i]) / totalcommits))
            else:
                f.write('<td>0.00</td>')
        f.write('</tr></table>')
        f.write('<img src="hour_of_day.png" alt="Hour of Day" />')
        # NOTE(review): reopens and overwrites hour_of_day.dat written just
        # above, this time with 1-based hours (matching the plot's xrange
        # [0.5:24.5]) — the earlier fp write block is effectively redundant.
        fg = open(path + '/hour_of_day.dat', 'w')
        for i in range(0, 24):
            if i in hour_of_day:
                fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
            else:
                fg.write('%d 0\n' % (i + 1))
        fg.close()
        # Day of Week
        f.write(html_header(2, 'Day of Week'))
        day_of_week = data.getActivityByDayOfWeek()
        f.write('<div class="vtable"><table>')
        f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
        fp = open(path + '/day_of_week.dat', 'w')
        for d in range(0, 7):
            commits = 0
            if d in day_of_week:
                commits = day_of_week[d]
            fp.write('%d %s %d\n' % (d + 1, WEEKDAYS[d], commits))
            f.write('<tr>')
            f.write('<th>%s</th>' % (WEEKDAYS[d]))
            if d in day_of_week:
                f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
            else:
                f.write('<td>0</td>')
            f.write('</tr>')
        f.write('</table></div>')
        f.write('<img src="day_of_week.png" alt="Day of Week" />')
        fp.close()
        # Hour of Week
        f.write(html_header(2, 'Hour of Week'))
        f.write('<table>')
        f.write('<tr><th>Weekday</th>')
        for hour in range(0, 24):
            f.write('<th>%d</th>' % (hour))
        f.write('</tr>')
        for weekday in range(0, 7):
            f.write('<tr><th>%s</th>' % (WEEKDAYS[weekday]))
            for hour in range(0, 24):
                try:
                    commits = data.activity_by_hour_of_week[weekday][hour]
                except KeyError:
                    commits = 0
                if commits != 0:
                    f.write('<td')
                    r = 127 + int((float(commits) / data.activity_by_hour_of_week_busiest) * 128)
                    f.write(' style="background-color: rgb(%d, 0, 0)"' % r)
                    f.write('>%d</td>' % commits)
                else:
                    f.write('<td></td>')
            f.write('</tr>')
        f.write('</table>')
        # Month of Year
        f.write(html_header(2, 'Month of Year'))
        f.write('<div class="vtable"><table>')
        f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
        fp = open (path + '/month_of_year.dat', 'w')
        for mm in range(1, 13):
            commits = 0
            if mm in data.activity_by_month_of_year:
                commits = data.activity_by_month_of_year[mm]
            f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, (100.0 * commits) / data.getTotalCommits()))
            fp.write('%d %d\n' % (mm, commits))
        fp.close()
        f.write('</table></div>')
        f.write('<img src="month_of_year.png" alt="Month of Year" />')
        # Commits by year/month
        f.write(html_header(2, 'Commits by year/month'))
        f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th><th>Lines added</th><th>Lines removed</th></tr>')
        for yymm in reversed(sorted(data.commits_by_month.keys())):
            f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td></tr>' % (yymm, data.commits_by_month.get(yymm,0), data.lines_added_by_month.get(yymm,0), data.lines_removed_by_month.get(yymm,0)))
        f.write('</table></div>')
        f.write('<img src="commits_by_year_month.png" alt="Commits by year/month" />')
        fg = open(path + '/commits_by_year_month.dat', 'w')
        for yymm in sorted(data.commits_by_month.keys()):
            fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
        fg.close()
        # Commits by year
        f.write(html_header(2, 'Commits by Year'))
        f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th><th>Lines added</th><th>Lines removed</th></tr>')
        for yy in reversed(sorted(data.commits_by_year.keys())):
            f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td></tr>' % (yy, data.commits_by_year.get(yy,0), (100.0 * data.commits_by_year.get(yy,0)) / data.getTotalCommits(), data.lines_added_by_year.get(yy,0), data.lines_removed_by_year.get(yy,0)))
        f.write('</table></div>')
        f.write('<img src="commits_by_year.png" alt="Commits by Year" />')
        fg = open(path + '/commits_by_year.dat', 'w')
        for yy in sorted(data.commits_by_year.keys()):
            fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
        fg.close()
        # Commits by timezone
        f.write(html_header(2, 'Commits by Timezone'))
        f.write('<table><tr>')
        f.write('<th>Timezone</th><th>Commits</th>')
        max_commits_on_tz = max(data.commits_by_timezone.values())
        # sort numerically: timezone keys look like "+0200"/"-0500" strings
        for i in sorted(data.commits_by_timezone.keys(), key = lambda n : int(n)):
            commits = data.commits_by_timezone[i]
            r = 127 + int((float(commits) / max_commits_on_tz) * 128)
            f.write('<tr><th>%s</th><td style="background-color: rgb(%d, 0, 0)">%d</td></tr>' % (i, r, commits))
        f.write('</tr></table>')
        f.write('</body></html>')
        f.close()
        ###
        # Authors
        f = open(path + '/authors.html', 'w')
        self.printHeader(f)
        f.write('<h1>Authors</h1>')
        self.printNav(f)
        # Authors :: List of authors
        f.write(html_header(2, 'List of Authors'))
        f.write('<table class="authors sortable" id="authors">')
        f.write('<tr><th>Author</th><th>Commits (%)</th><th>+ lines</th><th>- lines</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th>Active days</th><th># by commits</th></tr>')
        for author in data.getAuthors(conf['max_authors']):
            info = data.getAuthorInfo(author)
            f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td><td>%d</td></tr>' % (author, info['commits'], info['commits_frac'], info['lines_added'], info['lines_removed'], info['date_first'], info['date_last'], info['timedelta'], len(info['active_days']), info['place_by_commits']))
        f.write('</table>')
        allauthors = data.getAuthors()
        if len(allauthors) > conf['max_authors']:
            rest = allauthors[conf['max_authors']:]
            f.write('<p class="moreauthors">These didn\'t make it to the top: %s</p>' % ', '.join(rest))
        f.write(html_header(2, 'Cumulated Added Lines of Code per Author'))
        f.write('<img src="lines_of_code_by_author.png" alt="Lines of code per Author" />')
        if len(allauthors) > conf['max_authors']:
            f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])
        f.write(html_header(2, 'Commits per Author'))
        f.write('<img src="commits_by_author.png" alt="Commits per Author" />')
        if len(allauthors) > conf['max_authors']:
            f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])
        fgl = open(path + '/lines_of_code_by_author.dat', 'w')
        fgc = open(path + '/commits_by_author.dat', 'w')
        lines_by_authors = {} # cumulated added lines by
        # author. to save memory,
        # changes_by_date_by_author[stamp][author] is defined
        # only at points where author commits.
        # lines_by_authors allows us to generate all the
        # points in the .dat file.
        # Don't rely on getAuthors to give the same order each
        # time. Be robust and keep the list in a variable.
        commits_by_authors = {} # cumulated added lines by
        self.authors_to_plot = data.getAuthors(conf['max_authors'])
        for author in self.authors_to_plot:
            lines_by_authors[author] = 0
            commits_by_authors[author] = 0
        for stamp in sorted(data.changes_by_date_by_author.keys()):
            fgl.write('%d' % stamp)
            fgc.write('%d' % stamp)
            for author in self.authors_to_plot:
                # carry forward each author's last cumulative value so every
                # stamp row has a column for every plotted author
                if author in data.changes_by_date_by_author[stamp].keys():
                    lines_by_authors[author] = data.changes_by_date_by_author[stamp][author]['lines_added']
                    commits_by_authors[author] = data.changes_by_date_by_author[stamp][author]['commits']
                fgl.write(' %d' % lines_by_authors[author])
                fgc.write(' %d' % commits_by_authors[author])
            fgl.write('\n')
            fgc.write('\n')
        fgl.close()
        fgc.close()
        # Authors :: Author of Month
        f.write(html_header(2, 'Author of Month'))
        f.write('<table class="sortable" id="aom">')
        f.write('<tr><th>Month</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
        for yymm in reversed(sorted(data.author_of_month.keys())):
            authordict = data.author_of_month[yymm]
            authors = getkeyssortedbyvalues(authordict)
            authors.reverse()
            commits = data.author_of_month[yymm][authors[0]]
            next = ', '.join(authors[1:conf['authors_top']+1])
            f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yymm, authors[0], commits, (100.0 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], next, len(authors)))
        f.write('</table>')
        f.write(html_header(2, 'Author of Year'))
        f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
        for yy in reversed(sorted(data.author_of_year.keys())):
            authordict = data.author_of_year[yy]
            authors = getkeyssortedbyvalues(authordict)
            authors.reverse()
            commits = data.author_of_year[yy][authors[0]]
            next = ', '.join(authors[1:conf['authors_top']+1])
            f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yy, authors[0], commits, (100.0 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next, len(authors)))
        f.write('</table>')
        # Domains
        f.write(html_header(2, 'Commits by Domains'))
        domains_by_commits = getkeyssortedbyvaluekey(data.domains, 'commits')
        domains_by_commits.reverse() # most first
        f.write('<div class="vtable"><table>')
        f.write('<tr><th>Domains</th><th>Total (%)</th></tr>')
        fp = open(path + '/domains.dat', 'w')
        n = 0
        for domain in domains_by_commits:
            if n == conf['max_domains']:
                break
            commits = 0  # NOTE(review): unused — overwritten by info['commits'] below
            n += 1
            info = data.getDomainInfo(domain)
            fp.write('%s %d %d\n' % (domain, n , info['commits']))
            f.write('<tr><th>%s</th><td>%d (%.2f%%)</td></tr>' % (domain, info['commits'], (100.0 * info['commits'] / totalcommits)))
        f.write('</table></div>')
        f.write('<img src="domains.png" alt="Commits by Domains" />')
        fp.close()
        f.write('</body></html>')
        f.close()
        ###
        # Files
        f = open(path + '/files.html', 'w')
        self.printHeader(f)
        f.write('<h1>Files</h1>')
        self.printNav(f)
        f.write('<dl>\n')
        f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
        f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
        try:
            f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % (float(data.getTotalSize()) / data.getTotalFiles()))
        except ZeroDivisionError:
            # empty repository: simply omit the average-size row
            pass
        f.write('</dl>\n')
        # Files :: File count by date
        f.write(html_header(2, 'File count by date'))
        # use set to get rid of duplicate/unnecessary entries
        files_by_date = set()
        for stamp in sorted(data.files_by_stamp.keys()):
            files_by_date.add('%s %d' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
        fg = open(path + '/files_by_date.dat', 'w')
        for line in sorted(list(files_by_date)):
            fg.write('%s\n' % line)
        #for stamp in sorted(data.files_by_stamp.keys()):
        #	fg.write('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
        fg.close()
        f.write('<img src="files_by_date.png" alt="Files by Date" />')
        #f.write('<h2>Average file size by date</h2>')
        # Files :: Extensions
        f.write(html_header(2, 'Extensions'))
        f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
        for ext in sorted(data.extensions.keys()):
            files = data.extensions[ext]['files']
            lines = data.extensions[ext]['lines']
            try:
                loc_percentage = (100.0 * lines) / data.getTotalLOC()
            except ZeroDivisionError:
                loc_percentage = 0
            # lines / files: integer division under Python 2
            f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (ext, files, (100.0 * files) / data.getTotalFiles(), lines, loc_percentage, lines / files))
        f.write('</table>')
        f.write('</body></html>')
        f.close()
        ###
        # Lines
        f = open(path + '/lines.html', 'w')
        self.printHeader(f)
        f.write('<h1>Lines</h1>')
        self.printNav(f)
        f.write('<dl>\n')
        f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
        f.write('</dl>\n')
        f.write(html_header(2, 'Lines of Code'))
        f.write('<img src="lines_of_code.png" />')
        fg = open(path + '/lines_of_code.dat', 'w')
        for stamp in sorted(data.changes_by_date.keys()):
            fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
        fg.close()
        f.write('</body></html>')
        f.close()
        ###
        # tags.html
        f = open(path + '/tags.html', 'w')
        self.printHeader(f)
        f.write('<h1>Tags</h1>')
        self.printNav(f)
        f.write('<dl>')
        f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
        if len(data.tags) > 0:
            f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (1.0 * data.getTotalCommits() / len(data.tags)))
        f.write('</dl>')
        f.write('<table class="tags">')
        f.write('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
        # sort the tags by date desc
        tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
        for tag in tags_sorted_by_date_desc:
            authorinfo = []
            self.authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
            for i in reversed(self.authors_by_commits):
                authorinfo.append('%s (%d)' % (i, data.tags[tag]['authors'][i]))
            f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
        f.write('</table>')
        f.write('</body></html>')
        f.close()
        self.createGraphs(path)

    def createGraphs(self, path):
        """Write one gnuplot .plot script per chart into `path`, then run
        gnuplot on each; the scripts read the .dat files emitted by create()."""
        print 'Generating graphs...'
        # hour of day
        f = open(path + '/hour_of_day.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'hour_of_day.png'
unset key
set xrange [0.5:24.5]
set yrange [0:]
set xtics 4
set grid y
set ylabel "Commits"
plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
""")
        f.close()
        # day of week
        f = open(path + '/day_of_week.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'day_of_week.png'
unset key
set xrange [0.5:7.5]
set yrange [0:]
set xtics 1
set grid y
set ylabel "Commits"
plot 'day_of_week.dat' using 1:3:(0.5):xtic(2) w boxes fs solid
""")
        f.close()
        # Domains
        f = open(path + '/domains.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'domains.png'
unset key
unset xtics
set yrange [0:]
set grid y
set ylabel "Commits"
plot 'domains.dat' using 2:3:(0.5) with boxes fs solid, '' using 2:3:1 with labels rotate by 45 offset 0,1
""")
        f.close()
        # Month of Year
        f = open(path + '/month_of_year.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'month_of_year.png'
unset key
set xrange [0.5:12.5]
set yrange [0:]
set xtics 1
set grid y
set ylabel "Commits"
plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
""")
        f.close()
        # commits_by_year_month
        f = open(path + '/commits_by_year_month.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'commits_by_year_month.png'
unset key
set yrange [0:]
set xdata time
set timefmt "%Y-%m"
set format x "%Y-%m"
set xtics rotate
set bmargin 5
set grid y
set ylabel "Commits"
plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
""")
        f.close()
        # commits_by_year
        f = open(path + '/commits_by_year.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'commits_by_year.png'
unset key
set yrange [0:]
set xtics 1 rotate
set grid y
set ylabel "Commits"
set yrange [0:]
plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
""")
        f.close()
        # Files by date
        f = open(path + '/files_by_date.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'files_by_date.png'
unset key
set yrange [0:]
set xdata time
set timefmt "%Y-%m-%d"
set format x "%Y-%m-%d"
set grid y
set ylabel "Files"
set xtics rotate
set ytics autofreq
set bmargin 6
plot 'files_by_date.dat' using 1:2 w steps
""")
        f.close()
        # Lines of Code
        f = open(path + '/lines_of_code.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'lines_of_code.png'
unset key
set yrange [0:]
set xdata time
set timefmt "%s"
set format x "%Y-%m-%d"
set grid y
set ylabel "Lines"
set xtics rotate
set bmargin 6
plot 'lines_of_code.dat' using 1:2 w lines
""")
        f.close()
        # Lines of Code Added per author
        f = open(path + '/lines_of_code_by_author.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set terminal png transparent size 640,480
set output 'lines_of_code_by_author.png'
set key left top
set yrange [0:]
set xdata time
set timefmt "%s"
set format x "%Y-%m-%d"
set grid y
set ylabel "Lines"
set xtics rotate
set bmargin 6
plot """
)
        i = 1
        plots = []
        for a in self.authors_to_plot:
            # column 1 is the timestamp, so author columns start at 2
            i = i + 1
            author = a.replace("\"", "\\\"").replace("`", "")
            plots.append("""'lines_of_code_by_author.dat' using 1:%d title "%s" w lines""" % (i, author))
        f.write(", ".join(plots))
        f.write('\n')
        f.close()
        # Commits per author
        f = open(path + '/commits_by_author.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set terminal png transparent size 640,480
set output 'commits_by_author.png'
set key left top
set yrange [0:]
set xdata time
set timefmt "%s"
set format x "%Y-%m-%d"
set grid y
set ylabel "Commits"
set xtics rotate
set bmargin 6
plot """
)
        i = 1
        plots = []
        for a in self.authors_to_plot:
            i = i + 1
            author = a.replace("\"", "\\\"").replace("`", "")
            plots.append("""'commits_by_author.dat' using 1:%d title "%s" w lines""" % (i, author))
        f.write(", ".join(plots))
        f.write('\n')
        f.close()
        # run gnuplot on every script written above (relative paths inside
        # the scripts require the chdir)
        os.chdir(path)
        files = glob.glob(path + '/*.plot')
        for f in files:
            out = getpipeoutput([gnuplot_cmd + ' "%s"' % f])
            if len(out) > 0:
                print out

    def printHeader(self, f, title = ''):
        """Write the common XHTML document head to `f`.

        NOTE(review): the `title` parameter is never used; the page title
        comes from self.title set in create().
        """
        f.write(
"""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>GitStats - %s</title>
<link rel="stylesheet" href="%s" type="text/css" />
<meta name="generator" content="GitStats %s" />
<script type="text/javascript" src="sortable.js"></script>
</head>
<body>
""" % (self.title, conf['style'], getversion()))

    def printNav(self, f):
        """Write the navigation bar linking the generated pages to `f`."""
        f.write("""
<div class="nav">
<ul>
<li><a href="index.html">General</a></li>
<li><a href="activity.html">Activity</a></li>
<li><a href="authors.html">Authors</a></li>
<li><a href="files.html">Files</a></li>
<li><a href="lines.html">Lines</a></li>
<li><a href="tags.html">Tags</a></li>
</ul>
</div>
""")
def usage():
    """Print command-line usage help, including the current default
    configuration values, to standard output."""
    print """
Usage: gitstats [options] <gitpath..> <outputpath>
Options:
-c key=value Override configuration value
Default config values:
%s
Please see the manual page for more details.
""" % conf
class GitStats:
    """Command-line driver: parse options, collect data from each given git
    repository, and write the HTML report."""

    def run(self, args_orig):
        """Run gitstats with CLI arguments `args_orig` (everything after argv[0]).

        Expects one or more git paths followed by the output path; exits the
        process on bad usage or missing gnuplot.
        """
        optlist, args = getopt.getopt(args_orig, 'hc:', ["help"])
        for o,v in optlist:
            if o == '-c':
                # '-c key=value': the type of the existing conf entry decides
                # how the value is parsed (int, dict 'key,value', or string)
                key, value = v.split('=', 1)
                if key not in conf:
                    raise KeyError('no such key "%s" in config' % key)
                if isinstance(conf[key], int):
                    conf[key] = int(value)
                elif isinstance(conf[key], dict):
                    kk,vv = value.split(',', 1)
                    conf[key][kk] = vv
                else:
                    conf[key] = value
            elif o in ('-h', '--help'):
                usage()
                sys.exit()
        if len(args) < 2:
            # need at least one git path and the output path
            usage()
            sys.exit(0)
        outputpath = os.path.abspath(args[-1])
        rundir = os.getcwd()
        try:
            os.makedirs(outputpath)
        except OSError:
            # already exists (or not creatable) — validated just below
            pass
        if not os.path.isdir(outputpath):
            print 'FATAL: Output path is not a directory or does not exist'
            sys.exit(1)
        if not getgnuplotversion():
            print 'gnuplot not found'
            sys.exit(1)
        print 'Output path: %s' % outputpath
        cachefile = os.path.join(outputpath, 'gitstats.cache')
        data = GitDataCollector()
        data.loadCache(cachefile)
        # all paths except the last are git repositories to collect from
        for gitpath in args[0:-1]:
            print 'Git path: %s' % gitpath
            prevdir = os.getcwd()
            os.chdir(gitpath)
            print 'Collecting data...'
            data.collect(gitpath)
            os.chdir(prevdir)
        print 'Refining data...'
        data.saveCache(cachefile)
        data.refine()
        os.chdir(rundir)
        print 'Generating report...'
        report = HTMLReportCreator()
        report.create(data, outputpath)
        time_end = time.time()
        # time_start / exectime_external are module-level globals —
        # presumably maintained at startup and by getpipeoutput (defined
        # outside this view)
        exectime_internal = time_end - time_start
        print 'Execution time %.5f secs, %.5f secs (%.2f %%) in external commands)' % (exectime_internal, exectime_external, (100.0 * exectime_external) / exectime_internal)
        if sys.stdin.isatty():
            print 'You may now run:'
            print
            print ' sensible-browser \'%s\'' % os.path.join(outputpath, 'index.html').replace("'", "'\\''")
            print
  1290. if __name__=='__main__':
  1291. g = GitStats()
  1292. g.run(sys.argv[1:])