123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401
  1. #!/usr/bin/env python
  2. # Copyright (c) 2007-2011 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/author.txt)
  3. # GPLv2 / GPLv3
  4. import datetime
  5. import getopt
  6. import glob
  7. import os
  8. import pickle
  9. import platform
  10. import re
  11. import shutil
  12. import subprocess
  13. import sys
  14. import time
  15. import zlib
  16. GNUPLOT_COMMON = 'set terminal png transparent size 640,240\nset size 1.0,1.0\n'
  17. ON_LINUX = (platform.system() == 'Linux')
  18. WEEKDAYS = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
  19. exectime_internal = 0.0
  20. exectime_external = 0.0
  21. time_start = time.time()
  22. # By default, gnuplot is searched from path, but can be overridden with the
  23. # environment variable "GNUPLOT"
  24. gnuplot_cmd = 'gnuplot'
  25. if 'GNUPLOT' in os.environ:
  26. gnuplot_cmd = os.environ['GNUPLOT']
  27. conf = {
  28. 'max_domains': 10,
  29. 'max_ext_length': 10,
  30. 'style': 'gitstats.css',
  31. 'max_authors': 20,
  32. 'authors_top': 5,
  33. 'commit_begin': '',
  34. 'commit_end': 'HEAD',
  35. 'linear_linestats': 1,
  36. 'project_name': '',
  37. }
  38. def getpipeoutput(cmds, quiet = False):
  39. global exectime_external
  40. start = time.time()
  41. if not quiet and ON_LINUX and os.isatty(1):
  42. print '>> ' + ' | '.join(cmds),
  43. sys.stdout.flush()
  44. p0 = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
  45. p = p0
  46. for x in cmds[1:]:
  47. p = subprocess.Popen(x, stdin = p0.stdout, stdout = subprocess.PIPE, shell = True)
  48. p0 = p
  49. output = p.communicate()[0]
  50. end = time.time()
  51. if not quiet:
  52. if ON_LINUX and os.isatty(1):
  53. print '\r',
  54. print '[%.5f] >> %s' % (end - start, ' | '.join(cmds))
  55. exectime_external += (end - start)
  56. return output.rstrip('\n')
  57. def getcommitrange(defaultrange = 'HEAD', end_only = False):
  58. if len(conf['commit_end']) > 0:
  59. if end_only or len(conf['commit_begin']) == 0:
  60. return conf['commit_end']
  61. return '%s..%s' % (conf['commit_begin'], conf['commit_end'])
  62. return defaultrange
  63. def getkeyssortedbyvalues(dict):
  64. return map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items())))
  65. # dict['author'] = { 'commits': 512 } - ...key(dict, 'commits')
  66. def getkeyssortedbyvaluekey(d, key):
  67. return map(lambda el : el[1], sorted(map(lambda el : (d[el][key], el), d.keys())))
  68. def getstatsummarycounts(line):
  69. numbers = re.findall('\d+', line)
  70. if len(numbers) == 1:
  71. # neither insertions nor deletions: may probably only happen for "0 files changed"
  72. numbers.append(0);
  73. numbers.append(0);
  74. elif len(numbers) == 2 and line.find('(+)') != -1:
  75. numbers.append(0); # only insertions were printed on line
  76. elif len(numbers) == 2 and line.find('(-)') != -1:
  77. numbers.insert(1, 0); # only deletions were printed on line
  78. return numbers
  79. VERSION = 0
  80. def getversion():
  81. global VERSION
  82. if VERSION == 0:
  83. VERSION = getpipeoutput(["git rev-parse --short %s" % getcommitrange('HEAD')]).split('\n')[0]
  84. return VERSION
  85. def getgitversion():
  86. return getpipeoutput(['git --version']).split('\n')[0]
  87. def getgnuplotversion():
  88. return getpipeoutput(['gnuplot --version']).split('\n')[0]
  89. class DataCollector:
  90. """Manages data collection from a revision control repository."""
  91. def __init__(self):
  92. self.stamp_created = time.time()
  93. self.cache = {}
  94. self.total_authors = 0
  95. self.activity_by_hour_of_day = {} # hour -> commits
  96. self.activity_by_day_of_week = {} # day -> commits
  97. self.activity_by_month_of_year = {} # month [1-12] -> commits
  98. self.activity_by_hour_of_week = {} # weekday -> hour -> commits
  99. self.activity_by_hour_of_day_busiest = 0
  100. self.activity_by_hour_of_week_busiest = 0
  101. self.activity_by_year_week = {} # yy_wNN -> commits
  102. self.activity_by_year_week_peak = 0
  103. self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed}
  104. self.total_commits = 0
  105. self.total_files = 0
  106. self.authors_by_commits = 0
  107. # domains
  108. self.domains = {} # domain -> commits
  109. # author of the month
  110. self.author_of_month = {} # month -> author -> commits
  111. self.author_of_year = {} # year -> author -> commits
  112. self.commits_by_month = {} # month -> commits
  113. self.commits_by_year = {} # year -> commits
  114. self.lines_added_by_month = {} # month -> lines added
  115. self.lines_added_by_year = {} # year -> lines added
  116. self.lines_removed_by_month = {} # month -> lines removed
  117. self.lines_removed_by_year = {} # year -> lines removed
  118. self.first_commit_stamp = 0
  119. self.last_commit_stamp = 0
  120. self.last_active_day = None
  121. self.active_days = set()
  122. # lines
  123. self.total_lines = 0
  124. self.total_lines_added = 0
  125. self.total_lines_removed = 0
  126. # size
  127. self.total_size = 0
  128. # timezone
  129. self.commits_by_timezone = {} # timezone -> commits
  130. # tags
  131. self.tags = {}
  132. self.files_by_stamp = {} # stamp -> files
  133. # extensions
  134. self.extensions = {} # extension -> files, lines
  135. # line statistics
  136. self.changes_by_date = {} # stamp -> { files, ins, del }
  137. ##
  138. # This should be the main function to extract data from the repository.
  139. def collect(self, dir):
  140. self.dir = dir
  141. if len(conf['project_name']) == 0:
  142. self.projectname = os.path.basename(os.path.abspath(dir))
  143. else:
  144. self.projectname = conf['project_name']
  145. ##
  146. # Load cacheable data
  147. def loadCache(self, cachefile):
  148. if not os.path.exists(cachefile):
  149. return
  150. print 'Loading cache...'
  151. f = open(cachefile, 'rb')
  152. try:
  153. self.cache = pickle.loads(zlib.decompress(f.read()))
  154. except:
  155. # temporary hack to upgrade non-compressed caches
  156. f.seek(0)
  157. self.cache = pickle.load(f)
  158. f.close()
  159. ##
  160. # Produce any additional statistics from the extracted data.
  161. def refine(self):
  162. pass
  163. ##
  164. # : get a dictionary of author
  165. def getAuthorInfo(self, author):
  166. return None
  167. def getActivityByDayOfWeek(self):
  168. return {}
  169. def getActivityByHourOfDay(self):
  170. return {}
  171. # : get a dictionary of domains
  172. def getDomainInfo(self, domain):
  173. return None
  174. ##
  175. # Get a list of authors
  176. def getAuthors(self):
  177. return []
  178. def getFirstCommitDate(self):
  179. return datetime.datetime.now()
  180. def getLastCommitDate(self):
  181. return datetime.datetime.now()
  182. def getStampCreated(self):
  183. return self.stamp_created
  184. def getTags(self):
  185. return []
  186. def getTotalAuthors(self):
  187. return -1
  188. def getTotalCommits(self):
  189. return -1
  190. def getTotalFiles(self):
  191. return -1
  192. def getTotalLOC(self):
  193. return -1
  194. ##
  195. # Save cacheable data
  196. def saveCache(self, cachefile):
  197. print 'Saving cache...'
  198. f = open(cachefile, 'wb')
  199. #pickle.dump(self.cache, f)
  200. data = zlib.compress(pickle.dumps(self.cache))
  201. f.write(data)
  202. f.close()
  203. class GitDataCollector(DataCollector):
  204. def collect(self, dir):
  205. DataCollector.collect(self, dir)
  206. try:
  207. self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getcommitrange(), 'wc -l']))
  208. except:
  209. self.total_authors = 0
  210. #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
  211. # tags
  212. lines = getpipeoutput(['git show-ref --tags']).split('\n')
  213. for line in lines:
  214. if len(line) == 0:
  215. continue
  216. (hash, tag) = line.split(' ')
  217. tag = tag.replace('refs/tags/', '')
  218. output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % hash])
  219. if len(output) > 0:
  220. parts = output.split(' ')
  221. stamp = 0
  222. try:
  223. stamp = int(parts[0])
  224. except ValueError:
  225. stamp = 0
  226. self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} }
  227. # collect info on tags, starting from latest
  228. tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), self.tags.items()))))
  229. prev = None
  230. for tag in reversed(tags_sorted_by_date_desc):
  231. cmd = 'git shortlog -s "%s"' % tag
  232. if prev != None:
  233. cmd += ' "^%s"' % prev
  234. output = getpipeoutput([cmd])
  235. if len(output) == 0:
  236. continue
  237. prev = tag
  238. for line in output.split('\n'):
  239. parts = re.split('\s+', line, 2)
  240. commits = int(parts[1])
  241. author = parts[2]
  242. self.tags[tag]['commits'] += commits
  243. self.tags[tag]['authors'][author] = commits
  244. # Collect revision statistics
  245. # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
  246. lines = getpipeoutput(['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getcommitrange('HEAD'), 'grep -v ^commit']).split('\n')
  247. for line in lines:
  248. parts = line.split(' ', 4)
  249. author = ''
  250. try:
  251. stamp = int(parts[0])
  252. except ValueError:
  253. stamp = 0
  254. timezone = parts[3]
  255. author, mail = parts[4].split('<', 1)
  256. author = author.rstrip()
  257. mail = mail.rstrip('>')
  258. domain = '?'
  259. if mail.find('@') != -1:
  260. domain = mail.rsplit('@', 1)[1]
  261. date = datetime.datetime.fromtimestamp(float(stamp))
  262. # First and last commit stamp (may be in any order because of cherry-picking and patches)
  263. if stamp > self.last_commit_stamp:
  264. self.last_commit_stamp = stamp
  265. if self.first_commit_stamp == 0 or stamp < self.first_commit_stamp:
  266. self.first_commit_stamp = stamp
  267. # activity
  268. # hour
  269. hour = date.hour
  270. self.activity_by_hour_of_day[hour] = self.activity_by_hour_of_day.get(hour, 0) + 1
  271. # most active hour?
  272. if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
  273. self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]
  274. # day of week
  275. day = date.weekday()
  276. self.activity_by_day_of_week[day] = self.activity_by_day_of_week.get(day, 0) + 1
  277. # domain stats
  278. if domain not in self.domains:
  279. self.domains[domain] = {}
  280. # commits
  281. self.domains[domain]['commits'] = self.domains[domain].get('commits', 0) + 1
  282. # hour of week
  283. if day not in self.activity_by_hour_of_week:
  284. self.activity_by_hour_of_week[day] = {}
  285. self.activity_by_hour_of_week[day][hour] = self.activity_by_hour_of_week[day].get(hour, 0) + 1
  286. # most active hour?
  287. if self.activity_by_hour_of_week[day][hour] > self.activity_by_hour_of_week_busiest:
  288. self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[day][hour]
  289. # month of year
  290. month = date.month
  291. self.activity_by_month_of_year[month] = self.activity_by_month_of_year.get(month, 0) + 1
  292. # yearly/weekly activity
  293. yyw = date.strftime('%Y-%W')
  294. self.activity_by_year_week[yyw] = self.activity_by_year_week.get(yyw, 0) + 1
  295. if self.activity_by_year_week_peak < self.activity_by_year_week[yyw]:
  296. self.activity_by_year_week_peak = self.activity_by_year_week[yyw]
  297. # author stats
  298. if author not in self.authors:
  299. self.authors[author] = {}
  300. # commits, note again that commits may be in any date order because of cherry-picking and patches
  301. if 'last_commit_stamp' not in self.authors[author]:
  302. self.authors[author]['last_commit_stamp'] = stamp
  303. if stamp > self.authors[author]['last_commit_stamp']:
  304. self.authors[author]['last_commit_stamp'] = stamp
  305. if 'first_commit_stamp' not in self.authors[author]:
  306. self.authors[author]['first_commit_stamp'] = stamp
  307. if stamp < self.authors[author]['first_commit_stamp']:
  308. self.authors[author]['first_commit_stamp'] = stamp
  309. # author of the month/year
  310. yymm = date.strftime('%Y-%m')
  311. if yymm in self.author_of_month:
  312. self.author_of_month[yymm][author] = self.author_of_month[yymm].get(author, 0) + 1
  313. else:
  314. self.author_of_month[yymm] = {}
  315. self.author_of_month[yymm][author] = 1
  316. self.commits_by_month[yymm] = self.commits_by_month.get(yymm, 0) + 1
  317. yy = date.year
  318. if yy in self.author_of_year:
  319. self.author_of_year[yy][author] = self.author_of_year[yy].get(author, 0) + 1
  320. else:
  321. self.author_of_year[yy] = {}
  322. self.author_of_year[yy][author] = 1
  323. self.commits_by_year[yy] = self.commits_by_year.get(yy, 0) + 1
  324. # authors: active days
  325. yymmdd = date.strftime('%Y-%m-%d')
  326. if 'last_active_day' not in self.authors[author]:
  327. self.authors[author]['last_active_day'] = yymmdd
  328. self.authors[author]['active_days'] = set([yymmdd])
  329. elif yymmdd != self.authors[author]['last_active_day']:
  330. self.authors[author]['last_active_day'] = yymmdd
  331. self.authors[author]['active_days'].add(yymmdd)
  332. # project: active days
  333. if yymmdd != self.last_active_day:
  334. self.last_active_day = yymmdd
  335. self.active_days.add(yymmdd)
  336. # timezone
  337. self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1
  338. # TODO Optimize this, it's the worst bottleneck
  339. # outputs "<stamp> <files>" for each revision
  340. revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getcommitrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
  341. lines = []
  342. for revline in revlines:
  343. time, rev = revline.split(' ')
  344. linecount = self.getFilesInCommit(rev)
  345. lines.append('%d %d' % (int(time), linecount))
  346. self.total_commits += len(lines)
  347. for line in lines:
  348. parts = line.split(' ')
  349. if len(parts) != 2:
  350. continue
  351. (stamp, files) = parts[0:2]
  352. try:
  353. self.files_by_stamp[int(stamp)] = int(files)
  354. except ValueError:
  355. print 'Warning: failed to parse line "%s"' % line
  356. # extensions and size of files
  357. lines = getpipeoutput(['git ls-tree -r -l -z %s' % getcommitrange('HEAD', end_only = True)]).split('\000')
  358. for line in lines:
  359. if len(line) == 0:
  360. continue
  361. parts = re.split('\s+', line, 5)
  362. if parts[0] == '160000' and parts[3] == '-':
  363. # skip submodules
  364. continue
  365. sha1 = parts[2]
  366. size = int(parts[3])
  367. fullpath = parts[4]
  368. self.total_size += size
  369. self.total_files += 1
  370. filename = fullpath.split('/')[-1] # strip directories
  371. if filename.find('.') == -1 or filename.rfind('.') == 0:
  372. ext = ''
  373. else:
  374. ext = filename[(filename.rfind('.') + 1):]
  375. if len(ext) > conf['max_ext_length']:
  376. ext = ''
  377. if ext not in self.extensions:
  378. self.extensions[ext] = {'files': 0, 'lines': 0}
  379. self.extensions[ext]['files'] += 1
  380. try:
  381. self.extensions[ext]['lines'] += self.getLinesInBlob(sha1)
  382. except:
  383. print 'Warning: Could not count lines for file "%s"' % line
  384. # line statistics
  385. # outputs:
  386. # N files changed, N insertions (+), N deletions(-)
  387. # <stamp> <author>
  388. self.changes_by_date = {} # stamp -> { files, ins, del }
  389. # computation of lines of code by date is better done
  390. # on a linear history.
  391. extra = ''
  392. if conf['linear_linestats']:
  393. extra = '--first-parent -m'
  394. lines = getpipeoutput(['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getcommitrange('HEAD'))]).split('\n')
  395. lines.reverse()
  396. files = 0; inserted = 0; deleted = 0; total_lines = 0
  397. author = None
  398. for line in lines:
  399. if len(line) == 0:
  400. continue
  401. # <stamp> <author>
  402. if re.search('files? changed', line) == None:
  403. pos = line.find(' ')
  404. if pos != -1:
  405. try:
  406. (stamp, author) = (int(line[:pos]), line[pos+1:])
  407. self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }
  408. date = datetime.datetime.fromtimestamp(stamp)
  409. yymm = date.strftime('%Y-%m')
  410. self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
  411. self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted
  412. yy = date.year
  413. self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy,0) + inserted
  414. self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted
  415. files, inserted, deleted = 0, 0, 0
  416. except ValueError:
  417. print 'Warning: unexpected line "%s"' % line
  418. else:
  419. print 'Warning: unexpected line "%s"' % line
  420. else:
  421. numbers = getstatsummarycounts(line)
  422. if len(numbers) == 3:
  423. (files, inserted, deleted) = map(lambda el : int(el), numbers)
  424. total_lines += inserted
  425. total_lines -= deleted
  426. self.total_lines_added += inserted
  427. self.total_lines_removed += deleted
  428. else:
  429. print 'Warning: failed to handle line "%s"' % line
  430. (files, inserted, deleted) = (0, 0, 0)
  431. #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
  432. self.total_lines = total_lines
  433. # Per-author statistics
  434. # defined for stamp, author only if author commited at this timestamp.
  435. self.changes_by_date_by_author = {} # stamp -> author -> lines_added
  436. # Similar to the above, but never use --first-parent
  437. # (we need to walk through every commit to know who
  438. # committed what, not just through mainline)
  439. lines = getpipeoutput(['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getcommitrange('HEAD'))]).split('\n')
  440. lines.reverse()
  441. files = 0; inserted = 0; deleted = 0
  442. author = None
  443. stamp = 0
  444. for line in lines:
  445. if len(line) == 0:
  446. continue
  447. # <stamp> <author>
  448. if re.search('files? changed', line) == None:
  449. pos = line.find(' ')
  450. if pos != -1:
  451. try:
  452. oldstamp = stamp
  453. (stamp, author) = (int(line[:pos]), line[pos+1:])
  454. if oldstamp > stamp:
  455. # clock skew, keep old timestamp to avoid having ugly graph
  456. stamp = oldstamp
  457. if author not in self.authors:
  458. self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
  459. self.authors[author]['commits'] = self.authors[author].get('commits', 0) + 1
  460. self.authors[author]['lines_added'] = self.authors[author].get('lines_added', 0) + inserted
  461. self.authors[author]['lines_removed'] = self.authors[author].get('lines_removed', 0) + deleted
  462. if stamp not in self.changes_by_date_by_author:
  463. self.changes_by_date_by_author[stamp] = {}
  464. if author not in self.changes_by_date_by_author[stamp]:
  465. self.changes_by_date_by_author[stamp][author] = {}
  466. self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author]['lines_added']
  467. self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits']
  468. files, inserted, deleted = 0, 0, 0
  469. except ValueError:
  470. print 'Warning: unexpected line "%s"' % line
  471. else:
  472. print 'Warning: unexpected line "%s"' % line
  473. else:
  474. numbers = getstatsummarycounts(line);
  475. if len(numbers) == 3:
  476. (files, inserted, deleted) = map(lambda el : int(el), numbers)
  477. else:
  478. print 'Warning: failed to handle line "%s"' % line
  479. (files, inserted, deleted) = (0, 0, 0)
  480. def refine(self):
  481. # authors
  482. # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
  483. self.authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
  484. self.authors_by_commits.reverse() # most first
  485. for i, name in enumerate(self.authors_by_commits):
  486. self.authors[name]['place_by_commits'] = i + 1
  487. for name in self.authors.keys():
  488. a = self.authors[name]
  489. a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
  490. date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
  491. date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
  492. delta = date_last - date_first
  493. a['date_first'] = date_first.strftime('%Y-%m-%d')
  494. a['date_last'] = date_last.strftime('%Y-%m-%d')
  495. a['timedelta'] = delta
  496. if 'lines_added' not in a: a['lines_added'] = 0
  497. if 'lines_removed' not in a: a['lines_removed'] = 0
  498. def getActiveDays(self):
  499. return self.active_days
  500. def getActivityByDayOfWeek(self):
  501. return self.activity_by_day_of_week
  502. def getActivityByHourOfDay(self):
  503. return self.activity_by_hour_of_day
  504. def getAuthorInfo(self, author):
  505. return self.authors[author]
  506. def getAuthors(self, limit = None):
  507. res = getkeyssortedbyvaluekey(self.authors, 'commits')
  508. res.reverse()
  509. return res[:limit]
  510. def getCommitDeltaDays(self):
  511. return (self.last_commit_stamp / 86400 - self.first_commit_stamp / 86400) + 1
  512. def getDomainInfo(self, domain):
  513. return self.domains[domain]
  514. def getDomains(self):
  515. return self.domains.keys()
  516. def getFilesInCommit(self, rev):
  517. try:
  518. res = self.cache['files_in_tree'][rev]
  519. except:
  520. res = int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])
  521. if 'files_in_tree' not in self.cache:
  522. self.cache['files_in_tree'] = {}
  523. self.cache['files_in_tree'][rev] = res
  524. return res
  525. def getFirstCommitDate(self):
  526. return datetime.datetime.fromtimestamp(self.first_commit_stamp)
  527. def getLastCommitDate(self):
  528. return datetime.datetime.fromtimestamp(self.last_commit_stamp)
  529. def getLinesInBlob(self, sha1):
  530. try:
  531. res = self.cache['lines_in_blob'][sha1]
  532. except:
  533. res = int(getpipeoutput(['git cat-file blob %s' % sha1, 'wc -l']).split()[0])
  534. if 'lines_in_blob' not in self.cache:
  535. self.cache['lines_in_blob'] = {}
  536. self.cache['lines_in_blob'][sha1] = res
  537. return res
  538. def getTags(self):
  539. lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
  540. return lines.split('\n')
  541. def getTagDate(self, tag):
  542. return self.revToDate('tags/' + tag)
  543. def getTotalAuthors(self):
  544. return self.total_authors
  545. def getTotalCommits(self):
  546. return self.total_commits
  547. def getTotalFiles(self):
  548. return self.total_files
  549. def getTotalLOC(self):
  550. return self.total_lines
  551. def getTotalSize(self):
  552. return self.total_size
  553. def revToDate(self, rev):
  554. stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
  555. return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
  556. class ReportCreator:
  557. """Creates the actual report based on given data."""
  558. def __init__(self):
  559. pass
  560. def create(self, data, path):
  561. self.data = data
  562. self.path = path
  563. def html_linkify(text):
  564. return text.lower().replace(' ', '_')
  565. def html_header(level, text):
  566. name = html_linkify(text)
  567. return '\n<h%d><a href="#%s" name="%s">%s</a></h%d>\n\n' % (level, name, name, text, level)
  568. class HTMLReportCreator(ReportCreator):
  569. def create(self, data, path):
  570. ReportCreator.create(self, data, path)
  571. self.title = data.projectname
  572. # copy static files. Looks in the binary directory, ../share/gitstats and /usr/share/gitstats
  573. binarypath = os.path.dirname(os.path.abspath(__file__))
  574. secondarypath = os.path.join(binarypath, '..', 'share', 'gitstats')
  575. basedirs = [binarypath, secondarypath, '/usr/share/gitstats']
  576. for file in ('gitstats.css', 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
  577. for base in basedirs:
  578. src = base + '/' + file
  579. if os.path.exists(src):
  580. shutil.copyfile(src, path + '/' + file)
  581. break
  582. else:
  583. print 'Warning: "%s" not found, so not copied (searched: %s)' % (file, basedirs)
  584. f = open(path + "/index.html", 'w')
  585. format = '%Y-%m-%d %H:%M:%S'
  586. self.printHeader(f)
  587. f.write('<h1>GitStats - %s</h1>' % data.projectname)
  588. self.printNav(f)
  589. f.write('<dl>')
  590. f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
  591. f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
  592. f.write('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s), %s, %s</dd>' % (getversion(), getgitversion(), getgnuplotversion()))
  593. f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format)))
  594. f.write('<dt>Age</dt><dd>%d days, %d active days (%3.2f%%)</dd>' % (data.getCommitDeltaDays(), len(data.getActiveDays()), (100.0 * len(data.getActiveDays()) / data.getCommitDeltaDays())))
  595. f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
  596. f.write('<dt>Total Lines of Code</dt><dd>%s (%d added, %d removed)</dd>' % (data.getTotalLOC(), data.total_lines_added, data.total_lines_removed))
  597. f.write('<dt>Total Commits</dt><dd>%s (average %.1f commits per active day, %.1f per all days)</dd>' % (data.getTotalCommits(), float(data.getTotalCommits()) / len(data.getActiveDays()), float(data.getTotalCommits()) / data.getCommitDeltaDays()))
  598. f.write('<dt>Authors</dt><dd>%s (average %.1f commits per author)</dd>' % (data.getTotalAuthors(), (1.0 * data.getTotalCommits()) / data.getTotalAuthors()))
  599. f.write('</dl>')
  600. f.write('</body>\n</html>')
  601. f.close()
  602. ###
  603. # Activity
  604. f = open(path + '/activity.html', 'w')
  605. self.printHeader(f)
  606. f.write('<h1>Activity</h1>')
  607. self.printNav(f)
  608. #f.write('<h2>Last 30 days</h2>')
  609. #f.write('<h2>Last 12 months</h2>')
  610. # Weekly activity
  611. WEEKS = 32
  612. f.write(html_header(2, 'Weekly activity'))
  613. f.write('<p>Last %d weeks</p>' % WEEKS)
  614. # generate weeks to show (previous N weeks from now)
  615. now = datetime.datetime.now()
  616. deltaweek = datetime.timedelta(7)
  617. weeks = []
  618. stampcur = now
  619. for i in range(0, WEEKS):
  620. weeks.insert(0, stampcur.strftime('%Y-%W'))
  621. stampcur -= deltaweek
  622. # top row: commits & bar
  623. f.write('<table class="noborders"><tr>')
  624. for i in range(0, WEEKS):
  625. commits = 0
  626. if weeks[i] in data.activity_by_year_week:
  627. commits = data.activity_by_year_week[weeks[i]]
  628. percentage = 0
  629. if weeks[i] in data.activity_by_year_week:
  630. percentage = float(data.activity_by_year_week[weeks[i]]) / data.activity_by_year_week_peak
  631. height = max(1, int(200 * percentage))
  632. f.write('<td style="text-align: center; vertical-align: bottom">%d<div style="display: block; background-color: red; width: 20px; height: %dpx"></div></td>' % (commits, height))
  633. # bottom row: year/week
  634. f.write('</tr><tr>')
  635. for i in range(0, WEEKS):
  636. f.write('<td>%s</td>' % (WEEKS - i))
  637. f.write('</tr></table>')
  638. # Hour of Day
  639. f.write(html_header(2, 'Hour of Day'))
  640. hour_of_day = data.getActivityByHourOfDay()
  641. f.write('<table><tr><th>Hour</th>')
  642. for i in range(0, 24):
  643. f.write('<th>%d</th>' % i)
  644. f.write('</tr>\n<tr><th>Commits</th>')
  645. fp = open(path + '/hour_of_day.dat', 'w')
  646. for i in range(0, 24):
  647. if i in hour_of_day:
  648. r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
  649. f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
  650. fp.write('%d %d\n' % (i, hour_of_day[i]))
  651. else:
  652. f.write('<td>0</td>')
  653. fp.write('%d 0\n' % i)
  654. fp.close()
  655. f.write('</tr>\n<tr><th>%</th>')
  656. totalcommits = data.getTotalCommits()
  657. for i in range(0, 24):
  658. if i in hour_of_day:
  659. r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
  660. f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, (100.0 * hour_of_day[i]) / totalcommits))
  661. else:
  662. f.write('<td>0.00</td>')
  663. f.write('</tr></table>')
  664. f.write('<img src="hour_of_day.png" alt="Hour of Day" />')
  665. fg = open(path + '/hour_of_day.dat', 'w')
  666. for i in range(0, 24):
  667. if i in hour_of_day:
  668. fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
  669. else:
  670. fg.write('%d 0\n' % (i + 1))
  671. fg.close()
  672. # Day of Week
  673. f.write(html_header(2, 'Day of Week'))
  674. day_of_week = data.getActivityByDayOfWeek()
  675. f.write('<div class="vtable"><table>')
  676. f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
  677. fp = open(path + '/day_of_week.dat', 'w')
  678. for d in range(0, 7):
  679. commits = 0
  680. if d in day_of_week:
  681. commits = day_of_week[d]
  682. fp.write('%d %s %d\n' % (d + 1, WEEKDAYS[d], commits))
  683. f.write('<tr>')
  684. f.write('<th>%s</th>' % (WEEKDAYS[d]))
  685. if d in day_of_week:
  686. f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
  687. else:
  688. f.write('<td>0</td>')
  689. f.write('</tr>')
  690. f.write('</table></div>')
  691. f.write('<img src="day_of_week.png" alt="Day of Week" />')
  692. fp.close()
  693. # Hour of Week
  694. f.write(html_header(2, 'Hour of Week'))
  695. f.write('<table>')
  696. f.write('<tr><th>Weekday</th>')
  697. for hour in range(0, 24):
  698. f.write('<th>%d</th>' % (hour))
  699. f.write('</tr>')
  700. for weekday in range(0, 7):
  701. f.write('<tr><th>%s</th>' % (WEEKDAYS[weekday]))
  702. for hour in range(0, 24):
  703. try:
  704. commits = data.activity_by_hour_of_week[weekday][hour]
  705. except KeyError:
  706. commits = 0
  707. if commits != 0:
  708. f.write('<td')
  709. r = 127 + int((float(commits) / data.activity_by_hour_of_week_busiest) * 128)
  710. f.write(' style="background-color: rgb(%d, 0, 0)"' % r)
  711. f.write('>%d</td>' % commits)
  712. else:
  713. f.write('<td></td>')
  714. f.write('</tr>')
  715. f.write('</table>')
  716. # Month of Year
  717. f.write(html_header(2, 'Month of Year'))
  718. f.write('<div class="vtable"><table>')
  719. f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
  720. fp = open (path + '/month_of_year.dat', 'w')
  721. for mm in range(1, 13):
  722. commits = 0
  723. if mm in data.activity_by_month_of_year:
  724. commits = data.activity_by_month_of_year[mm]
  725. f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, (100.0 * commits) / data.getTotalCommits()))
  726. fp.write('%d %d\n' % (mm, commits))
  727. fp.close()
  728. f.write('</table></div>')
  729. f.write('<img src="month_of_year.png" alt="Month of Year" />')
  730. # Commits by year/month
  731. f.write(html_header(2, 'Commits by year/month'))
  732. f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th><th>Lines added</th><th>Lines removed</th></tr>')
  733. for yymm in reversed(sorted(data.commits_by_month.keys())):
  734. f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td></tr>' % (yymm, data.commits_by_month.get(yymm,0), data.lines_added_by_month.get(yymm,0), data.lines_removed_by_month.get(yymm,0)))
  735. f.write('</table></div>')
  736. f.write('<img src="commits_by_year_month.png" alt="Commits by year/month" />')
  737. fg = open(path + '/commits_by_year_month.dat', 'w')
  738. for yymm in sorted(data.commits_by_month.keys()):
  739. fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
  740. fg.close()
  741. # Commits by year
  742. f.write(html_header(2, 'Commits by Year'))
  743. f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th><th>Lines added</th><th>Lines removed</th></tr>')
  744. for yy in reversed(sorted(data.commits_by_year.keys())):
  745. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td></tr>' % (yy, data.commits_by_year.get(yy,0), (100.0 * data.commits_by_year.get(yy,0)) / data.getTotalCommits(), data.lines_added_by_year.get(yy,0), data.lines_removed_by_year.get(yy,0)))
  746. f.write('</table></div>')
  747. f.write('<img src="commits_by_year.png" alt="Commits by Year" />')
  748. fg = open(path + '/commits_by_year.dat', 'w')
  749. for yy in sorted(data.commits_by_year.keys()):
  750. fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
  751. fg.close()
  752. # Commits by timezone
  753. f.write(html_header(2, 'Commits by Timezone'))
  754. f.write('<table><tr>')
  755. f.write('<th>Timezone</th><th>Commits</th>')
  756. max_commits_on_tz = max(data.commits_by_timezone.values())
  757. for i in sorted(data.commits_by_timezone.keys(), key = lambda n : int(n)):
  758. commits = data.commits_by_timezone[i]
  759. r = 127 + int((float(commits) / max_commits_on_tz) * 128)
  760. f.write('<tr><th>%s</th><td style="background-color: rgb(%d, 0, 0)">%d</td></tr>' % (i, r, commits))
  761. f.write('</tr></table>')
  762. f.write('</body></html>')
  763. f.close()
  764. ###
  765. # Authors
  766. f = open(path + '/authors.html', 'w')
  767. self.printHeader(f)
  768. f.write('<h1>Authors</h1>')
  769. self.printNav(f)
  770. # Authors :: List of authors
  771. f.write(html_header(2, 'List of Authors'))
  772. f.write('<table class="authors sortable" id="authors">')
  773. f.write('<tr><th>Author</th><th>Commits (%)</th><th>+ lines</th><th>- lines</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th>Active days</th><th># by commits</th></tr>')
  774. for author in data.getAuthors(conf['max_authors']):
  775. info = data.getAuthorInfo(author)
  776. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td><td>%d</td></tr>' % (author, info['commits'], info['commits_frac'], info['lines_added'], info['lines_removed'], info['date_first'], info['date_last'], info['timedelta'], len(info['active_days']), info['place_by_commits']))
  777. f.write('</table>')
  778. allauthors = data.getAuthors()
  779. if len(allauthors) > conf['max_authors']:
  780. rest = allauthors[conf['max_authors']:]
  781. f.write('<p class="moreauthors">These didn\'t make it to the top: %s</p>' % ', '.join(rest))
  782. f.write(html_header(2, 'Cumulated Added Lines of Code per Author'))
  783. f.write('<img src="lines_of_code_by_author.png" alt="Lines of code per Author" />')
  784. if len(allauthors) > conf['max_authors']:
  785. f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])
  786. f.write(html_header(2, 'Commits per Author'))
  787. f.write('<img src="commits_by_author.png" alt="Commits per Author" />')
  788. if len(allauthors) > conf['max_authors']:
  789. f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])
  790. fgl = open(path + '/lines_of_code_by_author.dat', 'w')
  791. fgc = open(path + '/commits_by_author.dat', 'w')
  792. lines_by_authors = {} # cumulated added lines by
  793. # author. to save memory,
  794. # changes_by_date_by_author[stamp][author] is defined
  795. # only at points where author commits.
  796. # lines_by_authors allows us to generate all the
  797. # points in the .dat file.
  798. # Don't rely on getAuthors to give the same order each
  799. # time. Be robust and keep the list in a variable.
  800. commits_by_authors = {} # cumulated added lines by
  801. self.authors_to_plot = data.getAuthors(conf['max_authors'])
  802. for author in self.authors_to_plot:
  803. lines_by_authors[author] = 0
  804. commits_by_authors[author] = 0
  805. for stamp in sorted(data.changes_by_date_by_author.keys()):
  806. fgl.write('%d' % stamp)
  807. fgc.write('%d' % stamp)
  808. for author in self.authors_to_plot:
  809. if author in data.changes_by_date_by_author[stamp].keys():
  810. lines_by_authors[author] = data.changes_by_date_by_author[stamp][author]['lines_added']
  811. commits_by_authors[author] = data.changes_by_date_by_author[stamp][author]['commits']
  812. fgl.write(' %d' % lines_by_authors[author])
  813. fgc.write(' %d' % commits_by_authors[author])
  814. fgl.write('\n')
  815. fgc.write('\n')
  816. fgl.close()
  817. fgc.close()
  818. # Authors :: Author of Month
  819. f.write(html_header(2, 'Author of Month'))
  820. f.write('<table class="sortable" id="aom">')
  821. f.write('<tr><th>Month</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
  822. for yymm in reversed(sorted(data.author_of_month.keys())):
  823. authordict = data.author_of_month[yymm]
  824. authors = getkeyssortedbyvalues(authordict)
  825. authors.reverse()
  826. commits = data.author_of_month[yymm][authors[0]]
  827. next = ', '.join(authors[1:conf['authors_top']+1])
  828. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yymm, authors[0], commits, (100.0 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], next, len(authors)))
  829. f.write('</table>')
  830. f.write(html_header(2, 'Author of Year'))
  831. f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
  832. for yy in reversed(sorted(data.author_of_year.keys())):
  833. authordict = data.author_of_year[yy]
  834. authors = getkeyssortedbyvalues(authordict)
  835. authors.reverse()
  836. commits = data.author_of_year[yy][authors[0]]
  837. next = ', '.join(authors[1:conf['authors_top']+1])
  838. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yy, authors[0], commits, (100.0 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next, len(authors)))
  839. f.write('</table>')
  840. # Domains
  841. f.write(html_header(2, 'Commits by Domains'))
  842. domains_by_commits = getkeyssortedbyvaluekey(data.domains, 'commits')
  843. domains_by_commits.reverse() # most first
  844. f.write('<div class="vtable"><table>')
  845. f.write('<tr><th>Domains</th><th>Total (%)</th></tr>')
  846. fp = open(path + '/domains.dat', 'w')
  847. n = 0
  848. for domain in domains_by_commits:
  849. if n == conf['max_domains']:
  850. break
  851. commits = 0
  852. n += 1
  853. info = data.getDomainInfo(domain)
  854. fp.write('%s %d %d\n' % (domain, n , info['commits']))
  855. f.write('<tr><th>%s</th><td>%d (%.2f%%)</td></tr>' % (domain, info['commits'], (100.0 * info['commits'] / totalcommits)))
  856. f.write('</table></div>')
  857. f.write('<img src="domains.png" alt="Commits by Domains" />')
  858. fp.close()
  859. f.write('</body></html>')
  860. f.close()
  861. ###
  862. # Files
  863. f = open(path + '/files.html', 'w')
  864. self.printHeader(f)
  865. f.write('<h1>Files</h1>')
  866. self.printNav(f)
  867. f.write('<dl>\n')
  868. f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
  869. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  870. f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % (float(data.getTotalSize()) / data.getTotalFiles()))
  871. f.write('</dl>\n')
  872. # Files :: File count by date
  873. f.write(html_header(2, 'File count by date'))
  874. # use set to get rid of duplicate/unnecessary entries
  875. files_by_date = set()
  876. for stamp in sorted(data.files_by_stamp.keys()):
  877. files_by_date.add('%s %d' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
  878. fg = open(path + '/files_by_date.dat', 'w')
  879. for line in sorted(list(files_by_date)):
  880. fg.write('%s\n' % line)
  881. #for stamp in sorted(data.files_by_stamp.keys()):
  882. # fg.write('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
  883. fg.close()
  884. f.write('<img src="files_by_date.png" alt="Files by Date" />')
  885. #f.write('<h2>Average file size by date</h2>')
  886. # Files :: Extensions
  887. f.write(html_header(2, 'Extensions'))
  888. f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
  889. for ext in sorted(data.extensions.keys()):
  890. files = data.extensions[ext]['files']
  891. lines = data.extensions[ext]['lines']
  892. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (ext, files, (100.0 * files) / data.getTotalFiles(), lines, (100.0 * lines) / data.getTotalLOC(), lines / files))
  893. f.write('</table>')
  894. f.write('</body></html>')
  895. f.close()
  896. ###
  897. # Lines
  898. f = open(path + '/lines.html', 'w')
  899. self.printHeader(f)
  900. f.write('<h1>Lines</h1>')
  901. self.printNav(f)
  902. f.write('<dl>\n')
  903. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  904. f.write('</dl>\n')
  905. f.write(html_header(2, 'Lines of Code'))
  906. f.write('<img src="lines_of_code.png" />')
  907. fg = open(path + '/lines_of_code.dat', 'w')
  908. for stamp in sorted(data.changes_by_date.keys()):
  909. fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
  910. fg.close()
  911. f.write('</body></html>')
  912. f.close()
  913. ###
  914. # tags.html
  915. f = open(path + '/tags.html', 'w')
  916. self.printHeader(f)
  917. f.write('<h1>Tags</h1>')
  918. self.printNav(f)
  919. f.write('<dl>')
  920. f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
  921. if len(data.tags) > 0:
  922. f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (1.0 * data.getTotalCommits() / len(data.tags)))
  923. f.write('</dl>')
  924. f.write('<table class="tags">')
  925. f.write('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
  926. # sort the tags by date desc
  927. tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
  928. for tag in tags_sorted_by_date_desc:
  929. authorinfo = []
  930. self.authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
  931. for i in reversed(self.authors_by_commits):
  932. authorinfo.append('%s (%d)' % (i, data.tags[tag]['authors'][i]))
  933. f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
  934. f.write('</table>')
  935. f.write('</body></html>')
  936. f.close()
  937. self.createGraphs(path)
  938. def createGraphs(self, path):
  939. print 'Generating graphs...'
  940. # hour of day
  941. f = open(path + '/hour_of_day.plot', 'w')
  942. f.write(GNUPLOT_COMMON)
  943. f.write(
  944. """
  945. set output 'hour_of_day.png'
  946. unset key
  947. set xrange [0.5:24.5]
  948. set xtics 4
  949. set grid y
  950. set ylabel "Commits"
  951. plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
  952. """)
  953. f.close()
  954. # day of week
  955. f = open(path + '/day_of_week.plot', 'w')
  956. f.write(GNUPLOT_COMMON)
  957. f.write(
  958. """
  959. set output 'day_of_week.png'
  960. unset key
  961. set xrange [0.5:7.5]
  962. set xtics 1
  963. set grid y
  964. set ylabel "Commits"
  965. plot 'day_of_week.dat' using 1:3:(0.5):xtic(2) w boxes fs solid
  966. """)
  967. f.close()
  968. # Domains
  969. f = open(path + '/domains.plot', 'w')
  970. f.write(GNUPLOT_COMMON)
  971. f.write(
  972. """
  973. set output 'domains.png'
  974. unset key
  975. unset xtics
  976. set yrange [0:]
  977. set grid y
  978. set ylabel "Commits"
  979. plot 'domains.dat' using 2:3:(0.5) with boxes fs solid, '' using 2:3:1 with labels rotate by 45 offset 0,1
  980. """)
  981. f.close()
  982. # Month of Year
  983. f = open(path + '/month_of_year.plot', 'w')
  984. f.write(GNUPLOT_COMMON)
  985. f.write(
  986. """
  987. set output 'month_of_year.png'
  988. unset key
  989. set xrange [0.5:12.5]
  990. set xtics 1
  991. set grid y
  992. set ylabel "Commits"
  993. plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
  994. """)
  995. f.close()
  996. # commits_by_year_month
  997. f = open(path + '/commits_by_year_month.plot', 'w')
  998. f.write(GNUPLOT_COMMON)
  999. f.write(
  1000. """
  1001. set output 'commits_by_year_month.png'
  1002. unset key
  1003. set xdata time
  1004. set timefmt "%Y-%m"
  1005. set format x "%Y-%m"
  1006. set xtics rotate
  1007. set bmargin 5
  1008. set grid y
  1009. set ylabel "Commits"
  1010. plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
  1011. """)
  1012. f.close()
  1013. # commits_by_year
  1014. f = open(path + '/commits_by_year.plot', 'w')
  1015. f.write(GNUPLOT_COMMON)
  1016. f.write(
  1017. """
  1018. set output 'commits_by_year.png'
  1019. unset key
  1020. set xtics 1 rotate
  1021. set grid y
  1022. set ylabel "Commits"
  1023. set yrange [0:]
  1024. plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
  1025. """)
  1026. f.close()
  1027. # Files by date
  1028. f = open(path + '/files_by_date.plot', 'w')
  1029. f.write(GNUPLOT_COMMON)
  1030. f.write(
  1031. """
  1032. set output 'files_by_date.png'
  1033. unset key
  1034. set xdata time
  1035. set timefmt "%Y-%m-%d"
  1036. set format x "%Y-%m-%d"
  1037. set grid y
  1038. set ylabel "Files"
  1039. set xtics rotate
  1040. set ytics autofreq
  1041. set bmargin 6
  1042. plot 'files_by_date.dat' using 1:2 w steps
  1043. """)
  1044. f.close()
  1045. # Lines of Code
  1046. f = open(path + '/lines_of_code.plot', 'w')
  1047. f.write(GNUPLOT_COMMON)
  1048. f.write(
  1049. """
  1050. set output 'lines_of_code.png'
  1051. unset key
  1052. set xdata time
  1053. set timefmt "%s"
  1054. set format x "%Y-%m-%d"
  1055. set grid y
  1056. set ylabel "Lines"
  1057. set xtics rotate
  1058. set bmargin 6
  1059. plot 'lines_of_code.dat' using 1:2 w lines
  1060. """)
  1061. f.close()
  1062. # Lines of Code Added per author
  1063. f = open(path + '/lines_of_code_by_author.plot', 'w')
  1064. f.write(GNUPLOT_COMMON)
  1065. f.write(
  1066. """
  1067. set terminal png transparent size 640,480
  1068. set output 'lines_of_code_by_author.png'
  1069. set key left top
  1070. set xdata time
  1071. set timefmt "%s"
  1072. set format x "%Y-%m-%d"
  1073. set grid y
  1074. set ylabel "Lines"
  1075. set xtics rotate
  1076. set bmargin 6
  1077. plot """
  1078. )
  1079. i = 1
  1080. plots = []
  1081. for a in self.authors_to_plot:
  1082. i = i + 1
  1083. plots.append("""'lines_of_code_by_author.dat' using 1:%d title "%s" w lines""" % (i, a.replace("\"", "\\\"")))
  1084. f.write(", ".join(plots))
  1085. f.write('\n')
  1086. f.close()
  1087. # Commits per author
  1088. f = open(path + '/commits_by_author.plot', 'w')
  1089. f.write(GNUPLOT_COMMON)
  1090. f.write(
  1091. """
  1092. set terminal png transparent size 640,480
  1093. set output 'commits_by_author.png'
  1094. set key left top
  1095. set xdata time
  1096. set timefmt "%s"
  1097. set format x "%Y-%m-%d"
  1098. set grid y
  1099. set ylabel "Commits"
  1100. set xtics rotate
  1101. set bmargin 6
  1102. plot """
  1103. )
  1104. i = 1
  1105. plots = []
  1106. for a in self.authors_to_plot:
  1107. i = i + 1
  1108. plots.append("""'commits_by_author.dat' using 1:%d title "%s" w lines""" % (i, a.replace("\"", "\\\"")))
  1109. f.write(", ".join(plots))
  1110. f.write('\n')
  1111. f.close()
  1112. os.chdir(path)
  1113. files = glob.glob(path + '/*.plot')
  1114. for f in files:
  1115. out = getpipeoutput([gnuplot_cmd + ' "%s"' % f])
  1116. if len(out) > 0:
  1117. print out
  1118. def printHeader(self, f, title = ''):
  1119. f.write(
  1120. """<?xml version="1.0" encoding="UTF-8"?>
  1121. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  1122. <html xmlns="http://www.w3.org/1999/xhtml">
  1123. <head>
  1124. <title>GitStats - %s</title>
  1125. <link rel="stylesheet" href="%s" type="text/css" />
  1126. <meta name="generator" content="GitStats %s" />
  1127. <script type="text/javascript" src="sortable.js"></script>
  1128. </head>
  1129. <body>
  1130. """ % (self.title, conf['style'], getversion()))
  1131. def printNav(self, f):
  1132. f.write("""
  1133. <div class="nav">
  1134. <ul>
  1135. <li><a href="index.html">General</a></li>
  1136. <li><a href="activity.html">Activity</a></li>
  1137. <li><a href="authors.html">Authors</a></li>
  1138. <li><a href="files.html">Files</a></li>
  1139. <li><a href="lines.html">Lines</a></li>
  1140. <li><a href="tags.html">Tags</a></li>
  1141. </ul>
  1142. </div>
  1143. """)
  1144. class GitStats:
  1145. def run(self, args_orig):
  1146. optlist, args = getopt.getopt(args_orig, 'c:')
  1147. for o,v in optlist:
  1148. if o == '-c':
  1149. key, value = v.split('=', 1)
  1150. if key not in conf:
  1151. raise KeyError('no such key "%s" in config' % key)
  1152. if isinstance(conf[key], int):
  1153. conf[key] = int(value)
  1154. else:
  1155. conf[key] = value
  1156. if len(args) < 2:
  1157. print """
  1158. Usage: gitstats [options] <gitpath..> <outputpath>
  1159. Options:
  1160. -c key=value Override configuration value
  1161. Default config values:
  1162. %s
  1163. """ % conf
  1164. sys.exit(0)
  1165. outputpath = os.path.abspath(args[-1])
  1166. rundir = os.getcwd()
  1167. try:
  1168. os.makedirs(outputpath)
  1169. except OSError:
  1170. pass
  1171. if not os.path.isdir(outputpath):
  1172. print 'FATAL: Output path is not a directory or does not exist'
  1173. sys.exit(1)
  1174. print 'Output path: %s' % outputpath
  1175. cachefile = os.path.join(outputpath, 'gitstats.cache')
  1176. data = GitDataCollector()
  1177. data.loadCache(cachefile)
  1178. for gitpath in args[0:-1]:
  1179. print 'Git path: %s' % gitpath
  1180. os.chdir(gitpath)
  1181. print 'Collecting data...'
  1182. data.collect(gitpath)
  1183. print 'Refining data...'
  1184. data.saveCache(cachefile)
  1185. data.refine()
  1186. os.chdir(rundir)
  1187. print 'Generating report...'
  1188. report = HTMLReportCreator()
  1189. report.create(data, outputpath)
  1190. time_end = time.time()
  1191. exectime_internal = time_end - time_start
  1192. print 'Execution time %.5f secs, %.5f secs (%.2f %%) in external commands)' % (exectime_internal, exectime_external, (100.0 * exectime_external) / exectime_internal)
  1193. if sys.stdin.isatty():
  1194. print 'You may now run:'
  1195. print
  1196. print ' sensible-browser \'%s\'' % os.path.join(outputpath, 'index.html').replace("'", "'\\''")
  1197. print
  1198. if __name__=='__main__':
  1199. g = GitStats()
  1200. g.run(sys.argv[1:])