#!/usr/bin/env python2
# Copyright (c) 2007-2014 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/AUTHOR)
# GPLv2 / GPLv3
import datetime
import getopt
import glob
import os
import pickle
import platform
import re
import shutil
import subprocess
import sys
import time
import zlib

if sys.version_info < (2, 6):
    print >> sys.stderr, "Python 2.6 or higher is required for gitstats"
    sys.exit(1)

from multiprocessing import Pool

os.environ['LC_ALL'] = 'C'

GNUPLOT_COMMON = 'set terminal png transparent size 640,240\nset size 1.0,1.0\n'
ON_LINUX = (platform.system() == 'Linux')
WEEKDAYS = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')

exectime_internal = 0.0
exectime_external = 0.0
time_start = time.time()

# By default, gnuplot is searched from path, but can be overridden with the
# environment variable "GNUPLOT"
gnuplot_cmd = 'gnuplot'
if 'GNUPLOT' in os.environ:
    gnuplot_cmd = os.environ['GNUPLOT']

conf = {
    'max_domains': 10,
    'max_ext_length': 10,
    'style': 'gitstats.css',
    'max_authors': 20,
    'authors_top': 5,
    'commit_begin': '',
    'commit_end': 'HEAD',
    'linear_linestats': 1,
    'project_name': '',
    'processes': 8,
    'start_date': ''
}
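# Note: any of the defaults above can be overridden on the command line with
# "-c key=value" (handled in GitStats.run below), e.g. an illustrative
# "gitstats -c max_authors=30 -c processes=4 <repo> <output>".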

def getpipeoutput(cmds, quiet = False):
    global exectime_external
    start = time.time()
    if not quiet and ON_LINUX and os.isatty(1):
        print '>> ' + ' | '.join(cmds),
        sys.stdout.flush()
    p = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
    processes = [p]
    for x in cmds[1:]:
        p = subprocess.Popen(x, stdin = p.stdout, stdout = subprocess.PIPE, shell = True)
        processes.append(p)
    output = p.communicate()[0]
    for p in processes:
        p.wait()
    end = time.time()
    if not quiet:
        if ON_LINUX and os.isatty(1):
            print '\r',
        print '[%.5f] >> %s' % (end - start, ' | '.join(cmds))
    exectime_external += (end - start)
    return output.rstrip('\n')
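# Illustrative note: each element of "cmds" is one stage of a shell pipeline, so
# getpipeoutput(['git rev-list HEAD', 'wc -l']) behaves like "git rev-list HEAD | wc -l"
# and returns the trimmed stdout of the last stage.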

def getlogrange(defaultrange = 'HEAD', end_only = False):
    commit_range = getcommitrange(defaultrange, end_only)
    if len(conf['start_date']) > 0:
        return '--since="%s" "%s"' % (conf['start_date'], commit_range)
    return commit_range

def getcommitrange(defaultrange = 'HEAD', end_only = False):
    if len(conf['commit_end']) > 0:
        if end_only or len(conf['commit_begin']) == 0:
            return conf['commit_end']
        return '%s..%s' % (conf['commit_begin'], conf['commit_end'])
    return defaultrange

def getkeyssortedbyvalues(dict):
    return map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items())))

# dict['author'] = { 'commits': 512 } - ...key(dict, 'commits')
def getkeyssortedbyvaluekey(d, key):
    return map(lambda el : el[1], sorted(map(lambda el : (d[el][key], el), d.keys())))

def getstatsummarycounts(line):
    numbers = re.findall('\d+', line)
    if len(numbers) == 1:
        # neither insertions nor deletions: may probably only happen for "0 files changed"
        numbers.append(0)
        numbers.append(0)
    elif len(numbers) == 2 and line.find('(+)') != -1:
        numbers.append(0) # only insertions were printed on line
    elif len(numbers) == 2 and line.find('(-)') != -1:
        numbers.insert(1, 0) # only deletions were printed on line
    return numbers
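# getstatsummarycounts() normalizes the "git --shortstat" summary line into
# [files, insertions, deletions]; illustrative examples:
#   "3 files changed, 9 insertions(+), 2 deletions(-)" -> ['3', '9', '2']
#   "1 file changed, 5 insertions(+)"                   -> ['1', '5', 0]
#   "1 file changed, 4 deletions(-)"                    -> ['1', 0, '4']
# (callers convert the entries to int themselves)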

VERSION = 0
def getversion():
    global VERSION
    if VERSION == 0:
        gitstats_repo = os.path.dirname(os.path.abspath(__file__))
        VERSION = getpipeoutput(["git --git-dir=%s/.git --work-tree=%s rev-parse --short %s" %
            (gitstats_repo, gitstats_repo, getcommitrange('HEAD').split('\n')[0])])
    return VERSION

def getgitversion():
    return getpipeoutput(['git --version']).split('\n')[0]

def getgnuplotversion():
    return getpipeoutput(['%s --version' % gnuplot_cmd]).split('\n')[0]

def getnumoffilesfromrev(time_rev):
    """
    Get number of files changed in commit
    """
    time, rev = time_rev
    return (int(time), rev, int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0]))

def getnumoflinesinblob(ext_blob):
    """
    Get number of lines in blob
    """
    ext, blob_id = ext_blob
    return (ext, blob_id, int(getpipeoutput(['git cat-file blob %s' % blob_id, 'wc -l']).split()[0]))
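# Note: the two helpers above are plain module-level functions so that
# multiprocessing.Pool can pickle them and hand them to its worker processes.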

class DataCollector:
    """Manages data collection from a revision control repository."""
    def __init__(self):
        self.stamp_created = time.time()
        self.cache = {}
        self.total_authors = 0
        self.activity_by_hour_of_day = {} # hour -> commits
        self.activity_by_day_of_week = {} # day -> commits
        self.activity_by_month_of_year = {} # month [1-12] -> commits
        self.activity_by_hour_of_week = {} # weekday -> hour -> commits
        self.activity_by_hour_of_day_busiest = 0
        self.activity_by_hour_of_week_busiest = 0
        self.activity_by_year_week = {} # yy_wNN -> commits
        self.activity_by_year_week_peak = 0

        self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed}

        self.total_commits = 0
        self.total_files = 0
        self.authors_by_commits = 0

        # domains
        self.domains = {} # domain -> commits

        # author of the month
        self.author_of_month = {} # month -> author -> commits
        self.author_of_year = {} # year -> author -> commits
        self.commits_by_month = {} # month -> commits
        self.commits_by_year = {} # year -> commits
        self.lines_added_by_month = {} # month -> lines added
        self.lines_added_by_year = {} # year -> lines added
        self.lines_removed_by_month = {} # month -> lines removed
        self.lines_removed_by_year = {} # year -> lines removed
        self.first_commit_stamp = 0
        self.last_commit_stamp = 0
        self.last_active_day = None
        self.active_days = set()

        # lines
        self.total_lines = 0
        self.total_lines_added = 0
        self.total_lines_removed = 0

        # size
        self.total_size = 0

        # timezone
        self.commits_by_timezone = {} # timezone -> commits

        # tags
        self.tags = {}

        self.files_by_stamp = {} # stamp -> files

        # extensions
        self.extensions = {} # extension -> files, lines

        # line statistics
        self.changes_by_date = {} # stamp -> { files, ins, del }

    ##
    # This should be the main function to extract data from the repository.
    def collect(self, dir):
        self.dir = dir
        if len(conf['project_name']) == 0:
            self.projectname = os.path.basename(os.path.abspath(dir))
        else:
            self.projectname = conf['project_name']

    ##
    # Load cacheable data
    def loadCache(self, cachefile):
        if not os.path.exists(cachefile):
            return
        print 'Loading cache...'
        f = open(cachefile, 'rb')
        try:
            self.cache = pickle.loads(zlib.decompress(f.read()))
        except:
            # temporary hack to upgrade non-compressed caches
            f.seek(0)
            self.cache = pickle.load(f)
        f.close()

    ##
    # Produce any additional statistics from the extracted data.
    def refine(self):
        pass

    ##
    # : get a dictionary of author
    def getAuthorInfo(self, author):
        return None

    def getActivityByDayOfWeek(self):
        return {}

    def getActivityByHourOfDay(self):
        return {}

    # : get a dictionary of domains
    def getDomainInfo(self, domain):
        return None

    ##
    # Get a list of authors
    def getAuthors(self):
        return []

    def getFirstCommitDate(self):
        return datetime.datetime.now()

    def getLastCommitDate(self):
        return datetime.datetime.now()

    def getStampCreated(self):
        return self.stamp_created

    def getTags(self):
        return []

    def getTotalAuthors(self):
        return -1

    def getTotalCommits(self):
        return -1

    def getTotalFiles(self):
        return -1

    def getTotalLOC(self):
        return -1

    ##
    # Save cacheable data
    def saveCache(self, cachefile):
        print 'Saving cache...'
        tempfile = cachefile + '.tmp'
        f = open(tempfile, 'wb')
        #pickle.dump(self.cache, f)
        data = zlib.compress(pickle.dumps(self.cache))
        f.write(data)
        f.close()
        try:
            os.remove(cachefile)
        except OSError:
            pass
        os.rename(tempfile, cachefile)
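
# GitDataCollector fills in the structures declared in DataCollector.__init__
# by shelling out to git (shortlog, rev-list, ls-tree, log --shortstat, ...)
# through getpipeoutput().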

class GitDataCollector(DataCollector):
    def collect(self, dir):
        DataCollector.collect(self, dir)

        self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getlogrange(), 'wc -l']))
        #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))

        # tags
        lines = getpipeoutput(['git show-ref --tags']).split('\n')
        for line in lines:
            if len(line) == 0:
                continue
            (hash, tag) = line.split(' ')

            tag = tag.replace('refs/tags/', '')
            output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % hash])
            if len(output) > 0:
                parts = output.split(' ')
                stamp = 0
                try:
                    stamp = int(parts[0])
                except ValueError:
                    stamp = 0
                self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} }

        # collect info on tags, starting from latest
        tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), self.tags.items()))))
        prev = None
        for tag in reversed(tags_sorted_by_date_desc):
            cmd = 'git shortlog -s "%s"' % tag
            if prev != None:
                cmd += ' "^%s"' % prev
            output = getpipeoutput([cmd])
            if len(output) == 0:
                continue
            prev = tag
            for line in output.split('\n'):
                parts = re.split('\s+', line, 2)
                commits = int(parts[1])
                author = parts[2]
                self.tags[tag]['commits'] += commits
                self.tags[tag]['authors'][author] = commits

        # Collect revision statistics
        # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
        lines = getpipeoutput(['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split('\n')
        for line in lines:
            parts = line.split(' ', 4)
            author = ''
            try:
                stamp = int(parts[0])
            except ValueError:
                stamp = 0
            timezone = parts[3]
            author, mail = parts[4].split('<', 1)
            author = author.rstrip()
            mail = mail.rstrip('>')
            domain = '?'
            if mail.find('@') != -1:
                domain = mail.rsplit('@', 1)[1]
            date = datetime.datetime.fromtimestamp(float(stamp))

            # First and last commit stamp (may be in any order because of cherry-picking and patches)
            if stamp > self.last_commit_stamp:
                self.last_commit_stamp = stamp
            if self.first_commit_stamp == 0 or stamp < self.first_commit_stamp:
                self.first_commit_stamp = stamp

            # activity
            # hour
            hour = date.hour
            self.activity_by_hour_of_day[hour] = self.activity_by_hour_of_day.get(hour, 0) + 1
            # most active hour?
            if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
                self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]

            # day of week
            day = date.weekday()
            self.activity_by_day_of_week[day] = self.activity_by_day_of_week.get(day, 0) + 1

            # domain stats
            if domain not in self.domains:
                self.domains[domain] = {}
            # commits
            self.domains[domain]['commits'] = self.domains[domain].get('commits', 0) + 1

            # hour of week
            if day not in self.activity_by_hour_of_week:
                self.activity_by_hour_of_week[day] = {}
            self.activity_by_hour_of_week[day][hour] = self.activity_by_hour_of_week[day].get(hour, 0) + 1
            # most active hour?
            if self.activity_by_hour_of_week[day][hour] > self.activity_by_hour_of_week_busiest:
                self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[day][hour]

            # month of year
            month = date.month
            self.activity_by_month_of_year[month] = self.activity_by_month_of_year.get(month, 0) + 1

            # yearly/weekly activity
            yyw = date.strftime('%Y-%W')
            self.activity_by_year_week[yyw] = self.activity_by_year_week.get(yyw, 0) + 1
            if self.activity_by_year_week_peak < self.activity_by_year_week[yyw]:
                self.activity_by_year_week_peak = self.activity_by_year_week[yyw]

            # author stats
            if author not in self.authors:
                self.authors[author] = {}
            # commits, note again that commits may be in any date order because of cherry-picking and patches
            if 'last_commit_stamp' not in self.authors[author]:
                self.authors[author]['last_commit_stamp'] = stamp
            if stamp > self.authors[author]['last_commit_stamp']:
                self.authors[author]['last_commit_stamp'] = stamp
            if 'first_commit_stamp' not in self.authors[author]:
                self.authors[author]['first_commit_stamp'] = stamp
            if stamp < self.authors[author]['first_commit_stamp']:
                self.authors[author]['first_commit_stamp'] = stamp

            # author of the month/year
            yymm = date.strftime('%Y-%m')
            if yymm in self.author_of_month:
                self.author_of_month[yymm][author] = self.author_of_month[yymm].get(author, 0) + 1
            else:
                self.author_of_month[yymm] = {}
                self.author_of_month[yymm][author] = 1
            self.commits_by_month[yymm] = self.commits_by_month.get(yymm, 0) + 1

            yy = date.year
            if yy in self.author_of_year:
                self.author_of_year[yy][author] = self.author_of_year[yy].get(author, 0) + 1
            else:
                self.author_of_year[yy] = {}
                self.author_of_year[yy][author] = 1
            self.commits_by_year[yy] = self.commits_by_year.get(yy, 0) + 1

            # authors: active days
            yymmdd = date.strftime('%Y-%m-%d')
            if 'last_active_day' not in self.authors[author]:
                self.authors[author]['last_active_day'] = yymmdd
                self.authors[author]['active_days'] = set([yymmdd])
            elif yymmdd != self.authors[author]['last_active_day']:
                self.authors[author]['last_active_day'] = yymmdd
                self.authors[author]['active_days'].add(yymmdd)

            # project: active days
            if yymmdd != self.last_active_day:
                self.last_active_day = yymmdd
                self.active_days.add(yymmdd)

            # timezone
            self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1

        # outputs "<stamp> <files>" for each revision
        revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
        lines = []
        revs_to_read = []
        time_rev_count = []
        # Look up each rev in the cache and take its info from there if found;
        # otherwise append the rev to the list of revs to read from the repo.
        for revline in revlines:
            time, rev = revline.split(' ')
            # if the cache is empty, add time and rev to the list of new revs,
            # otherwise try to read the needed info from the cache
            if 'files_in_tree' not in self.cache.keys():
                revs_to_read.append((time, rev))
                continue
            if rev in self.cache['files_in_tree'].keys():
                lines.append('%d %d' % (int(time), self.cache['files_in_tree'][rev]))
            else:
                revs_to_read.append((time, rev))

        # Read revisions from repo
        pool = Pool(processes=conf['processes'])
        time_rev_count = pool.map(getnumoffilesfromrev, revs_to_read)
        pool.terminate()
        pool.join()

        # Update cache with new revisions and append them to the general list
        for (time, rev, count) in time_rev_count:
            if 'files_in_tree' not in self.cache:
                self.cache['files_in_tree'] = {}
            self.cache['files_in_tree'][rev] = count
            lines.append('%d %d' % (int(time), count))

        self.total_commits += len(lines)
        for line in lines:
            parts = line.split(' ')
            if len(parts) != 2:
                continue
            (stamp, files) = parts[0:2]
            try:
                self.files_by_stamp[int(stamp)] = int(files)
            except ValueError:
                print 'Warning: failed to parse line "%s"' % line

        # extensions and size of files
        lines = getpipeoutput(['git ls-tree -r -l -z %s' % getcommitrange('HEAD', end_only = True)]).split('\000')
        blobs_to_read = []
        for line in lines:
            if len(line) == 0:
                continue
            parts = re.split('\s+', line, 4)
            if parts[0] == '160000' and parts[3] == '-':
                # skip submodules
                continue
            blob_id = parts[2]
            size = int(parts[3])
            fullpath = parts[4]

            self.total_size += size
            self.total_files += 1

            filename = fullpath.split('/')[-1] # strip directories
            if filename.find('.') == -1 or filename.rfind('.') == 0:
                ext = ''
            else:
                ext = filename[(filename.rfind('.') + 1):]
            if len(ext) > conf['max_ext_length']:
                ext = ''
            if ext not in self.extensions:
                self.extensions[ext] = {'files': 0, 'lines': 0}
            self.extensions[ext]['files'] += 1
            # if the cache is empty, add ext and blob id to the list of new blobs,
            # otherwise try to read the needed info from the cache
            if 'lines_in_blob' not in self.cache.keys():
                blobs_to_read.append((ext, blob_id))
                continue
            if blob_id in self.cache['lines_in_blob'].keys():
                self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
            else:
                blobs_to_read.append((ext, blob_id))

        # Get line counts for new blobs that were not found in the cache
        pool = Pool(processes=conf['processes'])
        ext_blob_linecount = pool.map(getnumoflinesinblob, blobs_to_read)
        pool.terminate()
        pool.join()

        # Update the cache and record the number of lines per blob
        for (ext, blob_id, linecount) in ext_blob_linecount:
            if 'lines_in_blob' not in self.cache:
                self.cache['lines_in_blob'] = {}
            self.cache['lines_in_blob'][blob_id] = linecount
            self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]

        # line statistics
        # outputs:
        #  N files changed, N insertions (+), N deletions(-)
        # <stamp> <author>
        self.changes_by_date = {} # stamp -> { files, ins, del }
        # computation of lines of code by date is better done
        # on a linear history.
        extra = ''
        if conf['linear_linestats']:
            extra = '--first-parent -m'
        lines = getpipeoutput(['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getlogrange('HEAD'))]).split('\n')
        lines.reverse()
        files = 0; inserted = 0; deleted = 0; total_lines = 0
        author = None
        for line in lines:
            if len(line) == 0:
                continue

            # <stamp> <author>
            if re.search('files? changed', line) == None:
                pos = line.find(' ')
                if pos != -1:
                    try:
                        (stamp, author) = (int(line[:pos]), line[pos+1:])
                        self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }

                        date = datetime.datetime.fromtimestamp(stamp)
                        yymm = date.strftime('%Y-%m')
                        self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
                        self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted

                        yy = date.year
                        self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy, 0) + inserted
                        self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted

                        files, inserted, deleted = 0, 0, 0
                    except ValueError:
                        print 'Warning: unexpected line "%s"' % line
                else:
                    print 'Warning: unexpected line "%s"' % line
            else:
                numbers = getstatsummarycounts(line)
                if len(numbers) == 3:
                    (files, inserted, deleted) = map(lambda el : int(el), numbers)
                    total_lines += inserted
                    total_lines -= deleted
                    self.total_lines_added += inserted
                    self.total_lines_removed += deleted
                else:
                    print 'Warning: failed to handle line "%s"' % line
                    (files, inserted, deleted) = (0, 0, 0)
                #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
        self.total_lines += total_lines

        # Per-author statistics

        # defined for stamp, author only if author committed at this timestamp.
        self.changes_by_date_by_author = {} # stamp -> author -> lines_added

        # Similar to the above, but never use --first-parent
        # (we need to walk through every commit to know who
        # committed what, not just through mainline)
        lines = getpipeoutput(['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getlogrange('HEAD'))]).split('\n')
        lines.reverse()
        files = 0; inserted = 0; deleted = 0
        author = None
        stamp = 0
        for line in lines:
            if len(line) == 0:
                continue

            # <stamp> <author>
            if re.search('files? changed', line) == None:
                pos = line.find(' ')
                if pos != -1:
                    try:
                        oldstamp = stamp
                        (stamp, author) = (int(line[:pos]), line[pos+1:])
                        if oldstamp > stamp:
                            # clock skew, keep old timestamp to avoid having ugly graph
                            stamp = oldstamp
                        if author not in self.authors:
                            self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
                        self.authors[author]['commits'] = self.authors[author].get('commits', 0) + 1
                        self.authors[author]['lines_added'] = self.authors[author].get('lines_added', 0) + inserted
                        self.authors[author]['lines_removed'] = self.authors[author].get('lines_removed', 0) + deleted
                        if stamp not in self.changes_by_date_by_author:
                            self.changes_by_date_by_author[stamp] = {}
                        if author not in self.changes_by_date_by_author[stamp]:
                            self.changes_by_date_by_author[stamp][author] = {}
                        self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author]['lines_added']
                        self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits']
                        files, inserted, deleted = 0, 0, 0
                    except ValueError:
                        print 'Warning: unexpected line "%s"' % line
                else:
                    print 'Warning: unexpected line "%s"' % line
            else:
                numbers = getstatsummarycounts(line)
                if len(numbers) == 3:
                    (files, inserted, deleted) = map(lambda el : int(el), numbers)
                else:
                    print 'Warning: failed to handle line "%s"' % line
                    (files, inserted, deleted) = (0, 0, 0)

    def refine(self):
        # authors
        # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
        self.authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
        self.authors_by_commits.reverse() # most first
        for i, name in enumerate(self.authors_by_commits):
            self.authors[name]['place_by_commits'] = i + 1

        for name in self.authors.keys():
            a = self.authors[name]
            a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
            date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
            date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
            delta = date_last - date_first
            a['date_first'] = date_first.strftime('%Y-%m-%d')
            a['date_last'] = date_last.strftime('%Y-%m-%d')
            a['timedelta'] = delta
            if 'lines_added' not in a: a['lines_added'] = 0
            if 'lines_removed' not in a: a['lines_removed'] = 0

    def getActiveDays(self):
        return self.active_days

    def getActivityByDayOfWeek(self):
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        return self.authors[author]

    def getAuthors(self, limit = None):
        res = getkeyssortedbyvaluekey(self.authors, 'commits')
        res.reverse()
        return res[:limit]

    def getCommitDeltaDays(self):
        return (self.last_commit_stamp / 86400 - self.first_commit_stamp / 86400) + 1

    def getDomainInfo(self, domain):
        return self.domains[domain]

    def getDomains(self):
        return self.domains.keys()

    def getFirstCommitDate(self):
        return datetime.datetime.fromtimestamp(self.first_commit_stamp)

    def getLastCommitDate(self):
        return datetime.datetime.fromtimestamp(self.last_commit_stamp)

    def getTags(self):
        lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
        return lines.split('\n')

    def getTagDate(self, tag):
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        return self.total_authors

    def getTotalCommits(self):
        return self.total_commits

    def getTotalFiles(self):
        return self.total_files

    def getTotalLOC(self):
        return self.total_lines

    def getTotalSize(self):
        return self.total_size

    def revToDate(self, rev):
        stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')

class ReportCreator:
    """Creates the actual report based on given data."""
    def __init__(self):
        pass

    def create(self, data, path):
        self.data = data
        self.path = path

def html_linkify(text):
    return text.lower().replace(' ', '_')

def html_header(level, text):
    name = html_linkify(text)
    return '\n<h%d id="%s"><a href="#%s">%s</a></h%d>\n\n' % (level, name, name, text, level)
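# Illustrative example: html_header(2, 'Hour of Day') returns
# '\n<h2 id="hour_of_day"><a href="#hour_of_day">Hour of Day</a></h2>\n\n'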

class HTMLReportCreator(ReportCreator):
    def create(self, data, path):
        ReportCreator.create(self, data, path)
        self.title = data.projectname

        # copy static files. Looks in the binary directory, ../share/gitstats and /usr/share/gitstats
        binarypath = os.path.dirname(os.path.abspath(__file__))
        secondarypath = os.path.join(binarypath, '..', 'share', 'gitstats')
        basedirs = [binarypath, secondarypath, '/usr/share/gitstats']
        for file in (conf['style'], 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
            for base in basedirs:
                src = base + '/' + file
                if os.path.exists(src):
                    shutil.copyfile(src, path + '/' + file)
                    break
            else:
                print 'Warning: "%s" not found, so not copied (searched: %s)' % (file, basedirs)

        f = open(path + "/index.html", 'w')
        format = '%Y-%m-%d %H:%M:%S'
        self.printHeader(f)

        f.write('<h1>GitStats - %s</h1>' % data.projectname)

        self.printNav(f)

        f.write('<dl>')
        f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
        f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
        f.write('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s), %s, %s</dd>' % (getversion(), getgitversion(), getgnuplotversion()))
        f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format)))
        f.write('<dt>Age</dt><dd>%d days, %d active days (%3.2f%%)</dd>' % (data.getCommitDeltaDays(), len(data.getActiveDays()), (100.0 * len(data.getActiveDays()) / data.getCommitDeltaDays())))
        f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
        f.write('<dt>Total Lines of Code</dt><dd>%s (%d added, %d removed)</dd>' % (data.getTotalLOC(), data.total_lines_added, data.total_lines_removed))
        f.write('<dt>Total Commits</dt><dd>%s (average %.1f commits per active day, %.1f per all days)</dd>' % (data.getTotalCommits(), float(data.getTotalCommits()) / len(data.getActiveDays()), float(data.getTotalCommits()) / data.getCommitDeltaDays()))
        f.write('<dt>Authors</dt><dd>%s (average %.1f commits per author)</dd>' % (data.getTotalAuthors(), (1.0 * data.getTotalCommits()) / data.getTotalAuthors()))
        f.write('</dl>')

        f.write('</body>\n</html>')
        f.close()

        ###
        # Activity
        f = open(path + '/activity.html', 'w')
        self.printHeader(f)
        f.write('<h1>Activity</h1>')
        self.printNav(f)

        #f.write('<h2>Last 30 days</h2>')
        #f.write('<h2>Last 12 months</h2>')

        # Weekly activity
        WEEKS = 32
        f.write(html_header(2, 'Weekly activity'))
        f.write('<p>Last %d weeks</p>' % WEEKS)

        # generate weeks to show (previous N weeks from now)
        now = datetime.datetime.now()
        deltaweek = datetime.timedelta(7)
        weeks = []
        stampcur = now
        for i in range(0, WEEKS):
            weeks.insert(0, stampcur.strftime('%Y-%W'))
            stampcur -= deltaweek

        # top row: commits & bar
        f.write('<table class="noborders"><tr>')
        for i in range(0, WEEKS):
            commits = 0
            if weeks[i] in data.activity_by_year_week:
                commits = data.activity_by_year_week[weeks[i]]

            percentage = 0
            if weeks[i] in data.activity_by_year_week:
                percentage = float(data.activity_by_year_week[weeks[i]]) / data.activity_by_year_week_peak
            height = max(1, int(200 * percentage))
            f.write('<td style="text-align: center; vertical-align: bottom">%d<div style="display: block; background-color: red; width: 20px; height: %dpx"></div></td>' % (commits, height))

        # bottom row: year/week
        f.write('</tr><tr>')
        for i in range(0, WEEKS):
            f.write('<td>%s</td>' % (WEEKS - i))
        f.write('</tr></table>')

        # Hour of Day
        f.write(html_header(2, 'Hour of Day'))
        hour_of_day = data.getActivityByHourOfDay()
        f.write('<table><tr><th>Hour</th>')
        for i in range(0, 24):
            f.write('<th>%d</th>' % i)
        f.write('</tr>\n<tr><th>Commits</th>')
        fp = open(path + '/hour_of_day.dat', 'w')
        for i in range(0, 24):
            if i in hour_of_day:
                r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
                f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
                fp.write('%d %d\n' % (i, hour_of_day[i]))
            else:
                f.write('<td>0</td>')
                fp.write('%d 0\n' % i)
        fp.close()
        f.write('</tr>\n<tr><th>%</th>')
        totalcommits = data.getTotalCommits()
        for i in range(0, 24):
            if i in hour_of_day:
                r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
                f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, (100.0 * hour_of_day[i]) / totalcommits))
            else:
                f.write('<td>0.00</td>')
        f.write('</tr></table>')
        f.write('<img src="hour_of_day.png" alt="Hour of Day">')
        fg = open(path + '/hour_of_day.dat', 'w')
        for i in range(0, 24):
            if i in hour_of_day:
                fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
            else:
                fg.write('%d 0\n' % (i + 1))
        fg.close()

        # Day of Week
        f.write(html_header(2, 'Day of Week'))
        day_of_week = data.getActivityByDayOfWeek()
        f.write('<div class="vtable"><table>')
        f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
        fp = open(path + '/day_of_week.dat', 'w')
        for d in range(0, 7):
            commits = 0
            if d in day_of_week:
                commits = day_of_week[d]
            fp.write('%d %s %d\n' % (d + 1, WEEKDAYS[d], commits))
            f.write('<tr>')
            f.write('<th>%s</th>' % (WEEKDAYS[d]))
            if d in day_of_week:
                f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
            else:
                f.write('<td>0</td>')
            f.write('</tr>')
        f.write('</table></div>')
        f.write('<img src="day_of_week.png" alt="Day of Week">')
        fp.close()

        # Hour of Week
        f.write(html_header(2, 'Hour of Week'))
        f.write('<table>')

        f.write('<tr><th>Weekday</th>')
        for hour in range(0, 24):
            f.write('<th>%d</th>' % (hour))
        f.write('</tr>')

        for weekday in range(0, 7):
            f.write('<tr><th>%s</th>' % (WEEKDAYS[weekday]))
            for hour in range(0, 24):
                try:
                    commits = data.activity_by_hour_of_week[weekday][hour]
                except KeyError:
                    commits = 0
                if commits != 0:
                    f.write('<td')
                    r = 127 + int((float(commits) / data.activity_by_hour_of_week_busiest) * 128)
                    f.write(' style="background-color: rgb(%d, 0, 0)"' % r)
                    f.write('>%d</td>' % commits)
                else:
                    f.write('<td></td>')
            f.write('</tr>')

        f.write('</table>')

        # Month of Year
        f.write(html_header(2, 'Month of Year'))
        f.write('<div class="vtable"><table>')
        f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
        fp = open(path + '/month_of_year.dat', 'w')
        for mm in range(1, 13):
            commits = 0
            if mm in data.activity_by_month_of_year:
                commits = data.activity_by_month_of_year[mm]
            f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, (100.0 * commits) / data.getTotalCommits()))
            fp.write('%d %d\n' % (mm, commits))
        fp.close()
        f.write('</table></div>')
        f.write('<img src="month_of_year.png" alt="Month of Year">')

        # Commits by year/month
        f.write(html_header(2, 'Commits by year/month'))
        f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th><th>Lines added</th><th>Lines removed</th></tr>')
        for yymm in reversed(sorted(data.commits_by_month.keys())):
            f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td></tr>' % (yymm, data.commits_by_month.get(yymm,0), data.lines_added_by_month.get(yymm,0), data.lines_removed_by_month.get(yymm,0)))
        f.write('</table></div>')
        f.write('<img src="commits_by_year_month.png" alt="Commits by year/month">')
        fg = open(path + '/commits_by_year_month.dat', 'w')
        for yymm in sorted(data.commits_by_month.keys()):
            fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
        fg.close()

        # Commits by year
        f.write(html_header(2, 'Commits by Year'))
        f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th><th>Lines added</th><th>Lines removed</th></tr>')
        for yy in reversed(sorted(data.commits_by_year.keys())):
            f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td></tr>' % (yy, data.commits_by_year.get(yy,0), (100.0 * data.commits_by_year.get(yy,0)) / data.getTotalCommits(), data.lines_added_by_year.get(yy,0), data.lines_removed_by_year.get(yy,0)))
        f.write('</table></div>')
        f.write('<img src="commits_by_year.png" alt="Commits by Year">')
        fg = open(path + '/commits_by_year.dat', 'w')
        for yy in sorted(data.commits_by_year.keys()):
            fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
        fg.close()

        # Commits by timezone
        f.write(html_header(2, 'Commits by Timezone'))
        f.write('<table><tr>')
        f.write('<th>Timezone</th><th>Commits</th>')
        f.write('</tr>')
        max_commits_on_tz = max(data.commits_by_timezone.values())
        for i in sorted(data.commits_by_timezone.keys(), key = lambda n : int(n)):
            commits = data.commits_by_timezone[i]
            r = 127 + int((float(commits) / max_commits_on_tz) * 128)
            f.write('<tr><th>%s</th><td style="background-color: rgb(%d, 0, 0)">%d</td></tr>' % (i, r, commits))
        f.write('</table>')

        f.write('</body></html>')
        f.close()

        ###
        # Authors
        f = open(path + '/authors.html', 'w')
        self.printHeader(f)

        f.write('<h1>Authors</h1>')
        self.printNav(f)

        # Authors :: List of authors
        f.write(html_header(2, 'List of Authors'))

        f.write('<table class="authors sortable" id="authors">')
        f.write('<tr><th>Author</th><th>Commits (%)</th><th>+ lines</th><th>- lines</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th>Active days</th><th># by commits</th></tr>')
        for author in data.getAuthors(conf['max_authors']):
            info = data.getAuthorInfo(author)
            f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td><td>%d</td></tr>' % (author, info['commits'], info['commits_frac'], info['lines_added'], info['lines_removed'], info['date_first'], info['date_last'], info['timedelta'], len(info['active_days']), info['place_by_commits']))
        f.write('</table>')

        allauthors = data.getAuthors()
        if len(allauthors) > conf['max_authors']:
            rest = allauthors[conf['max_authors']:]
            f.write('<p class="moreauthors">These didn\'t make it to the top: %s</p>' % ', '.join(rest))

        f.write(html_header(2, 'Cumulated Added Lines of Code per Author'))
        f.write('<img src="lines_of_code_by_author.png" alt="Lines of code per Author">')
        if len(allauthors) > conf['max_authors']:
            f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])

        f.write(html_header(2, 'Commits per Author'))
        f.write('<img src="commits_by_author.png" alt="Commits per Author">')
        if len(allauthors) > conf['max_authors']:
            f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])

        fgl = open(path + '/lines_of_code_by_author.dat', 'w')
        fgc = open(path + '/commits_by_author.dat', 'w')

        lines_by_authors = {} # cumulated added lines by author. To save memory,
        # changes_by_date_by_author[stamp][author] is defined
        # only at points where author commits.
        # lines_by_authors allows us to generate all the
        # points in the .dat file.

        # Don't rely on getAuthors to give the same order each
        # time. Be robust and keep the list in a variable.
        commits_by_authors = {} # cumulated commits by author

        self.authors_to_plot = data.getAuthors(conf['max_authors'])
        for author in self.authors_to_plot:
            lines_by_authors[author] = 0
            commits_by_authors[author] = 0
        for stamp in sorted(data.changes_by_date_by_author.keys()):
            fgl.write('%d' % stamp)
            fgc.write('%d' % stamp)
            for author in self.authors_to_plot:
                if author in data.changes_by_date_by_author[stamp].keys():
                    lines_by_authors[author] = data.changes_by_date_by_author[stamp][author]['lines_added']
                    commits_by_authors[author] = data.changes_by_date_by_author[stamp][author]['commits']
                fgl.write(' %d' % lines_by_authors[author])
                fgc.write(' %d' % commits_by_authors[author])
            fgl.write('\n')
            fgc.write('\n')
        fgl.close()
        fgc.close()

        # Authors :: Author of Month
        f.write(html_header(2, 'Author of Month'))
        f.write('<table class="sortable" id="aom">')
        f.write('<tr><th>Month</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
        for yymm in reversed(sorted(data.author_of_month.keys())):
            authordict = data.author_of_month[yymm]
            authors = getkeyssortedbyvalues(authordict)
            authors.reverse()
            commits = data.author_of_month[yymm][authors[0]]
            next = ', '.join(authors[1:conf['authors_top']+1])
            f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yymm, authors[0], commits, (100.0 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], next, len(authors)))
        f.write('</table>')

        f.write(html_header(2, 'Author of Year'))
        f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
        for yy in reversed(sorted(data.author_of_year.keys())):
            authordict = data.author_of_year[yy]
            authors = getkeyssortedbyvalues(authordict)
            authors.reverse()
            commits = data.author_of_year[yy][authors[0]]
            next = ', '.join(authors[1:conf['authors_top']+1])
            f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yy, authors[0], commits, (100.0 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next, len(authors)))
        f.write('</table>')

        # Domains
        f.write(html_header(2, 'Commits by Domains'))
        domains_by_commits = getkeyssortedbyvaluekey(data.domains, 'commits')
        domains_by_commits.reverse() # most first
        f.write('<div class="vtable"><table>')
        f.write('<tr><th>Domains</th><th>Total (%)</th></tr>')
        fp = open(path + '/domains.dat', 'w')
        n = 0
        for domain in domains_by_commits:
            if n == conf['max_domains']:
                break
            commits = 0
            n += 1
            info = data.getDomainInfo(domain)
            fp.write('%s %d %d\n' % (domain, n, info['commits']))
            f.write('<tr><th>%s</th><td>%d (%.2f%%)</td></tr>' % (domain, info['commits'], (100.0 * info['commits'] / totalcommits)))
        f.write('</table></div>')
        f.write('<img src="domains.png" alt="Commits by Domains">')
        fp.close()

        f.write('</body></html>')
        f.close()

        ###
        # Files
        f = open(path + '/files.html', 'w')
        self.printHeader(f)
        f.write('<h1>Files</h1>')
        self.printNav(f)

        f.write('<dl>\n')
        f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
        f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
        try:
            f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % (float(data.getTotalSize()) / data.getTotalFiles()))
        except ZeroDivisionError:
            pass
        f.write('</dl>\n')

        # Files :: File count by date
        f.write(html_header(2, 'File count by date'))

        # use set to get rid of duplicate/unnecessary entries
        files_by_date = set()
        for stamp in sorted(data.files_by_stamp.keys()):
            files_by_date.add('%s %d' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))

        fg = open(path + '/files_by_date.dat', 'w')
        for line in sorted(list(files_by_date)):
            fg.write('%s\n' % line)
        #for stamp in sorted(data.files_by_stamp.keys()):
        #    fg.write('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
        fg.close()

        f.write('<img src="files_by_date.png" alt="Files by Date">')

        #f.write('<h2>Average file size by date</h2>')

        # Files :: Extensions
        f.write(html_header(2, 'Extensions'))
        f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
        for ext in sorted(data.extensions.keys()):
            files = data.extensions[ext]['files']
            lines = data.extensions[ext]['lines']
            try:
                loc_percentage = (100.0 * lines) / data.getTotalLOC()
            except ZeroDivisionError:
                loc_percentage = 0
            f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (ext, files, (100.0 * files) / data.getTotalFiles(), lines, loc_percentage, lines / files))
        f.write('</table>')

        f.write('</body></html>')
        f.close()

        ###
        # Lines
        f = open(path + '/lines.html', 'w')
        self.printHeader(f)
        f.write('<h1>Lines</h1>')
        self.printNav(f)

        f.write('<dl>\n')
        f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
        f.write('</dl>\n')

        f.write(html_header(2, 'Lines of Code'))
        f.write('<img src="lines_of_code.png" alt="Lines of Code">')

        fg = open(path + '/lines_of_code.dat', 'w')
        for stamp in sorted(data.changes_by_date.keys()):
            fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
        fg.close()

        f.write('</body></html>')
        f.close()

        ###
        # tags.html
        f = open(path + '/tags.html', 'w')
        self.printHeader(f)
        f.write('<h1>Tags</h1>')
        self.printNav(f)

        f.write('<dl>')
        f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
        if len(data.tags) > 0:
            f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (1.0 * data.getTotalCommits() / len(data.tags)))
        f.write('</dl>')

        f.write('<table class="tags">')
        f.write('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
        # sort the tags by date desc
        tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
        for tag in tags_sorted_by_date_desc:
            authorinfo = []
            self.authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
            for i in reversed(self.authors_by_commits):
                authorinfo.append('%s (%d)' % (i, data.tags[tag]['authors'][i]))
            f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
        f.write('</table>')

        f.write('</body></html>')
        f.close()

        self.createGraphs(path)

    def createGraphs(self, path):
        print 'Generating graphs...'

        # hour of day
        f = open(path + '/hour_of_day.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'hour_of_day.png'
unset key
set xrange [0.5:24.5]
set yrange [0:]
set xtics 4
set grid y
set ylabel "Commits"
plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
""")
        f.close()

        # day of week
        f = open(path + '/day_of_week.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'day_of_week.png'
unset key
set xrange [0.5:7.5]
set yrange [0:]
set xtics 1
set grid y
set ylabel "Commits"
plot 'day_of_week.dat' using 1:3:(0.5):xtic(2) w boxes fs solid
""")
        f.close()

        # Domains
        f = open(path + '/domains.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'domains.png'
unset key
unset xtics
set yrange [0:]
set grid y
set ylabel "Commits"
plot 'domains.dat' using 2:3:(0.5) with boxes fs solid, '' using 2:3:1 with labels rotate by 45 offset 0,1
""")
        f.close()

        # Month of Year
        f = open(path + '/month_of_year.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'month_of_year.png'
unset key
set xrange [0.5:12.5]
set yrange [0:]
set xtics 1
set grid y
set ylabel "Commits"
plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
""")
        f.close()

        # commits_by_year_month
        f = open(path + '/commits_by_year_month.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'commits_by_year_month.png'
unset key
set yrange [0:]
set xdata time
set timefmt "%Y-%m"
set format x "%Y-%m"
set xtics rotate
set bmargin 5
set grid y
set ylabel "Commits"
plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
""")
        f.close()

        # commits_by_year
        f = open(path + '/commits_by_year.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'commits_by_year.png'
unset key
set yrange [0:]
set xtics 1 rotate
set grid y
set ylabel "Commits"
set yrange [0:]
plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
""")
        f.close()

        # Files by date
        f = open(path + '/files_by_date.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'files_by_date.png'
unset key
set yrange [0:]
set xdata time
set timefmt "%Y-%m-%d"
set format x "%Y-%m-%d"
set grid y
set ylabel "Files"
set xtics rotate
set ytics autofreq
set bmargin 6
plot 'files_by_date.dat' using 1:2 w steps
""")
        f.close()

        # Lines of Code
        f = open(path + '/lines_of_code.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'lines_of_code.png'
unset key
set yrange [0:]
set xdata time
set timefmt "%s"
set format x "%Y-%m-%d"
set grid y
set ylabel "Lines"
set xtics rotate
set bmargin 6
plot 'lines_of_code.dat' using 1:2 w lines
""")
        f.close()

        # Lines of Code Added per author
        f = open(path + '/lines_of_code_by_author.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set terminal png transparent size 640,480
set output 'lines_of_code_by_author.png'
set key left top
set yrange [0:]
set xdata time
set timefmt "%s"
set format x "%Y-%m-%d"
set grid y
set ylabel "Lines"
set xtics rotate
set bmargin 6
plot """
)
        i = 1
        plots = []
        for a in self.authors_to_plot:
            i = i + 1
            author = a.replace("\"", "\\\"").replace("`", "")
            plots.append("""'lines_of_code_by_author.dat' using 1:%d title "%s" w lines""" % (i, author))
        f.write(", ".join(plots))
        f.write('\n')
        f.close()

        # Commits per author
        f = open(path + '/commits_by_author.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set terminal png transparent size 640,480
set output 'commits_by_author.png'
set key left top
set yrange [0:]
set xdata time
set timefmt "%s"
set format x "%Y-%m-%d"
set grid y
set ylabel "Commits"
set xtics rotate
set bmargin 6
plot """
)
        i = 1
        plots = []
        for a in self.authors_to_plot:
            i = i + 1
            author = a.replace("\"", "\\\"").replace("`", "")
            plots.append("""'commits_by_author.dat' using 1:%d title "%s" w lines""" % (i, author))
        f.write(", ".join(plots))
        f.write('\n')
        f.close()

        os.chdir(path)
        files = glob.glob(path + '/*.plot')
        for f in files:
            out = getpipeoutput([gnuplot_cmd + ' "%s"' % f])
            if len(out) > 0:
                print out

    def printHeader(self, f, title = ''):
        f.write(
"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>GitStats - %s</title>
<link rel="stylesheet" href="%s" type="text/css">
<meta name="generator" content="GitStats %s">
<script type="text/javascript" src="sortable.js"></script>
</head>
<body>
""" % (self.title, conf['style'], getversion()))

    def printNav(self, f):
        f.write("""
<div class="nav">
<ul>
<li><a href="index.html">General</a></li>
<li><a href="activity.html">Activity</a></li>
<li><a href="authors.html">Authors</a></li>
<li><a href="files.html">Files</a></li>
<li><a href="lines.html">Lines</a></li>
<li><a href="tags.html">Tags</a></li>
</ul>
</div>
""")

def usage():
    print """
Usage: gitstats [options] <gitpath..> <outputpath>

Options:
-c key=value     Override configuration value

Default config values:
%s

Please see the manual page for more details.
""" % conf

class GitStats:
    def run(self, args_orig):
        optlist, args = getopt.getopt(args_orig, 'hc:', ["help"])
        for o, v in optlist:
            if o == '-c':
                key, value = v.split('=', 1)
                if key not in conf:
                    raise KeyError('no such key "%s" in config' % key)
                if isinstance(conf[key], int):
                    conf[key] = int(value)
                else:
                    conf[key] = value
            elif o in ('-h', '--help'):
                usage()
                sys.exit()

        if len(args) < 2:
            usage()
            sys.exit(0)

        outputpath = os.path.abspath(args[-1])
        rundir = os.getcwd()

        try:
            os.makedirs(outputpath)
        except OSError:
            pass
        if not os.path.isdir(outputpath):
            print 'FATAL: Output path is not a directory or does not exist'
            sys.exit(1)

        if not getgnuplotversion():
            print 'gnuplot not found'
            sys.exit(1)

        print 'Output path: %s' % outputpath
        cachefile = os.path.join(outputpath, 'gitstats.cache')

        data = GitDataCollector()
        data.loadCache(cachefile)

        for gitpath in args[0:-1]:
            print 'Git path: %s' % gitpath

            prevdir = os.getcwd()
            os.chdir(gitpath)

            print 'Collecting data...'
            data.collect(gitpath)

            os.chdir(prevdir)

        print 'Refining data...'
        data.saveCache(cachefile)
        data.refine()

        os.chdir(rundir)

        print 'Generating report...'
        report = HTMLReportCreator()
        report.create(data, outputpath)

        time_end = time.time()
        exectime_internal = time_end - time_start
        print 'Execution time %.5f secs, %.5f secs (%.2f %%) in external commands' % (exectime_internal, exectime_external, (100.0 * exectime_external) / exectime_internal)
        if sys.stdin.isatty():
            print 'You may now run:'
            print
            print '   sensible-browser \'%s\'' % os.path.join(outputpath, 'index.html').replace("'", "'\\''")
            print

if __name__ == '__main__':
    g = GitStats()
    g.run(sys.argv[1:])