gitstats 62KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641
#!/usr/bin/env python
# Copyright (c) 2007-2014 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/AUTHOR)
# GPLv2 / GPLv3
import datetime
import getopt
import glob
import os
import pickle
import platform
import re
import shutil
import subprocess
import sys
import time
import zlib

# Bail out early on interpreters that are too old; the multiprocessing
# import below (and the rest of the script) needs at least Python 2.6.
if sys.version_info < (2, 6):
    print >> sys.stderr, "Python 2.6 or higher is required for gitstats"
    sys.exit(1)

from multiprocessing import Pool

# Force the C locale so the output of git and the shell tools we parse
# (wc, grep, cut, ...) is not localized.
os.environ['LC_ALL'] = 'C'

# Common gnuplot preamble shared by all generated plot scripts.
GNUPLOT_COMMON = 'set terminal png transparent size 640,240\nset size 1.0,1.0\n'
ON_LINUX = (platform.system() == 'Linux')
WEEKDAYS = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')

# Running totals of wall-clock time spent inside this script vs. in
# spawned external commands (updated by getpipeoutput()).
exectime_internal = 0.0
exectime_external = 0.0
time_start = time.time()

# By default, gnuplot is searched from path, but can be overridden with the
# environment variable "GNUPLOT"
gnuplot_cmd = 'gnuplot'
if 'GNUPLOT' in os.environ:
    gnuplot_cmd = os.environ['GNUPLOT']

# Global configuration, overridable from the command line.
conf = {
    'max_domains': 10,        # max mail domains shown in domain stats
    'max_ext_length': 10,     # file extensions longer than this are ignored
    'style': 'gitstats.css',  # stylesheet copied next to the report
    'max_authors': 20,
    'authors_top': 5,
    'commit_begin': '',       # first commit of the analysed range ('' = from start)
    'commit_end': 'HEAD',     # last commit of the analysed range
    'linear_linestats': 1,    # 1 = compute line stats on a linear history (--first-parent)
    'project_name': '',       # '' = derive from the repository directory name
    'processes': 8,           # worker processes used for repo scanning
    'start_date': ''          # optional --since filter for git log
}
def getpipeoutput(cmds, quiet = False):
    """Run a shell pipeline and return its stdout with trailing newlines stripped.

    cmds is a list of shell command strings; each command's stdout is fed
    into the next command's stdin, like "cmd1 | cmd2 | ...".  Unless quiet
    is set (and stdout is a tty on Linux) the pipeline and its wall-clock
    duration are echoed for progress feedback.  The total time spent in
    external commands is accumulated in the global exectime_external.

    NOTE(review): commands run with shell=True and are built with string
    interpolation by the callers; safe only for trusted repository data.
    """
    global exectime_external
    start = time.time()
    if not quiet and ON_LINUX and os.isatty(1):
        print '>> ' + ' | '.join(cmds),
        sys.stdout.flush()
    # Chain the processes together: each new Popen reads the previous one's stdout.
    p = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
    processes=[p]
    for x in cmds[1:]:
        p = subprocess.Popen(x, stdin = p.stdout, stdout = subprocess.PIPE, shell = True)
        processes.append(p)
    output = p.communicate()[0]
    # Reap every stage of the pipeline, not just the last one.
    for p in processes:
        p.wait()
    end = time.time()
    if not quiet:
        if ON_LINUX and os.isatty(1):
            print '\r',
        print '[%.5f] >> %s' % (end - start, ' | '.join(cmds))
    exectime_external += (end - start)
    return output.rstrip('\n')
  66. def getlogrange(defaultrange = 'HEAD', end_only = True):
  67. commit_range = getcommitrange(defaultrange, end_only)
  68. if len(conf['start_date']) > 0:
  69. return '--since="%s" "%s"' % (conf['start_date'], commit_range)
  70. return commit_range
  71. def getcommitrange(defaultrange = 'HEAD', end_only = False):
  72. if len(conf['commit_end']) > 0:
  73. if end_only or len(conf['commit_begin']) == 0:
  74. return conf['commit_end']
  75. return '%s..%s' % (conf['commit_begin'], conf['commit_end'])
  76. return defaultrange
  77. def getkeyssortedbyvalues(dict):
  78. return map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items())))
  79. # dict['author'] = { 'commits': 512 } - ...key(dict, 'commits')
  80. def getkeyssortedbyvaluekey(d, key):
  81. return map(lambda el : el[1], sorted(map(lambda el : (d[el][key], el), d.keys())))
  82. def getstatsummarycounts(line):
  83. numbers = re.findall('\d+', line)
  84. if len(numbers) == 1:
  85. # neither insertions nor deletions: may probably only happen for "0 files changed"
  86. numbers.append(0);
  87. numbers.append(0);
  88. elif len(numbers) == 2 and line.find('(+)') != -1:
  89. numbers.append(0); # only insertions were printed on line
  90. elif len(numbers) == 2 and line.find('(-)') != -1:
  91. numbers.insert(1, 0); # only deletions were printed on line
  92. return numbers
  93. VERSION = 0
  94. def getversion():
  95. global VERSION
  96. if VERSION == 0:
  97. gitstats_repo = os.path.dirname(os.path.abspath(__file__))
  98. VERSION = getpipeoutput(["git --git-dir=%s/.git --work-tree=%s rev-parse --short %s" %
  99. (gitstats_repo, gitstats_repo, getcommitrange('HEAD').split('\n')[0])])
  100. return VERSION
  101. def getgitversion():
  102. return getpipeoutput(['git --version']).split('\n')[0]
  103. def getgnuplotversion():
  104. # Manually hack the version
  105. return "1.0.1"
  106. # return getpipeoutput(['%s --version' % gnuplot_cmd]).split('\n')[0]
  107. def getnumoffilesfromrev(time_rev):
  108. """
  109. Get number of files changed in commit
  110. """
  111. time, rev = time_rev
  112. return (int(time), rev, int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0]))
  113. def getnumoflinesinblob(ext_blob):
  114. """
  115. Get number of lines in blob
  116. """
  117. ext, blob_id = ext_blob
  118. return (ext, blob_id, int(getpipeoutput(['git cat-file blob %s' % blob_id, 'wc -l']).split()[0]))
  119. class DataCollector:
  120. """Manages data collection from a revision control repository."""
  121. def __init__(self):
  122. self.stamp_created = time.time()
  123. self.cache = {}
  124. self.total_authors = 0
  125. self.activity_by_hour_of_day = {} # hour -> commits
  126. self.activity_by_day_of_week = {} # day -> commits
  127. self.activity_by_month_of_year = {} # month [1-12] -> commits
  128. self.activity_by_hour_of_week = {} # weekday -> hour -> commits
  129. self.activity_by_hour_of_day_busiest = 0
  130. self.activity_by_hour_of_week_busiest = 0
  131. self.activity_by_year_week = {} # yy_wNN -> commits
  132. self.activity_by_year_week_peak = 0
  133. self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed}
  134. self.total_commits = 0
  135. self.total_files = 0
  136. self.authors_by_commits = 0
  137. # domains
  138. self.domains = {} # domain -> commits
  139. # author of the month
  140. self.author_of_month = {} # month -> author -> commits
  141. self.author_of_year = {} # year -> author -> commits
  142. self.commits_by_month = {} # month -> commits
  143. self.commits_by_year = {} # year -> commits
  144. self.lines_added_by_month = {} # month -> lines added
  145. self.lines_added_by_year = {} # year -> lines added
  146. self.lines_removed_by_month = {} # month -> lines removed
  147. self.lines_removed_by_year = {} # year -> lines removed
  148. self.first_commit_stamp = 0
  149. self.last_commit_stamp = 0
  150. self.last_active_day = None
  151. self.active_days = set()
  152. # lines
  153. self.total_lines = 0
  154. self.total_lines_added = 0
  155. self.total_lines_removed = 0
  156. # size
  157. self.total_size = 0
  158. # timezone
  159. self.commits_by_timezone = {} # timezone -> commits
  160. # tags
  161. self.tags = {}
  162. self.files_by_stamp = {} # stamp -> files
  163. # extensions
  164. self.extensions = {} # extension -> files, lines
  165. # line statistics
  166. self.changes_by_date = {} # stamp -> { files, ins, del }
  167. ##
  168. # This should be the main function to extract data from the repository.
  169. def collect(self, dir):
  170. self.dir = dir
  171. if len(conf['project_name']) == 0:
  172. self.projectname = os.path.basename(os.path.abspath(dir))
  173. else:
  174. self.projectname = conf['project_name']
  175. ##
  176. # Load cacheable data
  177. def loadCache(self, cachefile):
  178. if not os.path.exists(cachefile):
  179. return
  180. print 'Loading cache...'
  181. f = open(cachefile, 'rb')
  182. try:
  183. self.cache = pickle.loads(zlib.decompress(f.read()))
  184. except:
  185. # temporary hack to upgrade non-compressed caches
  186. f.seek(0)
  187. self.cache = pickle.load(f)
  188. f.close()
  189. ##
  190. # Produce any additional statistics from the extracted data.
  191. def refine(self):
  192. pass
  193. ##
  194. # : get a dictionary of author
  195. def getAuthorInfo(self, author):
  196. return None
  197. def getActivityByDayOfWeek(self):
  198. return {}
  199. def getActivityByHourOfDay(self):
  200. return {}
  201. # : get a dictionary of domains
  202. def getDomainInfo(self, domain):
  203. return None
  204. ##
  205. # Get a list of authors
  206. def getAuthors(self):
  207. return []
  208. def getFirstCommitDate(self):
  209. return datetime.datetime.now()
  210. def getLastCommitDate(self):
  211. return datetime.datetime.now()
  212. def getStampCreated(self):
  213. return self.stamp_created
  214. def getTags(self):
  215. return []
  216. def getTotalAuthors(self):
  217. return -1
  218. def getTotalCommits(self):
  219. return -1
  220. def getTotalFiles(self):
  221. return -1
  222. def getTotalLOC(self):
  223. return -1
  224. ##
  225. # Save cacheable data
  226. def saveCache(self, cachefile):
  227. print 'Saving cache...'
  228. tempfile = cachefile + '.tmp'
  229. f = open(tempfile, 'wb')
  230. #pickle.dump(self.cache, f)
  231. data = zlib.compress(pickle.dumps(self.cache))
  232. f.write(data)
  233. f.close()
  234. try:
  235. os.remove(cachefile)
  236. except OSError:
  237. pass
  238. os.rename(tempfile, cachefile)
class GitDataCollector(DataCollector):
    """DataCollector backend that shells out to git (via getpipeoutput)."""

    def collect(self, dir):
        """Populate all statistics for the git repository in *dir*.

        Runs several passes over the repo:
          1. author count and tag metadata (`git shortlog`, `git show-ref`)
          2. per-commit activity stats (`git rev-list` with author/date format)
          3. file counts per revision (parallelised, cached in self.cache['files_in_tree'])
          4. extensions and blob sizes/line counts (`git ls-tree`, cached in
             self.cache['lines_in_blob'])
          5. line-change statistics from `git log --shortstat`, once linearised
             and once per-author.
        """
        DataCollector.collect(self, dir)

        self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getlogrange(), 'wc -l']))
        #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))

        # tags
        # `git show-ref --tags` prints "<hash> refs/tags/<name>" per line.
        lines = getpipeoutput(['git show-ref --tags']).split('\n')
        for line in lines:
            if len(line) == 0:
                continue
            (hash, tag) = line.split(' ')

            tag = tag.replace('refs/tags/', '')
            # Timestamp and author of the commit the tag points at.
            output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % hash])
            if len(output) > 0:
                parts = output.split(' ')
                stamp = 0
                try:
                    stamp = int(parts[0])
                except ValueError:
                    stamp = 0
                self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} }

        # collect info on tags, starting from latest
        tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), self.tags.items()))))
        prev = None
        for tag in reversed(tags_sorted_by_date_desc):
            # Count commits per author between consecutive tags ("tag" minus "prev").
            cmd = 'git shortlog -s "%s"' % tag
            if prev != None:
                cmd += ' "^%s"' % prev
            output = getpipeoutput([cmd])
            if len(output) == 0:
                continue
            prev = tag
            for line in output.split('\n'):
                # shortlog line: "<spaces><count>\t<author>"
                parts = re.split('\s+', line, 2)
                commits = int(parts[1])
                author = parts[2]
                self.tags[tag]['commits'] += commits
                self.tags[tag]['authors'][author] = commits

        # Collect revision statistics
        # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
        lines = getpipeoutput(['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split('\n')
        for line in lines:
            parts = line.split(' ', 4)
            author = ''
            try:
                stamp = int(parts[0])
            except ValueError:
                stamp = 0
            timezone = parts[3]
            author, mail = parts[4].split('<', 1)
            author = author.rstrip()
            mail = mail.rstrip('>')
            domain = '?'
            if mail.find('@') != -1:
                domain = mail.rsplit('@', 1)[1]
            date = datetime.datetime.fromtimestamp(float(stamp))

            # First and last commit stamp (may be in any order because of cherry-picking and patches)
            if stamp > self.last_commit_stamp:
                self.last_commit_stamp = stamp
            if self.first_commit_stamp == 0 or stamp < self.first_commit_stamp:
                self.first_commit_stamp = stamp

            # activity
            # hour
            hour = date.hour
            self.activity_by_hour_of_day[hour] = self.activity_by_hour_of_day.get(hour, 0) + 1
            # most active hour?
            if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
                self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]

            # day of week
            day = date.weekday()
            self.activity_by_day_of_week[day] = self.activity_by_day_of_week.get(day, 0) + 1

            # domain stats
            if domain not in self.domains:
                self.domains[domain] = {}
            # commits
            self.domains[domain]['commits'] = self.domains[domain].get('commits', 0) + 1

            # hour of week
            if day not in self.activity_by_hour_of_week:
                self.activity_by_hour_of_week[day] = {}
            self.activity_by_hour_of_week[day][hour] = self.activity_by_hour_of_week[day].get(hour, 0) + 1
            # most active hour?
            if self.activity_by_hour_of_week[day][hour] > self.activity_by_hour_of_week_busiest:
                self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[day][hour]

            # month of year
            month = date.month
            self.activity_by_month_of_year[month] = self.activity_by_month_of_year.get(month, 0) + 1

            # yearly/weekly activity
            yyw = date.strftime('%Y-%W')
            self.activity_by_year_week[yyw] = self.activity_by_year_week.get(yyw, 0) + 1
            if self.activity_by_year_week_peak < self.activity_by_year_week[yyw]:
                self.activity_by_year_week_peak = self.activity_by_year_week[yyw]

            # author stats
            if author not in self.authors:
                self.authors[author] = {}
            # commits, note again that commits may be in any date order because of cherry-picking and patches
            if 'last_commit_stamp' not in self.authors[author]:
                self.authors[author]['last_commit_stamp'] = stamp
            if stamp > self.authors[author]['last_commit_stamp']:
                self.authors[author]['last_commit_stamp'] = stamp
            if 'first_commit_stamp' not in self.authors[author]:
                self.authors[author]['first_commit_stamp'] = stamp
            if stamp < self.authors[author]['first_commit_stamp']:
                self.authors[author]['first_commit_stamp'] = stamp

            # author of the month/year
            yymm = date.strftime('%Y-%m')
            if yymm in self.author_of_month:
                self.author_of_month[yymm][author] = self.author_of_month[yymm].get(author, 0) + 1
            else:
                self.author_of_month[yymm] = {}
                self.author_of_month[yymm][author] = 1
            self.commits_by_month[yymm] = self.commits_by_month.get(yymm, 0) + 1

            yy = date.year
            if yy in self.author_of_year:
                self.author_of_year[yy][author] = self.author_of_year[yy].get(author, 0) + 1
            else:
                self.author_of_year[yy] = {}
                self.author_of_year[yy][author] = 1
            self.commits_by_year[yy] = self.commits_by_year.get(yy, 0) + 1

            # authors: active days
            yymmdd = date.strftime('%Y-%m-%d')
            if 'last_active_day' not in self.authors[author]:
                self.authors[author]['last_active_day'] = yymmdd
                self.authors[author]['active_days'] = set([yymmdd])
            elif yymmdd != self.authors[author]['last_active_day']:
                self.authors[author]['last_active_day'] = yymmdd
                self.authors[author]['active_days'].add(yymmdd)

            # project: active days
            if yymmdd != self.last_active_day:
                self.last_active_day = yymmdd
                self.active_days.add(yymmdd)

            # timezone
            self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1

        # outputs "<stamp> <files>" for each revision
        revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
        lines = []
        revs_to_read = []
        time_rev_count = []
        #Look up rev in cache and take info from cache if found
        #If not append rev to list of rev to read from repo
        for revline in revlines:
            time, rev = revline.split(' ')
            #if cache empty then add time and rev to list of new rev's
            #otherwise try to read needed info from cache
            if 'files_in_tree' not in self.cache.keys():
                revs_to_read.append((time,rev))
                continue
            if rev in self.cache['files_in_tree'].keys():
                lines.append('%d %d' % (int(time), self.cache['files_in_tree'][rev]))
            else:
                revs_to_read.append((time,rev))

        #Read revisions from repo
        pool = Pool(processes=conf['processes'])
        time_rev_count = pool.map(getnumoffilesfromrev, revs_to_read)
        pool.terminate()
        pool.join()

        #Update cache with new revisions and append then to general list
        for (time, rev, count) in time_rev_count:
            if 'files_in_tree' not in self.cache:
                self.cache['files_in_tree'] = {}
            self.cache['files_in_tree'][rev] = count
            lines.append('%d %d' % (int(time), count))

        self.total_commits += len(lines)
        for line in lines:
            parts = line.split(' ')
            if len(parts) != 2:
                continue
            (stamp, files) = parts[0:2]
            try:
                self.files_by_stamp[int(stamp)] = int(files)
            except ValueError:
                print 'Warning: failed to parse line "%s"' % line

        # extensions and size of files
        lines = getpipeoutput(['git ls-tree -r -l -z %s' % getcommitrange('HEAD', end_only = True)]).split('\000')
        blobs_to_read = []
        for line in lines:
            if len(line) == 0:
                continue
            parts = re.split('\s+', line, 4)
            if parts[0] == '160000' and parts[3] == '-':
                # skip submodules
                continue
            blob_id = parts[2]
            size = int(parts[3])
            fullpath = parts[4]

            self.total_size += size
            self.total_files += 1

            filename = fullpath.split('/')[-1] # strip directories
            if filename.find('.') == -1 or filename.rfind('.') == 0:
                ext = ''
            else:
                ext = filename[(filename.rfind('.') + 1):]
            if len(ext) > conf['max_ext_length']:
                ext = ''
            if ext not in self.extensions:
                self.extensions[ext] = {'files': 0, 'lines': 0}
            self.extensions[ext]['files'] += 1
            #if cache empty then add ext and blob id to list of new blob's
            #otherwise try to read needed info from cache
            if 'lines_in_blob' not in self.cache.keys():
                blobs_to_read.append((ext,blob_id))
                continue
            if blob_id in self.cache['lines_in_blob'].keys():
                self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
            else:
                blobs_to_read.append((ext,blob_id))

        #Get info abount line count for new blob's that wasn't found in cache
        pool = Pool(processes=conf['processes'])
        ext_blob_linecount = pool.map(getnumoflinesinblob, blobs_to_read)
        pool.terminate()
        pool.join()

        #Update cache and write down info about number of number of lines
        for (ext, blob_id, linecount) in ext_blob_linecount:
            if 'lines_in_blob' not in self.cache:
                self.cache['lines_in_blob'] = {}
            self.cache['lines_in_blob'][blob_id] = linecount
            self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]

        # line statistics
        # outputs:
        #  N files changed, N insertions (+), N deletions(-)
        # <stamp> <author>
        self.changes_by_date = {} # stamp -> { files, ins, del }
        # computation of lines of code by date is better done
        # on a linear history.
        extra = ''
        if conf['linear_linestats']:
            extra = '--first-parent -m'
        lines = getpipeoutput(['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getlogrange('HEAD'))]).split('\n')
        lines.reverse()
        files = 0; inserted = 0; deleted = 0; total_lines = 0
        author = None
        # The log is read oldest-first; a shortstat line describes the commit
        # whose "<stamp> <author>" header follows it in this reversed order.
        for line in lines:
            if len(line) == 0:
                continue

            # <stamp> <author>
            if re.search('files? changed', line) == None:
                pos = line.find(' ')
                if pos != -1:
                    try:
                        (stamp, author) = (int(line[:pos]), line[pos+1:])
                        self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }

                        date = datetime.datetime.fromtimestamp(stamp)
                        yymm = date.strftime('%Y-%m')
                        self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
                        self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted

                        yy = date.year
                        self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy,0) + inserted
                        self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted

                        files, inserted, deleted = 0, 0, 0
                    except ValueError:
                        print 'Warning: unexpected line "%s"' % line
                else:
                    print 'Warning: unexpected line "%s"' % line
            else:
                numbers = getstatsummarycounts(line)

                if len(numbers) == 3:
                    (files, inserted, deleted) = map(lambda el : int(el), numbers)
                    total_lines += inserted
                    total_lines -= deleted
                    self.total_lines_added += inserted
                    self.total_lines_removed += deleted

                else:
                    print 'Warning: failed to handle line "%s"' % line
                    (files, inserted, deleted) = (0, 0, 0)
                #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
        self.total_lines += total_lines

        # Per-author statistics

        # defined for stamp, author only if author commited at this timestamp.
        self.changes_by_date_by_author = {} # stamp -> author -> lines_added

        # Similar to the above, but never use --first-parent
        # (we need to walk through every commit to know who
        # committed what, not just through mainline)
        lines = getpipeoutput(['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getlogrange('HEAD'))]).split('\n')
        lines.reverse()
        files = 0; inserted = 0; deleted = 0
        author = None
        stamp = 0
        for line in lines:
            if len(line) == 0:
                continue

            # <stamp> <author>
            if re.search('files? changed', line) == None:
                pos = line.find(' ')
                if pos != -1:
                    try:
                        oldstamp = stamp
                        (stamp, author) = (int(line[:pos]), line[pos+1:])
                        if oldstamp > stamp:
                            # clock skew, keep old timestamp to avoid having ugly graph
                            stamp = oldstamp
                        if author not in self.authors:
                            self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
                        self.authors[author]['commits'] = self.authors[author].get('commits', 0) + 1
                        self.authors[author]['lines_added'] = self.authors[author].get('lines_added', 0) + inserted
                        self.authors[author]['lines_removed'] = self.authors[author].get('lines_removed', 0) + deleted
                        if stamp not in self.changes_by_date_by_author:
                            self.changes_by_date_by_author[stamp] = {}
                        if author not in self.changes_by_date_by_author[stamp]:
                            self.changes_by_date_by_author[stamp][author] = {}
                        self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author]['lines_added']
                        self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits']
                        files, inserted, deleted = 0, 0, 0
                    except ValueError:
                        print 'Warning: unexpected line "%s"' % line
                else:
                    print 'Warning: unexpected line "%s"' % line
            else:
                numbers = getstatsummarycounts(line);
                if len(numbers) == 3:
                    (files, inserted, deleted) = map(lambda el : int(el), numbers)
                else:
                    print 'Warning: failed to handle line "%s"' % line
                    (files, inserted, deleted) = (0, 0, 0)

    def refine(self):
        """Derive secondary author statistics after collect() has run."""
        # authors
        # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
        self.authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
        self.authors_by_commits.reverse() # most first
        for i, name in enumerate(self.authors_by_commits):
            self.authors[name]['place_by_commits'] = i + 1

        for name in self.authors.keys():
            a = self.authors[name]
            a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
            date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
            date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
            delta = date_last - date_first
            a['date_first'] = date_first.strftime('%Y-%m-%d')
            a['date_last'] = date_last.strftime('%Y-%m-%d')
            a['timedelta'] = delta
            if 'lines_added' not in a: a['lines_added'] = 0
            if 'lines_removed' not in a: a['lines_removed'] = 0

    def getActiveDays(self):
        """Return the set of YYYY-MM-DD strings on which any commit was made."""
        return self.active_days

    def getActivityByDayOfWeek(self):
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        return self.authors[author]

    def getAuthors(self, limit = None):
        """Return author names ordered by commit count (most commits first)."""
        res = getkeyssortedbyvaluekey(self.authors, 'commits')
        res.reverse()
        return res[:limit]

    def getCommitDeltaDays(self):
        """Return the number of calendar days spanned by the commit history."""
        return (self.last_commit_stamp / 86400 - self.first_commit_stamp / 86400) + 1

    def getDomainInfo(self, domain):
        return self.domains[domain]

    def getDomains(self):
        return self.domains.keys()

    def getFirstCommitDate(self):
        return datetime.datetime.fromtimestamp(self.first_commit_stamp)

    def getLastCommitDate(self):
        return datetime.datetime.fromtimestamp(self.last_commit_stamp)

    def getTags(self):
        lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
        return lines.split('\n')

    def getTagDate(self, tag):
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        return self.total_authors

    def getTotalCommits(self):
        return self.total_commits

    def getTotalFiles(self):
        return self.total_files

    def getTotalLOC(self):
        return self.total_lines

    def getTotalSize(self):
        return self.total_size

    def revToDate(self, rev):
        """Return the author date of *rev* formatted as YYYY-MM-DD."""
        stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
  609. class ReportCreator:
  610. """Creates the actual report based on given data."""
  611. def __init__(self):
  612. pass
  613. def create(self, data, path):
  614. self.data = data
  615. self.path = path
  616. def html_linkify(text):
  617. return text.lower().replace(' ', '_')
  618. def html_header(level, text):
  619. name = html_linkify(text)
  620. return '\n<h%d id="%s"><a href="#%s">%s</a></h%d>\n\n' % (level, name, name, text, level)
  621. class HTMLReportCreator(ReportCreator):
def create(self, data, path):
    """Write every report page (index, activity, authors, files, lines, tags)
    plus the companion .dat/.tsv/.csv data files into *path*.

    data -- a populated GitDataCollector-like object
    path -- output directory (assumed to exist)

    Ends by calling self.createGraphs(path) to emit the gnuplot scripts.
    """
    ReportCreator.create(self, data, path)
    self.title = data.projectname

    # copy static files. Looks in the binary directory, ../share/gitstats and /usr/share/gitstats
    binarypath = os.path.dirname(os.path.abspath(__file__))
    secondarypath = os.path.join(binarypath, '..', 'share', 'gitstats')
    basedirs = [binarypath, secondarypath, '/usr/share/gitstats']
    for file in (conf['style'], 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
        for base in basedirs:
            src = base + '/' + file
            if os.path.exists(src):
                shutil.copyfile(src, path + '/' + file)
                break
        else:
            # for/else: reached only when no basedir contained the file
            print 'Warning: "%s" not found, so not copied (searched: %s)' % (file, basedirs)

    f = open(path + "/index.html", 'w')
    format = '%Y-%m-%d %H:%M:%S'  # NOTE(review): shadows the builtin format()
    self.printHeader(f)

    f.write('<h1>GitStats - %s</h1>' % data.projectname)

    # New function for TSV write ins, put this in a more organized place later
    def writeHeaderstoNewFile(fileName,headers, delimiter):
        """
        Writes the headers to the first line of the file (truncates the file).
        Args:
        fileName (String): Name of the destination file, ex: "data.tsv"
        headers (List(String)): Headers to be written, ex: ["header1","header2"....]
        delimiter (String): Separator written between headers, ex: "\t"
        """
        # assert fileName[-4:] ==".tsv", "fileName must be '.tsv' file not '%s'" %(fileName)
        # Local 'f' shadows (but does not touch) the enclosing HTML handle.
        f = open (fileName,"w")
        for headerIndex in range(len(headers)):
            if headerIndex!=len(headers)-1:
                # write header along with\t
                f.write(headers[headerIndex]+delimiter)
            else:
                # write last word along with\n
                f.write(headers[len(headers)-1]+"\n")
        f.close()

    self.printNav(f)

    # General summary: HTML <dl> on the index page, mirrored row-by-row into general.tsv.
    writeHeaderstoNewFile(path + "/general.tsv", ['header','data'],"\t")
    general_tsv=open(path + "/general.tsv", "a+")  # NOTE(review): handle is never closed explicitly
    f.write('<dl>')
    f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
    general_tsv.write('Project Name\t%s\n'%(data.projectname))
    f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
    general_tsv.write('Generated\t%s (in %d seconds)\n'%((datetime.datetime.now().strftime(format), time.time() - data.getStampCreated())))
    f.write('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s), %s, %s</dd>' % (getversion(), getgitversion(), getgnuplotversion()))
    f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format)))
    general_tsv.write('Report Period\t%s to %s\n'%(data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format)))
    f.write('<dt>Age</dt><dd>%d days, %d active days (%3.2f%%)</dd>' % (data.getCommitDeltaDays(), len(data.getActiveDays()), (100.0 * len(data.getActiveDays()) / data.getCommitDeltaDays())))
    general_tsv.write('Age\t%d days, %d active days (%3.2f%%)\n'% (data.getCommitDeltaDays(), len(data.getActiveDays()), (100.0 * len(data.getActiveDays()) / data.getCommitDeltaDays())))
    f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
    general_tsv.write('Total Files\t%s\n'% data.getTotalFiles())
    f.write('<dt>Total Lines of Code</dt><dd>%s (%d added, %d removed)</dd>' % (data.getTotalLOC(), data.total_lines_added, data.total_lines_removed))
    general_tsv.write('Total Lines of Code\t%s (%d added, %d removed)\n' %(data.getTotalLOC(),data.total_lines_added, data.total_lines_removed))
    f.write('<dt>Total Commits</dt><dd>%s (average %.1f commits per active day, %.1f per all days)</dd>' % (data.getTotalCommits(), float(data.getTotalCommits()) / len(data.getActiveDays()), float(data.getTotalCommits()) / data.getCommitDeltaDays()))
    general_tsv.write('Total Commits\t%s (average %.1f commits per active day, %.1f per all days)\n' % (data.getTotalCommits(), float(data.getTotalCommits()) / len(data.getActiveDays()), float(data.getTotalCommits()) / data.getCommitDeltaDays()))
    f.write('<dt>Authors</dt><dd>%s (average %.1f commits per author)</dd>' % (data.getTotalAuthors(), (1.0 * data.getTotalCommits()) / data.getTotalAuthors()))
    general_tsv.write('Authors\t%s (average %.1f commits per author)\n' % (data.getTotalAuthors(), (1.0 * data.getTotalCommits()) / data.getTotalAuthors()))
    f.write('</dl>')
    f.write('</body>\n</html>')
    f.close()

    ###
    # Activity
    f = open(path + '/activity.html', 'w')
    self.printHeader(f)
    f.write('<h1>Activity</h1>')
    self.printNav(f)
    #f.write('<h2>Last 30 days</h2>')
    #f.write('<h2>Last 12 months</h2>')

    # Weekly activity
    WEEKS = 32
    f.write(html_header(2, 'Weekly activity'))
    f.write('<p>Last %d weeks</p>' % WEEKS)
    # NOTE(review): this handle is discarded and recreated again in the
    # "Day of Week" section below before it is ever written to.
    writeHeaderstoNewFile(path + "/day_of_week_TEST.tsv", ['day_number','day_name','commits'],"\t")
    day_of_week_tsv=open(path + "/day_of_week_TEST.tsv", "a+")
    # NOTE(review): 'actvitity' typo is the actual on-disk filename; file only ever gets its header row.
    writeHeaderstoNewFile(path + "/weekly_actvitity_TEST.tsv", ['days, commits'], "\t")

    # generate weeks to show (previous N weeks from now)
    now = datetime.datetime.now()
    deltaweek = datetime.timedelta(7)
    weeks = []
    stampcur = now
    for i in range(0, WEEKS):
        weeks.insert(0, stampcur.strftime('%Y-%W'))
        stampcur -= deltaweek

    # top row: commits & bar
    f.write('<table class="noborders"><tr>')
    for i in range(0, WEEKS):
        commits = 0
        if weeks[i] in data.activity_by_year_week:
            commits = data.activity_by_year_week[weeks[i]]
        percentage = 0
        if weeks[i] in data.activity_by_year_week:
            percentage = float(data.activity_by_year_week[weeks[i]]) / data.activity_by_year_week_peak
        # bar height scaled to the busiest week; min 1px so a bar is always visible
        height = max(1, int(200 * percentage))
        f.write('<td style="text-align: center; vertical-align: bottom">%d<div style="display: block; background-color: red; width: 20px; height: %dpx"></div></td>' % (commits, height))

    # bottom row: year/week
    f.write('</tr><tr>')
    for i in range(0, WEEKS):
        f.write('<td>%s</td>' % (WEEKS - i))
    f.write('</tr></table>')

    # Hour of Day
    f.write(html_header(2, 'Hour of Day'))
    hour_of_day = data.getActivityByHourOfDay()
    f.write('<table><tr><th>Hour</th>')
    for i in range(0, 24):
        f.write('<th>%d</th>' % i)
    f.write('</tr>\n<tr><th>Commits</th>')
    fp = open(path + '/hour_of_day.dat', 'w')
    for i in range(0, 24):
        if i in hour_of_day:
            # color ramps from 127 (quietest) to 255 (busiest hour)
            r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
            f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
            fp.write('%d %d\n' % (i, hour_of_day[i]))
        else:
            f.write('<td>0</td>')
            fp.write('%d 0\n' % i)
    fp.close()
    f.write('</tr>\n<tr><th>%</th>')
    totalcommits = data.getTotalCommits()
    for i in range(0, 24):
        if i in hour_of_day:
            r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
            f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, (100.0 * hour_of_day[i]) / totalcommits))
        else:
            f.write('<td>0.00</td>')
    f.write('</tr></table>')
    f.write('<img src="hour_of_day.png" alt="Hour of Day">')
    # NOTE(review): rewrites hour_of_day.dat with 1-based hours, replacing
    # the 0-based file written just above — confirm which one gnuplot expects.
    fg = open(path + '/hour_of_day.dat', 'w')
    for i in range(0, 24):
        if i in hour_of_day:
            fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
        else:
            fg.write('%d 0\n' % (i + 1))
    fg.close()

    # Day of Week
    writeHeaderstoNewFile(path + "/day_of_week_TEST.tsv", ['day_number','day_name','commits'],"\t")
    day_of_week_tsv=open(path + "/day_of_week_TEST.tsv", "a+")
    # FOR TABLE OUTPUT, NEED TO TEST THIS, THINK I CAN GET AROUND THIS USING TSVS
    writeHeaderstoNewFile(path +"/day_of_week_TABLE.csv", ['Day_Num','Day_Name', 'Commits'], ',')
    day_of_week_TABLE=open(path + "/day_of_week_TABLE.csv", "a+")
    f.write(html_header(2, 'Day of Week'))
    day_of_week = data.getActivityByDayOfWeek()
    f.write('<div class="vtable"><table>')
    f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
    fp = open(path + '/day_of_week.dat', 'w')
    for d in range(0, 7):
        commits = 0
        if d in day_of_week:
            commits = day_of_week[d]
        fp.write('%d %s %d\n' % (d + 1, WEEKDAYS[d], commits))
        # WRITE TO TSV, add +1, may cause off by one err
        day_of_week_tsv.write("%d\t%s\t%d\n" %(d+1, WEEKDAYS[d],commits))
        day_of_week_TABLE.write("%d,%s,%d\n" %(d+1, WEEKDAYS[d],commits))
        f.write('<tr>')
        f.write('<th>%s</th>' % (WEEKDAYS[d]))
        if d in day_of_week:
            f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
        else:
            f.write('<td>0</td>')
        f.write('</tr>')
    f.write('</table></div>')
    f.write('<img src="day_of_week.png" alt="Day of Week">')
    fp.close()

    # Hour of Week
    writeHeaderstoNewFile( path+"/hour_of_week_TEST.tsv", ['day','hour','value'], "\t")
    hour_of_week_TEST=open( path+"/hour_of_week_TEST.tsv", "a+")
    f.write(html_header(2, 'Hour of Week'))
    f.write('<table>')
    f.write('<tr><th>Weekday</th>')
    for hour in range(0, 24):
        f.write('<th>%d</th>' % (hour))
    f.write('</tr>')
    for weekday in range(0, 7):
        f.write('<tr><th>%s</th>' % (WEEKDAYS[weekday]))
        for hour in range(0, 24):
            try:
                commits = data.activity_by_hour_of_week[weekday][hour]
            except KeyError:
                commits = 0
            if commits != 0:
                f.write('<td')
                r = 127 + int((float(commits) / data.activity_by_hour_of_week_busiest) * 128)
                f.write(' style="background-color: rgb(%d, 0, 0)"' % r)
                f.write('>%d</td>' % commits)
                # TSV rows are 1-based for both weekday and hour
                hour_of_week_TEST.write("%d\t%d\t%d\n" %(weekday+1,hour+1,commits))
            else:
                f.write('<td></td>')
                hour_of_week_TEST.write("%d\t%d\t%d\n" %(weekday+1,hour+1,0))
        f.write('</tr>')
    f.write('</table>')

    # Month of Year
    f.write(html_header(2, 'Month of Year'))
    f.write('<div class="vtable"><table>')
    f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
    fp = open (path + '/month_of_year.dat', 'w')
    for mm in range(1, 13):
        commits = 0
        if mm in data.activity_by_month_of_year:
            commits = data.activity_by_month_of_year[mm]
        f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, (100.0 * commits) / data.getTotalCommits()))
        fp.write('%d %d\n' % (mm, commits))
    fp.close()
    f.write('</table></div>')
    f.write('<img src="month_of_year.png" alt="Month of Year">')

    # Commits by year/month
    # TEST THIS
    # NOTE(review): header delimiter is "," but data rows below use "\t" despite the .tsv name — confirm intended format.
    writeHeaderstoNewFile(path+"/commits_by_year_month_TABLE.tsv", ['Month','Commits','Lines added','Lines removed'], ",")
    commits_by_year_month_TABLE=open( path+"/commits_by_year_month_TABLE.tsv", "a+")
    f.write(html_header(2, 'Commits by year/month'))
    f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th><th>Lines added</th><th>Lines removed</th></tr>')
    for yymm in reversed(sorted(data.commits_by_month.keys())):
        f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td></tr>' % (yymm, data.commits_by_month.get(yymm,0), data.lines_added_by_month.get(yymm,0), data.lines_removed_by_month.get(yymm,0)))
        commits_by_year_month_TABLE.write('%s\t%d\t%d\t%d\n' % (yymm, data.commits_by_month.get(yymm,0), data.lines_added_by_month.get(yymm,0), data.lines_removed_by_month.get(yymm,0)))
    f.write('</table></div>')
    f.write('<img src="commits_by_year_month.png" alt="Commits by year/month">')
    fg = open(path + '/commits_by_year_month.dat', 'w')
    for yymm in sorted(data.commits_by_month.keys()):
        fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
    fg.close()

    # Commits by year
    f.write(html_header(2, 'Commits by Year'))
    f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th><th>Lines added</th><th>Lines removed</th></tr>')
    for yy in reversed(sorted(data.commits_by_year.keys())):
        f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td></tr>' % (yy, data.commits_by_year.get(yy,0), (100.0 * data.commits_by_year.get(yy,0)) / data.getTotalCommits(), data.lines_added_by_year.get(yy,0), data.lines_removed_by_year.get(yy,0)))
    f.write('</table></div>')
    f.write('<img src="commits_by_year.png" alt="Commits by Year">')
    fg = open(path + '/commits_by_year.dat', 'w')
    for yy in sorted(data.commits_by_year.keys()):
        fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
    fg.close()

    # Commits by timezone
    f.write(html_header(2, 'Commits by Timezone'))
    f.write('<table><tr>')
    f.write('<th>Timezone</th><th>Commits</th>')
    f.write('</tr>')
    max_commits_on_tz = max(data.commits_by_timezone.values())
    for i in sorted(data.commits_by_timezone.keys(), key = lambda n : int(n)):
        commits = data.commits_by_timezone[i]
        r = 127 + int((float(commits) / max_commits_on_tz) * 128)
        f.write('<tr><th>%s</th><td style="background-color: rgb(%d, 0, 0)">%d</td></tr>' % (i, r, commits))
    f.write('</table>')
    f.write('</body></html>')
    f.close()

    ###
    # Authors
    f = open(path + '/authors.html', 'w')
    self.printHeader(f)
    f.write('<h1>Authors</h1>')
    self.printNav(f)

    # Authors :: List of authors
    writeHeaderstoNewFile(path + "/list_authors.tsv", ['author','commits','pos_lines','neg_lines', 'first_commit', 'last_commit', 'age', 'active_days', 'num_by_commits'],"\t")
    list_authors_tsv=open(path + "/list_authors.tsv", "a+")
    f.write(html_header(2, 'List of Authors'))
    # authors.tsv collects just the top-author names, tab-separated on one line
    authors_file= open(path + "/authors.tsv","w")
    f.write('<table class="authors sortable" id="authors">')
    f.write('<tr><th>Author</th><th>Commits (%)</th><th>+ lines</th><th>- lines</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th>Active days</th><th># by commits</th></tr>')
    for author in data.getAuthors(conf['max_authors']):
        info = data.getAuthorInfo(author)
        authors_file.write(author+"\t")
        f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td><td>%d</td></tr>' % (author, info['commits'], info['commits_frac'], info['lines_added'], info['lines_removed'], info['date_first'], info['date_last'], info['timedelta'], len(info['active_days']), info['place_by_commits']))
        # write tabular data to tsv
        list_authors_tsv.write("%s\t%d (%.2f%%)\t %d\t%d\t%s\t%s\t%s\t%d\t%d\n" %(author, info['commits'], info['commits_frac'], info['lines_added'], info['lines_removed'], info['date_first'], info['date_last'], info['timedelta'], len(info['active_days']), info['place_by_commits']))
    f.write('</table>')
    # authors_file.write("\n")

    allauthors = data.getAuthors()
    if len(allauthors) > conf['max_authors']:
        rest = allauthors[conf['max_authors']:]
        f.write('<p class="moreauthors">These didn\'t make it to the top: %s</p>' % ', '.join(rest))

    f.write(html_header(2, 'Cumulated Added Lines of Code per Author'))
    f.write('<img src="lines_of_code_by_author.png" alt="Lines of code per Author">')
    if len(allauthors) > conf['max_authors']:
        f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])

    f.write(html_header(2, 'Commits per Author'))
    f.write('<img src="commits_by_author.png" alt="Commits per Author">')
    if len(allauthors) > conf['max_authors']:
        f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])

    fgl = open(path + '/lines_of_code_by_author.dat', 'w')
    fgc = open(path + '/commits_by_author.dat', 'w')

    lines_by_authors = {} # cumulated added lines by
    # author. to save memory,
    # changes_by_date_by_author[stamp][author] is defined
    # only at points where author commits.
    # lines_by_authors allows us to generate all the
    # points in the .dat file.

    # Don't rely on getAuthors to give the same order each
    # time. Be robust and keep the list in a variable.
    commits_by_authors = {} # cumulated added lines by

    self.authors_to_plot = data.getAuthors(conf['max_authors'])
    for author in self.authors_to_plot:
        lines_by_authors[author] = 0
        commits_by_authors[author] = 0
    for stamp in sorted(data.changes_by_date_by_author.keys()):
        fgl.write('%d' % stamp)
        fgc.write('%d' % stamp)
        # For easy ctrl f search of the downloaded file, give the proper non-unix timestamp, NEEDS TESTING
        # NOTE(review): this appends the datetime directly after the unix stamp
        # with no separator, so the first .dat column becomes "<stamp><datetime>"
        # — likely to break gnuplot's time parsing; confirm intended.
        stamp_converted = datetime.datetime.fromtimestamp(int(stamp)).strftime('%Y-%m-%d %H:%M:%S')
        fgl.write('%s' % stamp_converted)
        fgc.write('%s' % stamp_converted)
        # hour_of_week_TEST.write("%d\t" %(stamp))
        # commits_by_author_TEST.write("%d\t" %(stamp))
        for author in self.authors_to_plot:
            if author in data.changes_by_date_by_author[stamp].keys():
                lines_by_authors[author] = data.changes_by_date_by_author[stamp][author]['lines_added']
                commits_by_authors[author] = data.changes_by_date_by_author[stamp][author]['commits']
            # every author gets a column at every stamp, carrying the last known value
            fgl.write(' %d' % lines_by_authors[author])
            fgc.write(' %d' % commits_by_authors[author])
            # hour_of_week_TEST.write("%d\t" % lines_by_authors[author] )
            # commits_by_author_TEST.write("%d\t" % commits_by_authors[author] )
        # hour_of_week_TEST.write("\n")
        # commits_by_author_TEST.write("\n")
        fgl.write('\n')
        fgc.write('\n')
    fgl.close()
    fgc.close()

    # Authors :: Author of Month
    f.write(html_header(2, 'Author of Month'))
    f.write('<table class="sortable" id="aom">')
    f.write('<tr><th>Month</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
    for yymm in reversed(sorted(data.author_of_month.keys())):
        authordict = data.author_of_month[yymm]
        authors = getkeyssortedbyvalues(authordict)
        authors.reverse()
        commits = data.author_of_month[yymm][authors[0]]
        next = ', '.join(authors[1:conf['authors_top']+1])  # NOTE(review): shadows builtin next()
        f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yymm, authors[0], commits, (100.0 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], next, len(authors)))
    f.write('</table>')

    f.write(html_header(2, 'Author of Year'))
    f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
    for yy in reversed(sorted(data.author_of_year.keys())):
        authordict = data.author_of_year[yy]
        authors = getkeyssortedbyvalues(authordict)
        authors.reverse()
        commits = data.author_of_year[yy][authors[0]]
        next = ', '.join(authors[1:conf['authors_top']+1])
        f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yy, authors[0], commits, (100.0 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next, len(authors)))
    f.write('</table>')

    # Domains
    f.write(html_header(2, 'Commits by Domains'))
    domains_by_commits = getkeyssortedbyvaluekey(data.domains, 'commits')
    domains_by_commits.reverse() # most first
    f.write('<div class="vtable"><table>')
    f.write('<tr><th>Domains</th><th>Total (%)</th></tr>')
    fp = open(path + '/domains.dat', 'w')
    n = 0
    for domain in domains_by_commits:
        if n == conf['max_domains']:
            break
        commits = 0
        n += 1
        info = data.getDomainInfo(domain)
        fp.write('%s %d %d\n' % (domain, n , info['commits']))
        f.write('<tr><th>%s</th><td>%d (%.2f%%)</td></tr>' % (domain, info['commits'], (100.0 * info['commits'] / totalcommits)))
    f.write('</table></div>')
    f.write('<img src="domains.png" alt="Commits by Domains">')
    fp.close()
    f.write('</body></html>')
    f.close()

    ###
    # Files
    f = open(path + '/files.html', 'w')
    self.printHeader(f)
    f.write('<h1>Files</h1>')
    self.printNav(f)

    f.write('<dl>\n')
    f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
    f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
    try:
        f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % (float(data.getTotalSize()) / data.getTotalFiles()))
    except ZeroDivisionError:
        # empty repository: just omit the average-size row
        pass
    f.write('</dl>\n')

    # Files :: File count by date
    f.write(html_header(2, 'File count by date'))
    # use set to get rid of duplicate/unnecessary entries
    files_by_date = set()
    for stamp in sorted(data.files_by_stamp.keys()):
        files_by_date.add('%s %d' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
    fg = open(path + '/files_by_date.dat', 'w')
    for line in sorted(list(files_by_date)):
        fg.write('%s\n' % line)
    #for stamp in sorted(data.files_by_stamp.keys()):
    #	fg.write('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
    fg.close()
    f.write('<img src="files_by_date.png" alt="Files by Date">')
    #f.write('<h2>Average file size by date</h2>')

    # Files :: Extensions
    writeHeaderstoNewFile(path + "/extensions.tsv", ['extension','files','lines','lines_file'],"\t")
    extensions_tsv=open(path + "/extensions.tsv", "a+")
    f.write(html_header(2, 'Extensions'))
    f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
    for ext in sorted(data.extensions.keys()):
        files = data.extensions[ext]['files']
        lines = data.extensions[ext]['lines']
        try:
            loc_percentage = (100.0 * lines) / data.getTotalLOC()
        except ZeroDivisionError:
            loc_percentage = 0
        f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (ext, files, (100.0 * files) / data.getTotalFiles(), lines, loc_percentage, lines / files))
        # write extension data to extensions.tsv
        extensions_tsv.write('%s\t%d (%.2f%%)\t%d (%.2f%%)\t%d\n' % (ext, files, (100.0 * files) / data.getTotalFiles(), lines, loc_percentage, lines / files))
    f.write('</table>')
    f.write('</body></html>')
    f.close()

    ###
    # Lines
    f = open(path + '/lines.html', 'w')
    self.printHeader(f)
    f.write('<h1>Lines</h1>')
    self.printNav(f)

    f.write('<dl>\n')
    f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
    f.write('</dl>\n')

    f.write(html_header(2, 'Lines of Code'))
    f.write('<img src="lines_of_code.png" alt="Lines of Code">')
    fg = open(path + '/lines_of_code.dat', 'w')
    for stamp in sorted(data.changes_by_date.keys()):
        fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
    fg.close()
    f.write('</body></html>')
    f.close()

    ###
    # tags.html
    writeHeaderstoNewFile(path + "/tags.tsv", ['name','date','commits','authors'],"\t")
    tags_tsv=open(path + "/tags.tsv", "a+")
    f = open(path + '/tags.html', 'w')
    self.printHeader(f)
    f.write('<h1>Tags</h1>')
    self.printNav(f)

    f.write('<dl>')
    f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
    if len(data.tags) > 0:
        f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (1.0 * data.getTotalCommits() / len(data.tags)))
    f.write('</dl>')

    f.write('<table class="tags">')
    f.write('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
    # sort the tags by date desc
    tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
    for tag in tags_sorted_by_date_desc:
        authorinfo = []
        self.authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
        for i in reversed(self.authors_by_commits):
            authorinfo.append('%s (%d)' % (i, data.tags[tag]['authors'][i]))
        f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
        tags_tsv.write("%s\t%s\t%d\t%s\n" %(tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
    f.write('</table>')
    f.write('</body></html>')
    f.close()

    self.createGraphs(path)
def createGraphs(self, path):
    """Emit one gnuplot .plot script per chart into *path*.

    Each script prepends GNUPLOT_COMMON and reads the .dat files written
    by create(). The actual gnuplot invocation at the end is currently
    commented out, so only the scripts are produced.
    """
    print 'Generating graphs...'

    # hour of day
    f = open(path + '/hour_of_day.plot', 'w')
    f.write(GNUPLOT_COMMON)
    f.write(
"""
set output 'hour_of_day.png'
unset key
set xrange [0.5:24.5]
set yrange [0:]
set xtics 4
set grid y
set ylabel "Commits"
plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
""")
    f.close()

    # day of week
    f = open(path + '/day_of_week.plot', 'w')
    f.write(GNUPLOT_COMMON)
    f.write(
"""
set output 'day_of_week.png'
unset key
set xrange [0.5:7.5]
set yrange [0:]
set xtics 1
set grid y
set ylabel "Commits"
plot 'day_of_week.dat' using 1:3:(0.5):xtic(2) w boxes fs solid
""")
    f.close()

    # Domains
    f = open(path + '/domains.plot', 'w')
    f.write(GNUPLOT_COMMON)
    f.write(
"""
set output 'domains.png'
unset key
unset xtics
set yrange [0:]
set grid y
set ylabel "Commits"
plot 'domains.dat' using 2:3:(0.5) with boxes fs solid, '' using 2:3:1 with labels rotate by 45 offset 0,1
""")
    f.close()

    # Month of Year
    f = open(path + '/month_of_year.plot', 'w')
    f.write(GNUPLOT_COMMON)
    f.write(
"""
set output 'month_of_year.png'
unset key
set xrange [0.5:12.5]
set yrange [0:]
set xtics 1
set grid y
set ylabel "Commits"
plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
""")
    f.close()

    # commits_by_year_month
    f = open(path + '/commits_by_year_month.plot', 'w')
    f.write(GNUPLOT_COMMON)
    f.write(
"""
set output 'commits_by_year_month.png'
unset key
set yrange [0:]
set xdata time
set timefmt "%Y-%m"
set format x "%Y-%m"
set xtics rotate
set bmargin 5
set grid y
set ylabel "Commits"
plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
""")
    f.close()

    # commits_by_year
    f = open(path + '/commits_by_year.plot', 'w')
    f.write(GNUPLOT_COMMON)
    f.write(
"""
set output 'commits_by_year.png'
unset key
set yrange [0:]
set xtics 1 rotate
set grid y
set ylabel "Commits"
set yrange [0:]
plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
""")
    f.close()

    # Files by date
    f = open(path + '/files_by_date.plot', 'w')
    f.write(GNUPLOT_COMMON)
    f.write(
"""
set output 'files_by_date.png'
unset key
set yrange [0:]
set xdata time
set timefmt "%Y-%m-%d"
set format x "%Y-%m-%d"
set grid y
set ylabel "Files"
set xtics rotate
set ytics autofreq
set bmargin 6
plot 'files_by_date.dat' using 1:2 w steps
""")
    f.close()

    # Lines of Code
    f = open(path + '/lines_of_code.plot', 'w')
    f.write(GNUPLOT_COMMON)
    f.write(
"""
set output 'lines_of_code.png'
unset key
set yrange [0:]
set xdata time
set timefmt "%s"
set format x "%Y-%m-%d"
set grid y
set ylabel "Lines"
set xtics rotate
set bmargin 6
plot 'lines_of_code.dat' using 1:2 w lines
""")
    f.close()

    # Lines of Code Added per author
    f = open(path + '/lines_of_code_by_author.plot', 'w')
    f.write(GNUPLOT_COMMON)
    f.write(
"""
set terminal png transparent size 640,480
set output 'lines_of_code_by_author.png'
set key left top
set yrange [0:]
set xdata time
set timefmt "%s"
set format x "%Y-%m-%d"
set grid y
set ylabel "Lines"
set xtics rotate
set bmargin 6
plot """
)
    # one plot clause per author; column 1 is the stamp, data columns start at 2
    i = 1
    plots = []
    for a in self.authors_to_plot:
        i = i + 1
        # escape quotes/backticks so author names can't break the gnuplot script
        author = a.replace("\"", "\\\"").replace("`", "")
        plots.append("""'lines_of_code_by_author.dat' using 1:%d title "%s" w lines""" % (i, author))
    f.write(", ".join(plots))
    f.write('\n')
    f.close()

    # Commits per author
    f = open(path + '/commits_by_author.plot', 'w')
    f.write(GNUPLOT_COMMON)
    f.write(
"""
set terminal png transparent size 640,480
set output 'commits_by_author.png'
set key left top
set yrange [0:]
set xdata time
set timefmt "%s"
set format x "%Y-%m-%d"
set grid y
set ylabel "Commits"
set xtics rotate
set bmargin 6
plot """
)
    i = 1
    plots = []
    for a in self.authors_to_plot:
        i = i + 1
        author = a.replace("\"", "\\\"").replace("`", "")
        plots.append("""'commits_by_author.dat' using 1:%d title "%s" w lines""" % (i, author))
    f.write(", ".join(plots))
    f.write('\n')
    f.close()

    os.chdir(path)
    files = glob.glob(path + '/*.plot')
    # NOTE(review): gnuplot execution is disabled; the scripts are written but never run.
    #for f in files:
    #	out = getpipeoutput([gnuplot_cmd + ' "%s"' % f])
    #	if len(out) > 0:
    #		print out
def printHeader(self, f, title = ''):
    """Write the common HTML <head> + opening <body> to file object *f*.

    NOTE(review): the *title* parameter is never used — the page title
    always comes from self.title; confirm whether callers rely on this.
    """
    f.write(
"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>GitStats - %s</title>
<link rel="stylesheet" href="%s" type="text/css">
<meta name="generator" content="GitStats %s">
<script type="text/javascript" src="sortable.js"></script>
</head>
<body>
""" % (self.title, conf['style'], getversion()))
def printNav(self, f):
    """Write the shared navigation bar linking the report pages to *f*."""
    f.write("""
<div class="nav">
<ul>
<li><a href="index.html">General</a></li>
<li><a href="activity.html">Activity</a></li>
<li><a href="authors.html">Authors</a></li>
<li><a href="files.html">Files</a></li>
<li><a href="lines.html">Lines</a></li>
<li><a href="tags.html">Tags</a></li>
</ul>
</div>
""")
  1287. def usage():
  1288. print """
  1289. Usage: gitstats [options] <gitpath..> <outputpath>
  1290. Options:
  1291. -c key=value Override configuration value
  1292. Default config values:
  1293. %s
  1294. Please see the manual page for more details.
  1295. """ % conf
class GitStats:
    # Top-level driver: parses CLI options, runs data collection over each
    # git path, then renders the HTML report into the output directory.
    def run(self, args_orig):
        """Entry point: args_orig is sys.argv[1:] — options, git paths, then output path."""
        optlist, args = getopt.getopt(args_orig, 'hc:', ["help"])
        for o,v in optlist:
            if o == '-c':
                # -c key=value overrides a configuration entry; the value is
                # coerced to int when the existing default is an int.
                key, value = v.split('=', 1)
                if key not in conf:
                    raise KeyError('no such key "%s" in config' % key)
                if isinstance(conf[key], int):
                    conf[key] = int(value)
                else:
                    conf[key] = value
            elif o in ('-h', '--help'):
                usage()
                sys.exit()
        if len(args) < 2:
            # need at least one git path plus the output path
            usage()
            sys.exit(0)
        outputpath = os.path.abspath(args[-1])
        # execute git2json for top files
        # NOTE(review): shell command built by string concatenation — paths with
        # quotes/metacharacters can break or inject; also only args[0] is used
        # even when several git paths are given. Consider subprocess with a list.
        os.system("cd "+os.path.abspath(args[0])+" && git2json > '"+os.path.abspath(args[-1])+"/top_committed_files.json'")
        rundir = os.getcwd()
        try:
            os.makedirs(outputpath)
        except OSError:
            # directory may already exist; validated below
            pass
        if not os.path.isdir(outputpath):
            print 'FATAL: Output path is not a directory or does not exist'
            sys.exit(1)
        if not getgnuplotversion():
            print 'gnuplot not found'
            sys.exit(1)
        print 'Output path: %s' % outputpath
        cachefile = os.path.join(outputpath, 'gitstats.cache')
        data = GitDataCollector()
        data.loadCache(cachefile)
        # collect from every git path (all args except the last one)
        for gitpath in args[0:-1]:
            print 'Git path: %s' % gitpath
            prevdir = os.getcwd()
            os.chdir(gitpath)
            print 'Collecting data...'
            data.collect(gitpath)
            os.chdir(prevdir)
        print 'Refining data...'
        data.saveCache(cachefile)
        data.refine()
        os.chdir(rundir)
        print 'Generating report...'
        report = HTMLReportCreator()
        report.create(data, outputpath)
        time_end = time.time()
        exectime_internal = time_end - time_start
        # NOTE(review): divides by exectime_internal — could raise ZeroDivisionError on a sub-resolution run; confirm acceptable.
        print 'Execution time %.5f secs, %.5f secs (%.2f %%) in external commands)' % (exectime_internal, exectime_external, (100.0 * exectime_external) / exectime_internal)
        if sys.stdin.isatty():
            # interactive session: suggest how to view the generated report
            print 'You may now run:'
            print
            print ' sensible-browser \'%s\'' % os.path.join(outputpath, 'index.html').replace("'", "'\\''")
            print
# Script entry point: build the driver and run it with the CLI arguments.
if __name__=='__main__':
    g = GitStats()
    g.run(sys.argv[1:])