gitstats.py 168KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242324332443245324632473248324932503251325232533254325532563257325832593260326132623263326432653266326732683269327032713272327332743275327632773278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764277427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388438943904391439243934394439543964397439843994400440144024403440444054406440744084409441044114412441344144415441644174418441944204421442244234424442544264427442844294430443144324433443444354436443744384439444044414442444344444445444644474448444944504451445244534454445544564457445844594460446144624463446444654466446744684469447044714472447344744475447644774478447944804481448244834484448544864487448844894490449144924493449444954496449744984499450045014502450345044505450645074508450945104511451245134514451545164517451845194520452145224523452445254526452745284529453045314532453345344535453645374538453945404541454245434544454545464547454845494550455145524553455445554556455745584559456045614562456345644565456645674568456945704571457245734574457545764577457845794580458145824583458445854586458745884589
  1. import datetime
  2. import getopt
  3. import glob
  4. import os
  5. import pickle
  6. import platform
  7. import re
  8. import shutil
  9. import subprocess
  10. import sys
  11. import time
  12. import zlib
  13. from collections import defaultdict
  14. from fpdf import FPDF
  15. from fpdf.enums import XPos, YPos
  16. if sys.version_info < (3, 6):
  17. print("Python 3.6 or higher is required for gitstats", file=sys.stderr)
  18. sys.exit(1)
  19. from multiprocessing import Pool
  20. os.environ['LC_ALL'] = 'C'
  21. GNUPLOT_COMMON = 'set terminal png transparent size 640,240\nset size 1.0,1.0\n'
  22. ON_LINUX = (platform.system() == 'Linux')
  23. WEEKDAYS = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
  24. exectime_internal = 0.0
  25. exectime_external = 0.0
  26. time_start = time.time()
  27. # By default, gnuplot is searched from path, but can be overridden with the
  28. # environment variable "GNUPLOT"
  29. gnuplot_cmd = 'gnuplot'
  30. if 'GNUPLOT' in os.environ:
  31. gnuplot_cmd = os.environ['GNUPLOT']
  32. conf = {
  33. 'max_domains': 10,
  34. 'max_ext_length': 10,
  35. 'style': 'gitstats.css',
  36. 'max_authors': 20,
  37. 'authors_top': 5,
  38. 'commit_begin': '',
  39. 'commit_end': 'HEAD',
  40. 'linear_linestats': 1,
  41. 'project_name': '',
  42. 'processes': 8,
  43. 'start_date': '',
  44. 'debug': False,
  45. 'verbose': False
  46. }
  47. def getpipeoutput(cmds, quiet = False):
  48. global exectime_external
  49. start = time.time()
  50. # Basic input validation to prevent command injection
  51. for cmd in cmds:
  52. if not isinstance(cmd, str):
  53. raise TypeError("Commands must be strings")
  54. # Check for obvious command injection attempts
  55. if any(dangerous in cmd for dangerous in [';', '&&', '||', '`', '$(']):
  56. print(f'Warning: Potentially dangerous command detected: {cmd}')
  57. if (not quiet and ON_LINUX and os.isatty(1)) or conf['verbose']:
  58. print('>> ' + ' | '.join(cmds), end='')
  59. sys.stdout.flush()
  60. p = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
  61. processes=[p]
  62. for x in cmds[1:]:
  63. p = subprocess.Popen(x, stdin = p.stdout, stdout = subprocess.PIPE, shell = True)
  64. processes.append(p)
  65. output = p.communicate()[0]
  66. for p in processes:
  67. p.wait()
  68. end = time.time()
  69. if not quiet or conf['verbose'] or conf['debug']:
  70. if ON_LINUX and os.isatty(1):
  71. print('\r', end='')
  72. print('[%.5f] >> %s' % (end - start, ' | '.join(cmds)))
  73. if conf['debug']:
  74. print(f'DEBUG: Command output ({len(output)} bytes): {output[:200].decode("utf-8", errors="replace")}...')
  75. exectime_external += (end - start)
  76. return output.decode('utf-8', errors='replace').rstrip('\n')
  77. def getlogrange(defaultrange = 'HEAD', end_only = True):
  78. commit_range = getcommitrange(defaultrange, end_only)
  79. if len(conf['start_date']) > 0:
  80. return '--since="%s" "%s"' % (conf['start_date'], commit_range)
  81. return commit_range
  82. def getcommitrange(defaultrange = 'HEAD', end_only = False):
  83. if len(conf['commit_end']) > 0:
  84. if end_only or len(conf['commit_begin']) == 0:
  85. return conf['commit_end']
  86. return '%s..%s' % (conf['commit_begin'], conf['commit_end'])
  87. return defaultrange
  88. def getkeyssortedbyvalues(dict):
  89. return list(map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items()))))
  90. # dict['author'] = { 'commits': 512 } - ...key(dict, 'commits')
  91. def getkeyssortedbyvaluekey(d, key):
  92. return list(map(lambda el : el[1], sorted(map(lambda el : (d[el][key], el), d.keys()))))
  93. def getstatsummarycounts(line):
  94. numbers = re.findall(r'\d+', line)
  95. if len(numbers) == 1:
  96. # neither insertions nor deletions: may probably only happen for "0 files changed"
  97. numbers.append(0);
  98. numbers.append(0);
  99. elif len(numbers) == 2 and line.find('(+)') != -1:
  100. numbers.append(0); # only insertions were printed on line
  101. elif len(numbers) == 2 and line.find('(-)') != -1:
  102. numbers.insert(1, 0); # only deletions were printed on line
  103. return numbers
  104. VERSION = 0
  105. def getversion():
  106. global VERSION
  107. if VERSION == 0:
  108. gitstats_repo = os.path.dirname(os.path.abspath(__file__))
  109. VERSION = getpipeoutput(["git --git-dir=%s/.git --work-tree=%s rev-parse --short %s" %
  110. (gitstats_repo, gitstats_repo, getcommitrange('HEAD').split('\n')[0])])
  111. return VERSION
  112. def getgitversion():
  113. return getpipeoutput(['git --version']).split('\n')[0]
  114. def getgnuplotversion():
  115. return getpipeoutput(['%s --version' % gnuplot_cmd]).split('\n')[0]
  116. def getnumoffilesfromrev(time_rev):
  117. """
  118. Get number of files changed in commit
  119. """
  120. time, rev = time_rev
  121. return (int(time), rev, int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0]))
  122. def getnumoflinesinblob(ext_blob):
  123. """
  124. Get number of lines in blob
  125. """
  126. ext, blob_id = ext_blob
  127. return (ext, blob_id, int(getpipeoutput(['git cat-file blob %s' % blob_id, 'wc -l']).split()[0]))
  128. def analyzesloc(ext_blob):
  129. """
  130. Analyze source lines of code vs comments vs blank lines in a blob
  131. Returns (ext, blob_id, total_lines, source_lines, comment_lines, blank_lines)
  132. """
  133. ext, blob_id = ext_blob
  134. content = getpipeoutput(['git cat-file blob %s' % blob_id])
  135. total_lines = 0
  136. source_lines = 0
  137. comment_lines = 0
  138. blank_lines = 0
  139. # Define comment patterns for different file types
  140. comment_patterns = {
  141. '.py': [r'^\s*#', r'^\s*"""', r'^\s*\'\'\''],
  142. '.js': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
  143. '.ts': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
  144. '.java': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
  145. '.cpp': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
  146. '.c': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
  147. '.h': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
  148. '.css': [r'^\s*/\*', r'^\s*\*'],
  149. '.html': [r'^\s*<!--', r'^\s*<!\-\-'],
  150. '.xml': [r'^\s*<!--', r'^\s*<!\-\-'],
  151. '.sh': [r'^\s*#'],
  152. '.rb': [r'^\s*#'],
  153. '.pl': [r'^\s*#'],
  154. '.php': [r'^\s*//', r'^\s*/\*', r'^\s*\*', r'^\s*#'],
  155. }
  156. import re
  157. patterns = comment_patterns.get(ext, [])
  158. for line in content.split('\n'):
  159. total_lines += 1
  160. line_stripped = line.strip()
  161. if not line_stripped:
  162. blank_lines += 1
  163. elif any(re.match(pattern, line) for pattern in patterns):
  164. comment_lines += 1
  165. else:
  166. source_lines += 1
  167. return (ext, blob_id, total_lines, source_lines, comment_lines, blank_lines)
  168. class DataCollector:
  169. """Manages data collection from a revision control repository."""
  170. def __init__(self):
  171. self.stamp_created = time.time()
  172. self.cache = {}
  173. self.total_authors = 0
  174. self.activity_by_hour_of_day = defaultdict(int) # hour -> commits
  175. self.activity_by_day_of_week = defaultdict(int) # day -> commits
  176. self.activity_by_month_of_year = defaultdict(int) # month [1-12] -> commits
  177. self.activity_by_hour_of_week = defaultdict(lambda: defaultdict(int)) # weekday -> hour -> commits
  178. self.activity_by_hour_of_day_busiest = 0
  179. self.activity_by_hour_of_week_busiest = 0
  180. self.activity_by_year_week = defaultdict(int) # yy_wNN -> commits
  181. self.activity_by_year_week_peak = 0
  182. self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed}
  183. self.total_commits = 0
  184. self.total_files = 0
  185. self.authors_by_commits = 0
  186. # domains
  187. self.domains = defaultdict(lambda: defaultdict(int)) # domain -> commits
  188. # author of the month
  189. self.author_of_month = defaultdict(lambda: defaultdict(int)) # month -> author -> commits
  190. self.author_of_year = defaultdict(lambda: defaultdict(int)) # year -> author -> commits
  191. self.commits_by_month = defaultdict(int) # month -> commits
  192. self.commits_by_year = defaultdict(int) # year -> commits
  193. self.lines_added_by_month = defaultdict(int) # month -> lines added
  194. self.lines_added_by_year = defaultdict(int) # year -> lines added
  195. self.lines_removed_by_month = defaultdict(int) # month -> lines removed
  196. self.lines_removed_by_year = defaultdict(int) # year -> lines removed
  197. self.first_commit_stamp = 0
  198. self.last_commit_stamp = 0
  199. self.last_active_day = None
  200. self.active_days = set()
  201. # lines
  202. self.total_lines = 0
  203. self.total_lines_added = 0
  204. self.total_lines_removed = 0
  205. # SLOC (Source Lines of Code) analysis
  206. self.total_source_lines = 0
  207. self.total_comment_lines = 0
  208. self.total_blank_lines = 0
  209. self.sloc_by_extension = {} # ext -> {'source': 0, 'comments': 0, 'blank': 0, 'total': 0}
  210. # File size and revision tracking
  211. self.file_sizes = {} # filepath -> size in bytes
  212. self.file_revisions = {} # filepath -> revision count
  213. # Directory activity tracking
  214. self.directories = defaultdict(lambda: {'commits': 0, 'lines_added': 0, 'lines_removed': 0, 'files': set()})
  215. self.directory_revisions = defaultdict(int) # directory -> total file revisions in directory
  216. # size
  217. self.total_size = 0
  218. # timezone
  219. self.commits_by_timezone = defaultdict(int) # timezone -> commits
  220. # tags
  221. self.tags = {}
  222. self.files_by_stamp = {} # stamp -> files
  223. # extensions
  224. self.extensions = {} # extension -> files, lines
  225. # line statistics
  226. self.changes_by_date = {} # stamp -> { files, ins, del }
  227. # Pace of Changes tracking (number of line changes happening over time)
  228. self.pace_of_changes = {} # stamp -> total_line_changes (ins + del)
  229. # Last 30 days activity
  230. self.last_30_days_commits = 0
  231. self.last_30_days_lines_added = 0
  232. self.last_30_days_lines_removed = 0
  233. # Last 12 months activity
  234. self.last_12_months_commits = defaultdict(int) # month -> commits
  235. self.last_12_months_lines_added = defaultdict(int) # month -> lines added
  236. self.last_12_months_lines_removed = defaultdict(int) # month -> lines removed
  237. # Repository size tracking
  238. self.repository_size_mb = 0.0
  239. # Branch analysis
  240. self.branches = {} # branch_name -> {'commits': 0, 'lines_added': 0, 'lines_removed': 0, 'authors': {}, 'is_merged': True, 'merge_base': '', 'unique_commits': []}
  241. self.unmerged_branches = [] # list of branch names that are not merged into main branch
  242. self.main_branch = 'master' # will be detected automatically
  243. # Team collaboration analysis
  244. self.author_collaboration = {} # author -> {'worked_with': {other_author: shared_files}, 'file_ownership': {file: change_count}}
  245. self.commit_patterns = {} # author -> {'avg_commit_size': lines, 'small_commits': count, 'large_commits': count, 'commit_frequency': commits_per_day}
  246. self.working_patterns = {} # author -> {'night_commits': count, 'weekend_commits': count, 'peak_hours': [hours], 'timezone_pattern': {tz: count}}
  247. self.impact_analysis = {} # author -> {'critical_files': [files], 'impact_score': score, 'bug_potential': score}
  248. self.team_performance = {} # author -> {'efficiency_score': score, 'consistency': score, 'leadership_score': score}
  249. # File importance tracking
  250. self.critical_files = set() # Files that are likely critical (main.py, app.py, index.html, etc.)
  251. self.file_impact_scores = {} # file -> impact_score based on how often it's changed and by whom
  252. # Time-based analysis
  253. self.commits_by_time_of_day = defaultdict(lambda: defaultdict(int)) # author -> hour -> commits
  254. self.commits_by_day_of_week = defaultdict(lambda: defaultdict(int)) # author -> day -> commits
  255. self.author_active_periods = {} # author -> {'active_days': set, 'longest_streak': days, 'avg_gap': days}
  256. # Quality indicators
  257. self.potential_bug_commits = [] # List of commits that might indicate bugs (reverts, fixes, etc.)
  258. self.refactoring_commits = [] # List of commits that appear to be refactoring
  259. self.feature_commits = [] # List of commits that appear to add features
  260. ##
  261. # This should be the main function to extract data from the repository.
  262. def collect(self, dir):
  263. self.dir = dir
  264. if len(conf['project_name']) == 0:
  265. self.projectname = os.path.basename(os.path.abspath(dir))
  266. else:
  267. self.projectname = conf['project_name']
  268. ##
  269. # Load cacheable data
  270. def loadCache(self, cachefile):
  271. if not os.path.exists(cachefile):
  272. return
  273. print('Loading cache...')
  274. try:
  275. with open(cachefile, 'rb') as f:
  276. try:
  277. self.cache = pickle.loads(zlib.decompress(f.read()))
  278. except (zlib.error, pickle.PickleError) as e:
  279. # temporary hack to upgrade non-compressed caches
  280. try:
  281. f.seek(0)
  282. self.cache = pickle.load(f)
  283. except (pickle.PickleError, EOFError) as e2:
  284. print(f'Warning: Failed to load cache file {cachefile}: {e2}')
  285. self.cache = {}
  286. except Exception as e:
  287. print(f'Warning: Unexpected error loading cache file {cachefile}: {e}')
  288. self.cache = {}
  289. except IOError as e:
  290. print(f'Warning: Could not open cache file {cachefile}: {e}')
  291. self.cache = {}
  292. ##
  293. # Produce any additional statistics from the extracted data.
  294. def refine(self):
  295. pass
  296. ##
  297. # : get a dictionary of author
  298. def getAuthorInfo(self, author):
  299. return None
  300. def getActivityByDayOfWeek(self):
  301. return {}
  302. def getActivityByHourOfDay(self):
  303. return {}
  304. # : get a dictionary of domains
  305. def getDomainInfo(self, domain):
  306. return None
  307. ##
  308. # Get a list of authors
  309. def getAuthors(self):
  310. return []
  311. def getFirstCommitDate(self):
  312. return datetime.datetime.now()
  313. def getLastCommitDate(self):
  314. return datetime.datetime.now()
  315. def getStampCreated(self):
  316. return self.stamp_created
  317. def getTags(self):
  318. return []
  319. def getTotalAuthors(self):
  320. return -1
  321. def getTotalCommits(self):
  322. return -1
  323. def getTotalFiles(self):
  324. return -1
  325. def getTotalLOC(self):
  326. return -1
  327. ##
  328. # Save cacheable data
  329. def saveCache(self, cachefile):
  330. print('Saving cache...')
  331. tempfile = cachefile + '.tmp'
  332. try:
  333. with open(tempfile, 'wb') as f:
  334. #pickle.dump(self.cache, f)
  335. data = zlib.compress(pickle.dumps(self.cache))
  336. f.write(data)
  337. try:
  338. os.remove(cachefile)
  339. except OSError:
  340. pass
  341. os.rename(tempfile, cachefile)
  342. except IOError as e:
  343. print(f'Warning: Could not save cache file {cachefile}: {e}')
  344. # Clean up temp file if it exists
  345. try:
  346. os.remove(tempfile)
  347. except OSError:
  348. pass
  349. class GitDataCollector(DataCollector):
  350. def collect(self, dir):
  351. DataCollector.collect(self, dir)
  352. self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getlogrange(), 'wc -l']))
  353. #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
  354. # Clear tags for each repository to avoid multirepo contamination
  355. if not hasattr(self, '_first_repo'):
  356. self._first_repo = True
  357. else:
  358. # For subsequent repos, clear tags to avoid mixing
  359. self.tags = {}
  360. # tags
  361. lines = getpipeoutput(['git show-ref --tags']).split('\n')
  362. for line in lines:
  363. if len(line) == 0:
  364. continue
  365. (hash, tag) = line.split(' ')
  366. tag = tag.replace('refs/tags/', '')
  367. output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % hash])
  368. if len(output) > 0:
  369. parts = output.split(' ')
  370. stamp = 0
  371. try:
  372. stamp = int(parts[0])
  373. except ValueError:
  374. stamp = 0
  375. self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} }
  376. # collect info on tags, starting from latest
  377. tags_sorted_by_date_desc = list(map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), self.tags.items())))))
  378. prev = None
  379. for tag in reversed(tags_sorted_by_date_desc):
  380. cmd = 'git shortlog -s "%s"' % tag
  381. if prev != None:
  382. cmd += ' "^%s"' % prev
  383. output = getpipeoutput([cmd])
  384. if len(output) == 0:
  385. continue
  386. prev = tag
  387. for line in output.split('\n'):
  388. parts = re.split(r'\s+', line, maxsplit=2)
  389. commits = int(parts[1])
  390. author = parts[2]
  391. self.tags[tag]['commits'] += commits
  392. self.tags[tag]['authors'][author] = commits
  393. # Collect revision statistics
  394. # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
  395. lines = getpipeoutput(['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split('\n')
  396. for line in lines:
  397. parts = line.split(' ', 4)
  398. author = ''
  399. try:
  400. stamp = int(parts[0])
  401. except ValueError:
  402. stamp = 0
  403. timezone = parts[3]
  404. author, mail = parts[4].split('<', 1)
  405. author = author.rstrip()
  406. mail = mail.rstrip('>')
  407. domain = '?'
  408. if mail.find('@') != -1:
  409. domain = mail.rsplit('@', 1)[1]
  410. date = datetime.datetime.fromtimestamp(float(stamp))
  411. # First and last commit stamp (may be in any order because of cherry-picking and patches)
  412. if stamp > self.last_commit_stamp:
  413. self.last_commit_stamp = stamp
  414. if self.first_commit_stamp == 0 or stamp < self.first_commit_stamp:
  415. self.first_commit_stamp = stamp
  416. # activity
  417. # hour
  418. hour = date.hour
  419. self.activity_by_hour_of_day[hour] += 1
  420. # most active hour?
  421. if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
  422. self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]
  423. # day of week
  424. day = date.weekday()
  425. self.activity_by_day_of_week[day] += 1
  426. # domain stats
  427. if domain not in self.domains:
  428. self.domains[domain] = defaultdict(int)
  429. # commits
  430. self.domains[domain]['commits'] += 1
  431. # hour of week
  432. self.activity_by_hour_of_week[day][hour] += 1
  433. # most active hour?
  434. if self.activity_by_hour_of_week[day][hour] > self.activity_by_hour_of_week_busiest:
  435. self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[day][hour]
  436. # month of year
  437. month = date.month
  438. self.activity_by_month_of_year[month] += 1
  439. # yearly/weekly activity
  440. yyw = date.strftime('%Y-%W')
  441. self.activity_by_year_week[yyw] += 1
  442. if self.activity_by_year_week_peak < self.activity_by_year_week[yyw]:
  443. self.activity_by_year_week_peak = self.activity_by_year_week[yyw]
  444. # author stats
  445. if author not in self.authors:
  446. self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
  447. # commits, note again that commits may be in any date order because of cherry-picking and patches
  448. if 'last_commit_stamp' not in self.authors[author]:
  449. self.authors[author]['last_commit_stamp'] = stamp
  450. if stamp > self.authors[author]['last_commit_stamp']:
  451. self.authors[author]['last_commit_stamp'] = stamp
  452. if 'first_commit_stamp' not in self.authors[author]:
  453. self.authors[author]['first_commit_stamp'] = stamp
  454. if stamp < self.authors[author]['first_commit_stamp']:
  455. self.authors[author]['first_commit_stamp'] = stamp
  456. # author of the month/year
  457. yymm = date.strftime('%Y-%m')
  458. self.author_of_month[yymm][author] += 1
  459. self.commits_by_month[yymm] += 1
  460. yy = date.year
  461. self.author_of_year[yy][author] += 1
  462. self.commits_by_year[yy] += 1
  463. # authors: active days
  464. yymmdd = date.strftime('%Y-%m-%d')
  465. if 'last_active_day' not in self.authors[author]:
  466. self.authors[author]['last_active_day'] = yymmdd
  467. self.authors[author]['active_days'] = set([yymmdd])
  468. elif yymmdd != self.authors[author]['last_active_day']:
  469. self.authors[author]['last_active_day'] = yymmdd
  470. self.authors[author]['active_days'].add(yymmdd)
  471. # project: active days
  472. if yymmdd != self.last_active_day:
  473. self.last_active_day = yymmdd
  474. self.active_days.add(yymmdd)
  475. # timezone
  476. self.commits_by_timezone[timezone] += 1
  477. # outputs "<stamp> <files>" for each revision
  478. revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
  479. lines = []
  480. revs_to_read = []
  481. time_rev_count = []
  482. #Look up rev in cache and take info from cache if found
  483. #If not append rev to list of rev to read from repo
  484. for revline in revlines:
  485. time, rev = revline.split(' ')
  486. #if cache empty then add time and rev to list of new rev's
  487. #otherwise try to read needed info from cache
  488. if 'files_in_tree' not in self.cache:
  489. revs_to_read.append((time,rev))
  490. continue
  491. if rev in self.cache['files_in_tree']:
  492. lines.append('%d %d' % (int(time), self.cache['files_in_tree'][rev]))
  493. else:
  494. revs_to_read.append((time,rev))
  495. #Read revisions from repo
  496. pool = Pool(processes=conf['processes'])
  497. time_rev_count = pool.map(getnumoffilesfromrev, revs_to_read)
  498. pool.terminate()
  499. pool.join()
  500. #Update cache with new revisions and append then to general list
  501. for (time, rev, count) in time_rev_count:
  502. if 'files_in_tree' not in self.cache:
  503. self.cache['files_in_tree'] = {}
  504. self.cache['files_in_tree'][rev] = count
  505. lines.append('%d %d' % (int(time), count))
  506. self.total_commits += len(lines)
  507. for line in lines:
  508. parts = line.split(' ')
  509. if len(parts) != 2:
  510. continue
  511. (stamp, files) = parts[0:2]
  512. try:
  513. self.files_by_stamp[int(stamp)] = int(files)
  514. except ValueError:
  515. print('Warning: failed to parse line "%s"' % line)
  516. # extensions and size of files
  517. lines = getpipeoutput(['git ls-tree -r -l -z %s' % getcommitrange('HEAD', end_only = True)]).split('\000')
  518. blobs_to_read = []
  519. for line in lines:
  520. if len(line) == 0:
  521. continue
  522. parts = re.split(r'\s+', line, maxsplit=4)
  523. if parts[0] == '160000' and parts[3] == '-':
  524. # skip submodules
  525. continue
  526. blob_id = parts[2]
  527. size = int(parts[3])
  528. fullpath = parts[4]
  529. self.total_size += size
  530. self.total_files += 1
  531. # Track individual file sizes
  532. self.file_sizes[fullpath] = size
  533. filename = fullpath.split('/')[-1] # strip directories
  534. if filename.find('.') == -1 or filename.rfind('.') == 0:
  535. ext = ''
  536. else:
  537. ext = filename[(filename.rfind('.') + 1):]
  538. if len(ext) > conf['max_ext_length']:
  539. ext = ''
  540. if ext not in self.extensions:
  541. self.extensions[ext] = {'files': 0, 'lines': 0}
  542. self.extensions[ext]['files'] += 1
  543. #if cache empty then add ext and blob id to list of new blob's
  544. #otherwise try to read needed info from cache
  545. if 'lines_in_blob' not in self.cache:
  546. blobs_to_read.append((ext,blob_id))
  547. continue
  548. if blob_id in self.cache['lines_in_blob']:
  549. self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
  550. else:
  551. blobs_to_read.append((ext,blob_id))
  552. #Get info abount line count for new blob's that wasn't found in cache
  553. pool = Pool(processes=conf['processes'])
  554. ext_blob_linecount = pool.map(getnumoflinesinblob, blobs_to_read)
  555. pool.terminate()
  556. pool.join()
  557. # Also get SLOC analysis for the same blobs
  558. pool = Pool(processes=conf['processes'])
  559. ext_blob_sloc = pool.map(analyzesloc, blobs_to_read)
  560. pool.terminate()
  561. pool.join()
  562. #Update cache and write down info about number of number of lines
  563. for (ext, blob_id, linecount) in ext_blob_linecount:
  564. if 'lines_in_blob' not in self.cache:
  565. self.cache['lines_in_blob'] = {}
  566. self.cache['lines_in_blob'][blob_id] = linecount
  567. self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
  568. # Update SLOC statistics
  569. for (ext, blob_id, total_lines, source_lines, comment_lines, blank_lines) in ext_blob_sloc:
  570. # Initialize extension SLOC tracking
  571. if ext not in self.sloc_by_extension:
  572. self.sloc_by_extension[ext] = {'source': 0, 'comments': 0, 'blank': 0, 'total': 0}
  573. # Update extension SLOC counts
  574. self.sloc_by_extension[ext]['source'] += source_lines
  575. self.sloc_by_extension[ext]['comments'] += comment_lines
  576. self.sloc_by_extension[ext]['blank'] += blank_lines
  577. self.sloc_by_extension[ext]['total'] += total_lines
  578. # Update global SLOC counts
  579. self.total_source_lines += source_lines
  580. self.total_comment_lines += comment_lines
  581. self.total_blank_lines += blank_lines
  582. # File revision counting
  583. print('Collecting file revision statistics...')
  584. revision_lines = getpipeoutput(['git log --name-only --pretty=format: %s' % getlogrange('HEAD')]).strip().split('\n')
  585. for line in revision_lines:
  586. line = line.strip()
  587. if len(line) > 0 and not line.startswith('commit'):
  588. # This is a filename
  589. if line not in self.file_revisions:
  590. self.file_revisions[line] = 0
  591. self.file_revisions[line] += 1
  592. # Track directory activity
  593. directory = os.path.dirname(line) if os.path.dirname(line) else '.'
  594. self.directory_revisions[directory] += 1
  595. self.directories[directory]['files'].add(line)
  596. # Directory activity analysis
  597. print('Collecting directory activity statistics...')
  598. numstat_lines = getpipeoutput(['git log --numstat --pretty=format:"%%at %%aN" %s' % getlogrange('HEAD')]).split('\n')
  599. current_author = None
  600. current_timestamp = None
  601. for line in numstat_lines:
  602. line = line.strip()
  603. if not line:
  604. continue
  605. # Check if this is a commit header line (timestamp + author)
  606. if line.count('\t') == 0 and ' ' in line:
  607. try:
  608. parts = line.split(' ', 1)
  609. current_timestamp = int(parts[0])
  610. current_author = parts[1]
  611. continue
  612. except (ValueError, IndexError):
  613. pass
  614. # Check if this is a numstat line (additions\tdeletions\tfilename)
  615. if line.count('\t') >= 2:
  616. parts = line.split('\t')
  617. if len(parts) >= 3:
  618. try:
  619. additions = int(parts[0]) if parts[0] != '-' else 0
  620. deletions = int(parts[1]) if parts[1] != '-' else 0
  621. filename = '\t'.join(parts[2:]) # Handle filenames with tabs
  622. # Track directory activity
  623. directory = os.path.dirname(filename) if os.path.dirname(filename) else '.'
  624. self.directories[directory]['commits'] += 1 # Will be deduplicated later
  625. self.directories[directory]['lines_added'] += additions
  626. self.directories[directory]['lines_removed'] += deletions
  627. self.directories[directory]['files'].add(filename)
  628. except ValueError:
  629. pass
  630. # line statistics
  631. # outputs:
  632. # N files changed, N insertions (+), N deletions(-)
  633. # <stamp> <author>
  634. self.changes_by_date = {} # stamp -> { files, ins, del }
  635. # computation of lines of code by date is better done
  636. # on a linear history.
  637. extra = ''
  638. if conf['linear_linestats']:
  639. extra = '--first-parent -m'
  640. lines = getpipeoutput(['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getlogrange('HEAD'))]).split('\n')
  641. lines.reverse()
  642. files = 0; inserted = 0; deleted = 0; total_lines = 0
  643. author = None
  644. for line in lines:
  645. if len(line) == 0:
  646. continue
  647. # <stamp> <author>
  648. if re.search('files? changed', line) == None:
  649. pos = line.find(' ')
  650. if pos != -1:
  651. try:
  652. (stamp, author) = (int(line[:pos]), line[pos+1:])
  653. self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }
  654. # Track pace of changes (total line changes)
  655. self.pace_of_changes[stamp] = inserted + deleted
  656. date = datetime.datetime.fromtimestamp(stamp)
  657. # Track last 30 days activity
  658. import time as time_mod
  659. now = time_mod.time()
  660. if now - stamp <= 30 * 24 * 3600: # 30 days in seconds
  661. self.last_30_days_commits += 1
  662. self.last_30_days_lines_added += inserted
  663. self.last_30_days_lines_removed += deleted
  664. # Track last 12 months activity
  665. if now - stamp <= 365 * 24 * 3600: # 12 months in seconds
  666. yymm = date.strftime('%Y-%m')
  667. self.last_12_months_commits[yymm] += 1
  668. self.last_12_months_lines_added[yymm] += inserted
  669. self.last_12_months_lines_removed[yymm] += deleted
  670. yymm = date.strftime('%Y-%m')
  671. self.lines_added_by_month[yymm] += inserted
  672. self.lines_removed_by_month[yymm] += deleted
  673. yy = date.year
  674. self.lines_added_by_year[yy] += inserted
  675. self.lines_removed_by_year[yy] += deleted
  676. files, inserted, deleted = 0, 0, 0
  677. except ValueError:
  678. print('Warning: unexpected line "%s"' % line)
  679. else:
  680. print('Warning: unexpected line "%s"' % line)
  681. else:
  682. numbers = getstatsummarycounts(line)
  683. if len(numbers) == 3:
  684. (files, inserted, deleted) = list(map(lambda el : int(el), numbers))
  685. total_lines += inserted
  686. total_lines -= deleted
  687. self.total_lines_added += inserted
  688. self.total_lines_removed += deleted
  689. else:
  690. print('Warning: failed to handle line "%s"' % line)
  691. (files, inserted, deleted) = (0, 0, 0)
  692. #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
  693. self.total_lines += total_lines
  694. # Per-author statistics
  695. # defined for stamp, author only if author commited at this timestamp.
  696. self.changes_by_date_by_author = {} # stamp -> author -> lines_added
  697. # Similar to the above, but never use --first-parent
  698. # (we need to walk through every commit to know who
  699. # committed what, not just through mainline)
  700. lines = getpipeoutput(['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getlogrange('HEAD'))]).split('\n')
  701. lines.reverse()
  702. files = 0; inserted = 0; deleted = 0
  703. author = None
  704. stamp = 0
  705. for line in lines:
  706. if len(line) == 0:
  707. continue
  708. # <stamp> <author>
  709. if re.search('files? changed', line) == None:
  710. pos = line.find(' ')
  711. if pos != -1:
  712. try:
  713. oldstamp = stamp
  714. (stamp, author) = (int(line[:pos]), line[pos+1:])
  715. if oldstamp > stamp:
  716. # clock skew, keep old timestamp to avoid having ugly graph
  717. stamp = oldstamp
  718. if author not in self.authors:
  719. self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
  720. self.authors[author]['commits'] += 1
  721. self.authors[author]['lines_added'] += inserted
  722. self.authors[author]['lines_removed'] += deleted
  723. if stamp not in self.changes_by_date_by_author:
  724. self.changes_by_date_by_author[stamp] = {}
  725. if author not in self.changes_by_date_by_author[stamp]:
  726. self.changes_by_date_by_author[stamp][author] = {}
  727. self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author]['lines_added']
  728. self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits']
  729. files, inserted, deleted = 0, 0, 0
  730. except ValueError:
  731. print('Warning: unexpected line "%s"' % line)
  732. else:
  733. print('Warning: unexpected line "%s"' % line)
  734. else:
  735. numbers = getstatsummarycounts(line);
  736. if len(numbers) == 3:
  737. (files, inserted, deleted) = list(map(lambda el : int(el), numbers))
  738. else:
  739. print('Warning: failed to handle line "%s"' % line)
  740. (files, inserted, deleted) = (0, 0, 0)
  741. # Branch analysis - collect unmerged branches and per-branch statistics
  742. if conf['verbose']:
  743. print('Analyzing branches and detecting unmerged branches...')
  744. self._analyzeBranches()
  745. # Calculate repository size (this is slow as noted in TODO)
  746. if conf['verbose']:
  747. print('Calculating repository size...')
  748. try:
  749. # Get .git directory size
  750. git_dir_size = getpipeoutput(['du -sm .git']).split()[0]
  751. self.repository_size_mb = float(git_dir_size)
  752. if conf['verbose']:
  753. print(f'Repository size: {self.repository_size_mb:.1f} MB')
  754. except (ValueError, IndexError):
  755. print('Warning: Could not calculate repository size')
  756. self.repository_size_mb = 0.0
  757. # Perform advanced team analysis
  758. self._analyzeTeamCollaboration()
  759. self._analyzeCommitPatterns()
  760. self._analyzeWorkingPatterns()
  761. self._analyzeImpactAndQuality()
  762. self._calculateTeamPerformanceMetrics()
  763. def _detectMainBranch(self):
  764. """Detect the main branch (master, main, develop, etc.)"""
  765. # Try common main branch names in order of preference
  766. main_branch_candidates = ['master', 'main', 'develop', 'development']
  767. # Get all local branches
  768. branches_output = getpipeoutput(['git branch'])
  769. local_branches = [line.strip().lstrip('* ') for line in branches_output.split('\n') if line.strip()]
  770. # Check if any of the common main branches exist
  771. for candidate in main_branch_candidates:
  772. if candidate in local_branches:
  773. self.main_branch = candidate
  774. return candidate
  775. # If none found, use the first branch or fall back to 'master'
  776. if local_branches:
  777. self.main_branch = local_branches[0]
  778. return local_branches[0]
  779. # Fall back to master
  780. self.main_branch = 'master'
  781. return 'master'
  782. def _analyzeBranches(self):
  783. """Analyze all branches and detect unmerged ones"""
  784. try:
  785. # Detect main branch
  786. main_branch = self._detectMainBranch()
  787. if conf['verbose']:
  788. print(f'Detected main branch: {main_branch}')
  789. # Get all local branches
  790. branches_output = getpipeoutput(['git branch'])
  791. all_branches = [line.strip().lstrip('* ') for line in branches_output.split('\n') if line.strip()]
  792. # Get unmerged branches (branches not merged into main)
  793. try:
  794. unmerged_output = getpipeoutput([f'git branch --no-merged {main_branch}'])
  795. self.unmerged_branches = [line.strip().lstrip('* ') for line in unmerged_output.split('\n')
  796. if line.strip() and not line.strip().startswith('*')]
  797. except:
  798. # If main branch doesn't exist or command fails, assume all branches are unmerged
  799. self.unmerged_branches = [b for b in all_branches if b != main_branch]
  800. if conf['verbose']:
  801. print(f'Found {len(self.unmerged_branches)} unmerged branches: {", ".join(self.unmerged_branches)}')
  802. # Analyze each branch
  803. for branch in all_branches:
  804. if conf['verbose']:
  805. print(f'Analyzing branch: {branch}')
  806. self._analyzeBranch(branch, main_branch)
  807. except Exception as e:
  808. if conf['verbose'] or conf['debug']:
  809. print(f'Warning: Branch analysis failed: {e}')
  810. # Initialize empty structures if analysis fails
  811. self.unmerged_branches = []
  812. self.branches = {}
  813. def _analyzeBranch(self, branch_name, main_branch):
  814. """Analyze a single branch for commits, authors, and line changes"""
  815. try:
  816. # Initialize branch data
  817. self.branches[branch_name] = {
  818. 'commits': 0,
  819. 'lines_added': 0,
  820. 'lines_removed': 0,
  821. 'authors': {},
  822. 'is_merged': branch_name not in self.unmerged_branches,
  823. 'merge_base': '',
  824. 'unique_commits': []
  825. }
  826. # Get merge base with main branch
  827. try:
  828. merge_base = getpipeoutput([f'git merge-base {branch_name} {main_branch}']).strip()
  829. self.branches[branch_name]['merge_base'] = merge_base
  830. except:
  831. self.branches[branch_name]['merge_base'] = ''
  832. # Get commits unique to this branch (not in main branch)
  833. if branch_name != main_branch:
  834. try:
  835. # Get commits that are in branch but not in main
  836. unique_commits_output = getpipeoutput([f'git rev-list {branch_name} ^{main_branch}'])
  837. unique_commits = [line.strip() for line in unique_commits_output.split('\n') if line.strip()]
  838. self.branches[branch_name]['unique_commits'] = unique_commits
  839. # Analyze each unique commit
  840. for commit in unique_commits:
  841. self._analyzeBranchCommit(branch_name, commit)
  842. except:
  843. # If command fails, analyze all commits in the branch
  844. try:
  845. all_commits_output = getpipeoutput([f'git rev-list {branch_name}'])
  846. all_commits = [line.strip() for line in all_commits_output.split('\n') if line.strip()]
  847. self.branches[branch_name]['unique_commits'] = all_commits[:50] # Limit to avoid too much data
  848. for commit in all_commits[:50]:
  849. self._analyzeBranchCommit(branch_name, commit)
  850. except:
  851. pass
  852. else:
  853. # For main branch, count all commits
  854. try:
  855. all_commits_output = getpipeoutput([f'git rev-list {branch_name}'])
  856. all_commits = [line.strip() for line in all_commits_output.split('\n') if line.strip()]
  857. self.branches[branch_name]['commits'] = len(all_commits)
  858. self.branches[branch_name]['unique_commits'] = all_commits[:100] # Limit for performance
  859. except:
  860. pass
  861. except Exception as e:
  862. if conf['debug']:
  863. print(f'Warning: Failed to analyze branch {branch_name}: {e}')
  864. def _analyzeBranchCommit(self, branch_name, commit_hash):
  865. """Analyze a single commit for branch statistics"""
  866. try:
  867. # Get commit author and timestamp
  868. commit_info = getpipeoutput([f'git log -1 --pretty=format:"%aN %at" {commit_hash}'])
  869. if not commit_info:
  870. return
  871. parts = commit_info.rsplit(' ', 1)
  872. if len(parts) != 2:
  873. return
  874. author = parts[0]
  875. try:
  876. timestamp = int(parts[1])
  877. except ValueError:
  878. return
  879. # Update branch commit count
  880. self.branches[branch_name]['commits'] += 1
  881. # Update author statistics for this branch
  882. if author not in self.branches[branch_name]['authors']:
  883. self.branches[branch_name]['authors'][author] = {
  884. 'commits': 0,
  885. 'lines_added': 0,
  886. 'lines_removed': 0
  887. }
  888. self.branches[branch_name]['authors'][author]['commits'] += 1
  889. # Get line changes for this commit
  890. try:
  891. numstat_output = getpipeoutput([f'git show --numstat --format="" {commit_hash}'])
  892. for line in numstat_output.split('\n'):
  893. if line.strip() and '\t' in line:
  894. parts = line.split('\t')
  895. if len(parts) >= 2:
  896. try:
  897. additions = int(parts[0]) if parts[0] != '-' else 0
  898. deletions = int(parts[1]) if parts[1] != '-' else 0
  899. # Update branch statistics
  900. self.branches[branch_name]['lines_added'] += additions
  901. self.branches[branch_name]['lines_removed'] += deletions
  902. # Update author statistics for this branch
  903. self.branches[branch_name]['authors'][author]['lines_added'] += additions
  904. self.branches[branch_name]['authors'][author]['lines_removed'] += deletions
  905. except ValueError:
  906. pass
  907. except:
  908. pass
  909. except Exception as e:
  910. if conf['debug']:
  911. print(f'Warning: Failed to analyze commit {commit_hash}: {e}')
  912. def _analyzeTeamCollaboration(self):
  913. """Analyze how team members collaborate on files and projects"""
  914. if conf['verbose']:
  915. print('Analyzing team collaboration patterns...')
  916. try:
  917. # Get commit details with files changed
  918. commit_data = getpipeoutput(['git log --name-only --pretty=format:"COMMIT:%H:%aN:%at" %s' % getlogrange('HEAD')]).split('\n')
  919. current_commit = None
  920. current_author = None
  921. current_timestamp = None
  922. for line in commit_data:
  923. line = line.strip()
  924. if line.startswith('COMMIT:'):
  925. # Parse commit header: COMMIT:hash:author:timestamp
  926. parts = line.split(':', 3)
  927. if len(parts) >= 4:
  928. current_commit = parts[1]
  929. current_author = parts[2]
  930. try:
  931. current_timestamp = int(parts[3])
  932. except ValueError:
  933. current_timestamp = None
  934. elif line and current_author and not line.startswith('COMMIT:'):
  935. # This is a filename
  936. filename = line
  937. # Initialize author collaboration data
  938. if current_author not in self.author_collaboration:
  939. self.author_collaboration[current_author] = {
  940. 'worked_with': defaultdict(lambda: defaultdict(int)),
  941. 'file_ownership': defaultdict(int)
  942. }
  943. # Track file ownership
  944. self.author_collaboration[current_author]['file_ownership'][filename] += 1
  945. # Track who else worked on this file
  946. file_history = getpipeoutput([f'git log --pretty=format:"%aN" -- "{filename}"']).split('\n')
  947. unique_authors = set(file_history) - {current_author}
  948. for other_author in unique_authors:
  949. if other_author.strip():
  950. self.author_collaboration[current_author]['worked_with'][other_author][filename] += 1
  951. except Exception as e:
  952. if conf['debug']:
  953. print(f'Warning: Team collaboration analysis failed: {e}')
  954. def _analyzeCommitPatterns(self):
  955. """Analyze commit patterns to identify commit behavior (small vs large commits, frequency, etc.)"""
  956. if conf['verbose']:
  957. print('Analyzing commit patterns...')
  958. try:
  959. # Get detailed commit information
  960. commit_lines = getpipeoutput(['git log --shortstat --pretty=format:"COMMIT:%H:%aN:%at:%s" %s' % getlogrange('HEAD')]).split('\n')
  961. current_author = None
  962. current_timestamp = None
  963. current_message = None
  964. author_commits = defaultdict(list)
  965. for line in commit_lines:
  966. line = line.strip()
  967. if line.startswith('COMMIT:'):
  968. # Parse: COMMIT:hash:author:timestamp:subject
  969. parts = line.split(':', 4)
  970. if len(parts) >= 5:
  971. current_author = parts[2]
  972. try:
  973. current_timestamp = int(parts[3])
  974. current_message = parts[4]
  975. except ValueError:
  976. current_timestamp = None
  977. current_message = ""
  978. elif line and current_author and re.search(r'files? changed', line):
  979. # Parse shortstat line
  980. numbers = re.findall(r'\d+', line)
  981. if len(numbers) >= 1:
  982. files_changed = int(numbers[0])
  983. insertions = int(numbers[1]) if len(numbers) > 1 else 0
  984. deletions = int(numbers[2]) if len(numbers) > 2 else 0
  985. total_changes = insertions + deletions
  986. commit_info = {
  987. 'timestamp': current_timestamp,
  988. 'files_changed': files_changed,
  989. 'lines_changed': total_changes,
  990. 'insertions': insertions,
  991. 'deletions': deletions,
  992. 'message': current_message
  993. }
  994. author_commits[current_author].append(commit_info)
  995. # Analyze patterns for each author
  996. for author, commits in author_commits.items():
  997. if not commits:
  998. continue
  999. total_commits = len(commits)
  1000. total_lines = sum(c['lines_changed'] for c in commits)
  1001. avg_commit_size = total_lines / total_commits if total_commits else 0
  1002. # Categorize commits by size
  1003. small_commits = sum(1 for c in commits if c['lines_changed'] < 10)
  1004. medium_commits = sum(1 for c in commits if 10 <= c['lines_changed'] < 100)
  1005. large_commits = sum(1 for c in commits if c['lines_changed'] >= 100)
  1006. # Calculate commit frequency (commits per day)
  1007. if commits:
  1008. timestamps = [c['timestamp'] for c in commits if c['timestamp']]
  1009. if len(timestamps) > 1:
  1010. time_span = max(timestamps) - min(timestamps)
  1011. days_active = time_span / (24 * 3600) if time_span > 0 else 1
  1012. commit_frequency = total_commits / days_active
  1013. else:
  1014. commit_frequency = total_commits
  1015. else:
  1016. commit_frequency = 0
  1017. # Analyze commit messages for patterns
  1018. bug_related = sum(1 for c in commits if any(keyword in c['message'].lower()
  1019. for keyword in ['fix', 'bug', 'error', 'issue', 'patch', 'repair']))
  1020. feature_related = sum(1 for c in commits if any(keyword in c['message'].lower()
  1021. for keyword in ['add', 'new', 'feature', 'implement', 'create']))
  1022. refactor_related = sum(1 for c in commits if any(keyword in c['message'].lower()
  1023. for keyword in ['refactor', 'cleanup', 'reorganize', 'restructure', 'optimize']))
  1024. self.commit_patterns[author] = {
  1025. 'total_commits': total_commits,
  1026. 'avg_commit_size': avg_commit_size,
  1027. 'small_commits': small_commits,
  1028. 'medium_commits': medium_commits,
  1029. 'large_commits': large_commits,
  1030. 'commit_frequency': commit_frequency,
  1031. 'bug_related_commits': bug_related,
  1032. 'feature_related_commits': feature_related,
  1033. 'refactor_related_commits': refactor_related,
  1034. 'avg_files_per_commit': sum(c['files_changed'] for c in commits) / total_commits if total_commits else 0
  1035. }
  1036. except Exception as e:
  1037. if conf['debug']:
  1038. print(f'Warning: Commit pattern analysis failed: {e}')
  1039. def _analyzeWorkingPatterns(self):
  1040. """Analyze when authors typically work (time of day, day of week, timezone patterns)"""
  1041. if conf['verbose']:
  1042. print('Analyzing working time patterns...')
  1043. try:
  1044. # Get commit timestamps with timezone info
  1045. commit_lines = getpipeoutput(['git log --pretty=format:"%aN|%at|%ai|%s" %s' % getlogrange('HEAD')]).split('\n')
  1046. for line in commit_lines:
  1047. if not line.strip():
  1048. continue
  1049. parts = line.split('|', 3)
  1050. if len(parts) < 3:
  1051. continue
  1052. author = parts[0]
  1053. try:
  1054. timestamp = int(parts[1])
  1055. date_str = parts[2] # ISO format with timezone
  1056. message = parts[3] if len(parts) > 3 else ""
  1057. except (ValueError, IndexError):
  1058. continue
  1059. # Parse date and time information
  1060. date = datetime.datetime.fromtimestamp(timestamp)
  1061. hour = date.hour
  1062. day_of_week = date.weekday() # Monday = 0, Sunday = 6
  1063. # Initialize author working patterns
  1064. if author not in self.working_patterns:
  1065. self.working_patterns[author] = {
  1066. 'night_commits': 0, # 22:00 - 06:00
  1067. 'weekend_commits': 0, # Saturday, Sunday
  1068. 'peak_hours': defaultdict(int),
  1069. 'peak_days': defaultdict(int),
  1070. 'timezone_pattern': defaultdict(int),
  1071. 'early_bird': 0, # 05:00 - 09:00
  1072. 'workday': 0, # 09:00 - 17:00
  1073. 'evening': 0, # 17:00 - 22:00
  1074. 'total_commits': 0
  1075. }
  1076. self.working_patterns[author]['total_commits'] += 1
  1077. self.working_patterns[author]['peak_hours'][hour] += 1
  1078. self.working_patterns[author]['peak_days'][day_of_week] += 1
  1079. # Extract timezone from date string
  1080. if '+' in date_str or '-' in date_str:
  1081. tz_part = date_str.split()[-1]
  1082. self.working_patterns[author]['timezone_pattern'][tz_part] += 1
  1083. # Categorize by time of day
  1084. if 22 <= hour or hour < 6:
  1085. self.working_patterns[author]['night_commits'] += 1
  1086. elif 5 <= hour < 9:
  1087. self.working_patterns[author]['early_bird'] += 1
  1088. elif 9 <= hour < 17:
  1089. self.working_patterns[author]['workday'] += 1
  1090. elif 17 <= hour < 22:
  1091. self.working_patterns[author]['evening'] += 1
  1092. # Weekend commits (Saturday = 5, Sunday = 6)
  1093. if day_of_week >= 5:
  1094. self.working_patterns[author]['weekend_commits'] += 1
  1095. # Classify commit types
  1096. if any(keyword in message.lower() for keyword in ['fix', 'bug', 'error', 'patch']):
  1097. if author not in self.potential_bug_commits:
  1098. self.potential_bug_commits.append({'author': author, 'timestamp': timestamp, 'message': message})
  1099. elif any(keyword in message.lower() for keyword in ['refactor', 'cleanup', 'optimize']):
  1100. self.refactoring_commits.append({'author': author, 'timestamp': timestamp, 'message': message})
  1101. elif any(keyword in message.lower() for keyword in ['add', 'new', 'feature', 'implement']):
  1102. self.feature_commits.append({'author': author, 'timestamp': timestamp, 'message': message})
  1103. # Calculate active periods for each author
  1104. for author in self.authors:
  1105. if 'active_days' in self.authors[author]:
  1106. active_days = self.authors[author]['active_days']
  1107. sorted_days = sorted(active_days)
  1108. if len(sorted_days) > 1:
  1109. # Calculate gaps between active days
  1110. gaps = []
  1111. for i in range(1, len(sorted_days)):
  1112. prev_date = datetime.datetime.strptime(sorted_days[i-1], '%Y-%m-%d')
  1113. curr_date = datetime.datetime.strptime(sorted_days[i], '%Y-%m-%d')
  1114. gap = (curr_date - prev_date).days
  1115. gaps.append(gap)
  1116. avg_gap = sum(gaps) / len(gaps) if gaps else 0
  1117. # Find longest streak
  1118. longest_streak = 1
  1119. current_streak = 1
  1120. for gap in gaps:
  1121. if gap == 1:
  1122. current_streak += 1
  1123. longest_streak = max(longest_streak, current_streak)
  1124. else:
  1125. current_streak = 1
  1126. else:
  1127. avg_gap = 0
  1128. longest_streak = 1
  1129. self.author_active_periods[author] = {
  1130. 'active_days_count': len(active_days),
  1131. 'longest_streak': longest_streak,
  1132. 'avg_gap': avg_gap
  1133. }
  1134. except Exception as e:
  1135. if conf['debug']:
  1136. print(f'Warning: Working pattern analysis failed: {e}')
  1137. def _analyzeImpactAndQuality(self):
  1138. """Analyze the impact of changes and identify critical files and potential quality issues"""
  1139. if conf['verbose']:
  1140. print('Analyzing impact and quality indicators...')
  1141. try:
  1142. # Identify critical files based on common patterns
  1143. all_files = getpipeoutput(['git ls-tree -r --name-only %s' % getcommitrange('HEAD', end_only=True)]).split('\n')
  1144. for filepath in all_files:
  1145. if not filepath.strip():
  1146. continue
  1147. filename = os.path.basename(filepath).lower()
  1148. # Mark files as critical based on common patterns
  1149. critical_patterns = [
  1150. 'main.', 'app.', 'index.', 'config.', 'settings.',
  1151. 'setup.', 'package.json', 'requirements.txt', 'Dockerfile',
  1152. 'makefile', 'readme', 'license', '.env'
  1153. ]
  1154. if any(pattern in filename for pattern in critical_patterns):
  1155. self.critical_files.add(filepath)
  1156. # Files in root directory are often critical
  1157. if '/' not in filepath:
  1158. self.critical_files.add(filepath)
  1159. # Analyze file impact scores based on change frequency and author diversity
  1160. file_authors = defaultdict(set)
  1161. file_change_count = defaultdict(int)
  1162. # Get file change history
  1163. log_lines = getpipeoutput(['git log --name-only --pretty=format:"AUTHOR:%aN" %s' % getlogrange('HEAD')]).split('\n')
  1164. current_author = None
  1165. for line in log_lines:
  1166. line = line.strip()
  1167. if line.startswith('AUTHOR:'):
  1168. current_author = line.replace('AUTHOR:', '')
  1169. elif line and current_author and not line.startswith('AUTHOR:'):
  1170. filename = line
  1171. file_authors[filename].add(current_author)
  1172. file_change_count[filename] += 1
  1173. # Calculate impact scores
  1174. for filename in file_change_count:
  1175. change_count = file_change_count[filename]
  1176. author_count = len(file_authors[filename])
  1177. # Impact score based on change frequency and author diversity
  1178. base_score = min(change_count * 10, 100) # Cap at 100
  1179. diversity_bonus = min(author_count * 5, 25) # Bonus for multiple authors
  1180. critical_bonus = 50 if filename in self.critical_files else 0
  1181. impact_score = base_score + diversity_bonus + critical_bonus
  1182. self.file_impact_scores[filename] = impact_score
  1183. # Analyze author impact
  1184. for author in self.authors:
  1185. critical_files_touched = []
  1186. total_impact_score = 0
  1187. # Check which critical files this author touched
  1188. for filename in self.critical_files:
  1189. if author in file_authors.get(filename, set()):
  1190. critical_files_touched.append(filename)
  1191. total_impact_score += self.file_impact_scores.get(filename, 0)
  1192. # Calculate bug potential based on commit messages and patterns
  1193. author_commits = self.commit_patterns.get(author, {})
  1194. bug_commits = author_commits.get('bug_related_commits', 0)
  1195. total_commits = author_commits.get('total_commits', 1)
  1196. bug_ratio = bug_commits / total_commits if total_commits > 0 else 0
  1197. # Higher bug potential if author has many bug-fix commits
  1198. bug_potential = min(bug_ratio * 100, 100)
  1199. self.impact_analysis[author] = {
  1200. 'critical_files': critical_files_touched,
  1201. 'impact_score': total_impact_score,
  1202. 'bug_potential': bug_potential,
  1203. 'high_impact_files': [f for f in file_authors if author in file_authors[f] and self.file_impact_scores.get(f, 0) > 50]
  1204. }
  1205. except Exception as e:
  1206. if conf['debug']:
  1207. print(f'Warning: Impact analysis failed: {e}')
  1208. def _calculateTeamPerformanceMetrics(self):
  1209. """Calculate comprehensive team performance metrics"""
  1210. if conf['verbose']:
  1211. print('Calculating team performance metrics...')
  1212. try:
  1213. total_commits = self.getTotalCommits()
  1214. total_lines_changed = self.total_lines_added + self.total_lines_removed
  1215. for author in self.authors:
  1216. author_info = self.authors[author]
  1217. commit_patterns = self.commit_patterns.get(author, {})
  1218. working_patterns = self.working_patterns.get(author, {})
  1219. impact_info = self.impact_analysis.get(author, {})
  1220. # Efficiency Score (based on lines changed per commit and commit quality)
  1221. avg_commit_size = commit_patterns.get('avg_commit_size', 0)
  1222. total_author_commits = author_info.get('commits', 0)
  1223. # Normalize efficiency (sweet spot is around 20-50 lines per commit)
  1224. if 20 <= avg_commit_size <= 50:
  1225. size_efficiency = 100
  1226. elif avg_commit_size < 20:
  1227. size_efficiency = max(0, avg_commit_size * 5) # Penalty for too small commits
  1228. else:
  1229. size_efficiency = max(0, 100 - (avg_commit_size - 50) * 2) # Penalty for too large commits
  1230. # Quality indicators
  1231. bug_commits = commit_patterns.get('bug_related_commits', 0)
  1232. feature_commits = commit_patterns.get('feature_related_commits', 0)
  1233. refactor_commits = commit_patterns.get('refactor_related_commits', 0)
  1234. quality_score = 0
  1235. if total_author_commits > 0:
  1236. feature_ratio = feature_commits / total_author_commits
  1237. refactor_ratio = refactor_commits / total_author_commits
  1238. bug_ratio = bug_commits / total_author_commits
  1239. quality_score = (feature_ratio * 40 + refactor_ratio * 30 - bug_ratio * 20) * 100
  1240. quality_score = max(0, min(100, quality_score))
  1241. efficiency_score = (size_efficiency * 0.6 + quality_score * 0.4)
  1242. # Consistency Score (based on commit frequency and working patterns)
  1243. commit_frequency = commit_patterns.get('commit_frequency', 0)
  1244. active_periods = self.author_active_periods.get(author, {})
  1245. longest_streak = active_periods.get('longest_streak', 1)
  1246. avg_gap = active_periods.get('avg_gap', 30)
  1247. # Consistency based on regular commits and sustained activity
  1248. frequency_score = min(commit_frequency * 20, 100) # Up to 5 commits per day = max score
  1249. streak_score = min(longest_streak * 5, 100) # Longer streaks = better consistency
  1250. gap_score = max(0, 100 - avg_gap * 3) # Smaller gaps = better consistency
  1251. consistency_score = (frequency_score * 0.4 + streak_score * 0.3 + gap_score * 0.3)
  1252. # Leadership Score (based on impact on critical files, collaboration, and mentoring indicators)
  1253. impact_score = impact_info.get('impact_score', 0)
  1254. critical_files_count = len(impact_info.get('critical_files', []))
  1255. # Collaboration score based on working with others
  1256. collaboration_data = self.author_collaboration.get(author, {})
  1257. worked_with_count = len(collaboration_data.get('worked_with', {}))
  1258. # Normalize impact and collaboration
  1259. impact_leadership = min(impact_score / 10, 100) # Scale impact score
  1260. collaboration_leadership = min(worked_with_count * 10, 100) # Max score at 10 collaborators
  1261. critical_file_leadership = min(critical_files_count * 20, 100) # Max score at 5 critical files
  1262. leadership_score = (impact_leadership * 0.4 + collaboration_leadership * 0.3 + critical_file_leadership * 0.3)
  1263. # Overall contribution percentage
  1264. author_commits = author_info.get('commits', 0)
  1265. contribution_percentage = (author_commits / total_commits * 100) if total_commits > 0 else 0
  1266. # Store performance metrics
  1267. self.team_performance[author] = {
  1268. 'efficiency_score': efficiency_score,
  1269. 'consistency': consistency_score,
  1270. 'leadership_score': leadership_score,
  1271. 'contribution_percentage': contribution_percentage,
  1272. 'overall_score': (efficiency_score * 0.4 + consistency_score * 0.3 + leadership_score * 0.3),
  1273. 'commit_quality_analysis': {
  1274. 'avg_commit_size': avg_commit_size,
  1275. 'small_commits_ratio': commit_patterns.get('small_commits', 0) / total_author_commits if total_author_commits > 0 else 0,
  1276. 'large_commits_ratio': commit_patterns.get('large_commits', 0) / total_author_commits if total_author_commits > 0 else 0,
  1277. 'bug_fix_ratio': bug_commits / total_author_commits if total_author_commits > 0 else 0,
  1278. 'feature_ratio': feature_commits / total_author_commits if total_author_commits > 0 else 0
  1279. }
  1280. }
  1281. except Exception as e:
  1282. if conf['debug']:
  1283. print(f'Warning: Team performance calculation failed: {e}')
  1284. def refine(self):
  1285. # authors
  1286. # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
  1287. self.authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
  1288. self.authors_by_commits.reverse() # most first
  1289. for i, name in enumerate(self.authors_by_commits):
  1290. self.authors[name]['place_by_commits'] = i + 1
  1291. for name in list(self.authors.keys()):
  1292. a = self.authors[name]
  1293. a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
  1294. date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
  1295. date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
  1296. delta = date_last - date_first
  1297. a['date_first'] = date_first.strftime('%Y-%m-%d')
  1298. a['date_last'] = date_last.strftime('%Y-%m-%d')
  1299. a['timedelta'] = delta
  1300. if 'lines_added' not in a: a['lines_added'] = 0
  1301. if 'lines_removed' not in a: a['lines_removed'] = 0
  1302. def getActiveDays(self):
  1303. return self.active_days
  1304. def getActivityByDayOfWeek(self):
  1305. return self.activity_by_day_of_week
  1306. def getActivityByHourOfDay(self):
  1307. return self.activity_by_hour_of_day
  1308. def getAuthorInfo(self, author):
  1309. return self.authors[author]
  1310. def getAuthors(self, limit = None):
  1311. res = getkeyssortedbyvaluekey(self.authors, 'commits')
  1312. res.reverse()
  1313. return res[:limit]
  1314. def getCommitDeltaDays(self):
  1315. return (self.last_commit_stamp // 86400 - self.first_commit_stamp // 86400) + 1
  1316. def getDomainInfo(self, domain):
  1317. return self.domains[domain]
  1318. def getDomains(self):
  1319. return list(self.domains.keys())
  1320. def getFirstCommitDate(self):
  1321. return datetime.datetime.fromtimestamp(self.first_commit_stamp)
  1322. def getLastCommitDate(self):
  1323. return datetime.datetime.fromtimestamp(self.last_commit_stamp)
  1324. def getTags(self):
  1325. lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
  1326. return lines.split('\n')
  1327. def getTagDate(self, tag):
  1328. return self.revToDate('tags/' + tag)
  1329. def getTotalAuthors(self):
  1330. return self.total_authors
  1331. def getTotalCommits(self):
  1332. return self.total_commits
  1333. def getTotalFiles(self):
  1334. return self.total_files
  1335. def getTotalLOC(self):
  1336. return self.total_lines
  1337. def getTotalSourceLines(self):
  1338. return self.total_source_lines
  1339. def getTotalCommentLines(self):
  1340. return self.total_comment_lines
  1341. def getTotalBlankLines(self):
  1342. return self.total_blank_lines
  1343. def getSLOCByExtension(self):
  1344. return self.sloc_by_extension
  1345. def getLargestFiles(self, limit=10):
  1346. """Get the largest files by size."""
  1347. sorted_files = sorted(self.file_sizes.items(), key=lambda x: x[1], reverse=True)
  1348. return sorted_files[:limit]
  1349. def getFilesWithMostRevisions(self, limit=10):
  1350. """Get files with most revisions (hotspots)."""
  1351. sorted_files = sorted(self.file_revisions.items(), key=lambda x: x[1], reverse=True)
  1352. return sorted_files[:limit]
  1353. def getAverageFileSize(self):
  1354. """Get average file size in bytes."""
  1355. if not self.file_sizes:
  1356. return 0.0
  1357. return sum(self.file_sizes.values()) / len(self.file_sizes)
  1358. def getDirectoriesByActivity(self, limit=10):
  1359. """Get directories with most lines changed (added + removed)."""
  1360. if not hasattr(self, 'directories'):
  1361. return []
  1362. directory_activity = []
  1363. for directory, stats in self.directories.items():
  1364. total_lines = stats['lines_added'] + stats['lines_removed']
  1365. file_count = len(stats['files'])
  1366. directory_activity.append((directory, total_lines, stats['lines_added'], stats['lines_removed'], file_count))
  1367. return sorted(directory_activity, key=lambda x: x[1], reverse=True)[:limit]
  1368. def getDirectoriesByRevisions(self, limit=10):
  1369. """Get directories with most file revisions."""
  1370. if not hasattr(self, 'directory_revisions'):
  1371. return []
  1372. sorted_dirs = sorted(self.directory_revisions.items(), key=lambda x: x[1], reverse=True)
  1373. return sorted_dirs[:limit]
  1374. def getAverageRevisionsPerFile(self):
  1375. """Get average number of revisions per file."""
  1376. if not self.file_revisions:
  1377. return 0.0
  1378. return sum(self.file_revisions.values()) / len(self.file_revisions)
  1379. def getTotalSize(self):
  1380. return self.total_size
  1381. def getLast30DaysActivity(self):
  1382. """Get activity stats for last 30 days."""
  1383. return {
  1384. 'commits': self.last_30_days_commits,
  1385. 'lines_added': self.last_30_days_lines_added,
  1386. 'lines_removed': self.last_30_days_lines_removed
  1387. }
  1388. def getLast12MonthsActivity(self):
  1389. """Get activity stats for last 12 months."""
  1390. return {
  1391. 'commits': dict(self.last_12_months_commits),
  1392. 'lines_added': dict(self.last_12_months_lines_added),
  1393. 'lines_removed': dict(self.last_12_months_lines_removed)
  1394. }
  1395. def getPaceOfChanges(self):
  1396. """Get pace of changes (line changes over time)."""
  1397. return self.pace_of_changes
  1398. def getRepositorySize(self):
  1399. """Get repository size in MB."""
  1400. return getattr(self, 'repository_size_mb', 0.0)
  1401. def getBranches(self):
  1402. """Get all branches with their statistics."""
  1403. return self.branches
  1404. def getUnmergedBranches(self):
  1405. """Get list of unmerged branch names."""
  1406. return self.unmerged_branches
  1407. def getMainBranch(self):
  1408. """Get the detected main branch name."""
  1409. return getattr(self, 'main_branch', 'master')
  1410. def getBranchInfo(self, branch_name):
  1411. """Get detailed information about a specific branch."""
  1412. return self.branches.get(branch_name, {})
  1413. def getBranchAuthors(self, branch_name):
  1414. """Get authors who contributed to a specific branch."""
  1415. branch_info = self.branches.get(branch_name, {})
  1416. return branch_info.get('authors', {})
  1417. def getBranchesByCommits(self, limit=None):
  1418. """Get branches sorted by number of commits."""
  1419. sorted_branches = sorted(self.branches.items(),
  1420. key=lambda x: x[1].get('commits', 0),
  1421. reverse=True)
  1422. if limit:
  1423. return sorted_branches[:limit]
  1424. return sorted_branches
  1425. def getBranchesByLinesChanged(self, limit=None):
  1426. """Get branches sorted by total lines changed."""
  1427. sorted_branches = sorted(self.branches.items(),
  1428. key=lambda x: x[1].get('lines_added', 0) + x[1].get('lines_removed', 0),
  1429. reverse=True)
  1430. if limit:
  1431. return sorted_branches[:limit]
  1432. return sorted_branches
  1433. def getUnmergedBranchStats(self):
  1434. """Get statistics for unmerged branches only."""
  1435. unmerged_stats = {}
  1436. for branch_name in self.unmerged_branches:
  1437. if branch_name in self.branches:
  1438. unmerged_stats[branch_name] = self.branches[branch_name]
  1439. return unmerged_stats
  1440. # New methods for advanced team analysis
  1441. def getCommitPatterns(self):
  1442. """Get commit patterns analysis for all authors."""
  1443. return self.commit_patterns
  1444. def getCommitPatternsForAuthor(self, author):
  1445. """Get commit patterns for a specific author."""
  1446. return self.commit_patterns.get(author, {})
  1447. def getWorkingPatterns(self):
  1448. """Get working time patterns for all authors."""
  1449. return self.working_patterns
  1450. def getWorkingPatternsForAuthor(self, author):
  1451. """Get working patterns for a specific author."""
  1452. return self.working_patterns.get(author, {})
  1453. def getTeamCollaboration(self):
  1454. """Get team collaboration analysis."""
  1455. return self.author_collaboration
  1456. def getCollaborationForAuthor(self, author):
  1457. """Get collaboration data for a specific author."""
  1458. return self.author_collaboration.get(author, {})
  1459. def getImpactAnalysis(self):
  1460. """Get impact analysis for all authors."""
  1461. return self.impact_analysis
  1462. def getImpactAnalysisForAuthor(self, author):
  1463. """Get impact analysis for a specific author."""
  1464. return self.impact_analysis.get(author, {})
  1465. def getTeamPerformance(self):
  1466. """Get team performance metrics for all authors."""
  1467. return self.team_performance
  1468. def getTeamPerformanceForAuthor(self, author):
  1469. """Get team performance metrics for a specific author."""
  1470. return self.team_performance.get(author, {})
  1471. def getCriticalFiles(self):
  1472. """Get list of files identified as critical to the project."""
  1473. return list(self.critical_files)
  1474. def getFileImpactScores(self):
  1475. """Get impact scores for all files."""
  1476. return dict(self.file_impact_scores)
  1477. def getTopImpactFiles(self, limit=10):
  1478. """Get files with highest impact scores."""
  1479. sorted_files = sorted(self.file_impact_scores.items(), key=lambda x: x[1], reverse=True)
  1480. return sorted_files[:limit]
  1481. def getBugRelatedCommits(self):
  1482. """Get commits that appear to be bug-related."""
  1483. return self.potential_bug_commits
  1484. def getRefactoringCommits(self):
  1485. """Get commits that appear to be refactoring."""
  1486. return self.refactoring_commits
  1487. def getFeatureCommits(self):
  1488. """Get commits that appear to add features."""
  1489. return self.feature_commits
  1490. def getAuthorActivePeriods(self):
  1491. """Get active periods analysis for all authors."""
  1492. return self.author_active_periods
  1493. def getAuthorsByContribution(self):
  1494. """Get authors sorted by contribution percentage."""
  1495. performance_data = [(author, perf.get('contribution_percentage', 0))
  1496. for author, perf in self.team_performance.items()]
  1497. return sorted(performance_data, key=lambda x: x[1], reverse=True)
  1498. def getAuthorsByEfficiency(self):
  1499. """Get authors sorted by efficiency score."""
  1500. performance_data = [(author, perf.get('efficiency_score', 0))
  1501. for author, perf in self.team_performance.items()]
  1502. return sorted(performance_data, key=lambda x: x[1], reverse=True)
  1503. def getAuthorsByConsistency(self):
  1504. """Get authors sorted by consistency score."""
  1505. performance_data = [(author, perf.get('consistency', 0))
  1506. for author, perf in self.team_performance.items()]
  1507. return sorted(performance_data, key=lambda x: x[1], reverse=True)
  1508. def getAuthorsByLeadership(self):
  1509. """Get authors sorted by leadership score."""
  1510. performance_data = [(author, perf.get('leadership_score', 0))
  1511. for author, perf in self.team_performance.items()]
  1512. return sorted(performance_data, key=lambda x: x[1], reverse=True)
  1513. def getTeamWorkDistribution(self):
  1514. """Analyze work distribution across team members."""
  1515. total_commits = self.getTotalCommits()
  1516. total_lines = self.total_lines_added + self.total_lines_removed
  1517. distribution = {}
  1518. for author in self.authors:
  1519. author_info = self.authors[author]
  1520. author_commits = author_info.get('commits', 0)
  1521. author_lines = author_info.get('lines_added', 0) + author_info.get('lines_removed', 0)
  1522. distribution[author] = {
  1523. 'commit_percentage': (author_commits / total_commits * 100) if total_commits > 0 else 0,
  1524. 'lines_percentage': (author_lines / total_lines * 100) if total_lines > 0 else 0,
  1525. 'commits': author_commits,
  1526. 'lines_changed': author_lines
  1527. }
  1528. return distribution
  1529. def getCommitSizeAnalysis(self):
  1530. """Get analysis of commit sizes across the team."""
  1531. analysis = {
  1532. 'small_commits_authors': [], # Authors with >50% small commits
  1533. 'large_commits_authors': [], # Authors with >20% large commits
  1534. 'balanced_authors': [], # Authors with balanced commit sizes
  1535. 'overall_stats': {
  1536. 'total_small': 0,
  1537. 'total_medium': 0,
  1538. 'total_large': 0
  1539. }
  1540. }
  1541. for author, patterns in self.commit_patterns.items():
  1542. total_commits = patterns.get('total_commits', 0)
  1543. if total_commits == 0:
  1544. continue
  1545. small_ratio = patterns.get('small_commits', 0) / total_commits
  1546. large_ratio = patterns.get('large_commits', 0) / total_commits
  1547. analysis['overall_stats']['total_small'] += patterns.get('small_commits', 0)
  1548. analysis['overall_stats']['total_medium'] += patterns.get('medium_commits', 0)
  1549. analysis['overall_stats']['total_large'] += patterns.get('large_commits', 0)
  1550. if small_ratio > 0.5:
  1551. analysis['small_commits_authors'].append((author, small_ratio))
  1552. elif large_ratio > 0.2:
  1553. analysis['large_commits_authors'].append((author, large_ratio))
  1554. else:
  1555. analysis['balanced_authors'].append((author, small_ratio, large_ratio))
  1556. return analysis
  1557. def revToDate(self, rev):
  1558. stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
  1559. return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
  1560. class ReportCreator:
  1561. """Creates the actual report based on given data."""
  1562. def __init__(self):
  1563. pass
  1564. def create(self, data, path):
  1565. self.data = data
  1566. self.path = path
  1567. def html_linkify(text):
  1568. return text.lower().replace(' ', '_')
  1569. def html_header(level, text):
  1570. name = html_linkify(text)
  1571. return '\n<h%d id="%s"><a href="#%s">%s</a></h%d>\n\n' % (level, name, name, text, level)
  1572. class HTMLReportCreator(ReportCreator):
  1573. def create(self, data, path):
  1574. ReportCreator.create(self, data, path)
  1575. self.title = data.projectname
  1576. # Prepare safe local values to avoid division-by-zero and empty-collection errors
  1577. total_commits = data.getTotalCommits()
  1578. total_active_days = len(data.getActiveDays()) if hasattr(data, 'getActiveDays') else 0
  1579. delta_days = data.getCommitDeltaDays() if hasattr(data, 'getCommitDeltaDays') else 0
  1580. total_authors = data.getTotalAuthors()
  1581. # busiest counters: use 1 as denominator if no activity recorded to avoid ZeroDivisionError
  1582. hour_of_day_busiest = data.activity_by_hour_of_day_busiest if getattr(data, 'activity_by_hour_of_day_busiest', 0) > 0 else 1
  1583. hour_of_week_busiest = data.activity_by_hour_of_week_busiest if getattr(data, 'activity_by_hour_of_week_busiest', 0) > 0 else 1
  1584. # timezone max for coloring; default to 1 if empty
  1585. max_commits_on_tz = max(data.commits_by_timezone.values()) if data.commits_by_timezone else 1
  1586. # copy static files. Looks in the binary directory, ../share/gitstats and /usr/share/gitstats
  1587. binarypath = os.path.dirname(os.path.abspath(__file__))
  1588. secondarypath = os.path.join(binarypath, '..', 'share', 'gitstats')
  1589. basedirs = [binarypath, secondarypath, '/usr/share/gitstats']
  1590. for file in (conf['style'], 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
  1591. for base in basedirs:
  1592. src = base + '/' + file
  1593. if os.path.exists(src):
  1594. shutil.copyfile(src, path + '/' + file)
  1595. break
  1596. else:
  1597. print('Warning: "%s" not found, so not copied (searched: %s)' % (file, basedirs))
  1598. f = open(path + "/index.html", 'w')
  1599. format = '%Y-%m-%d %H:%M:%S'
  1600. self.printHeader(f)
  1601. f.write('<h1>GitStats - %s</h1>' % data.projectname)
  1602. self.printNav(f)
  1603. f.write('<dl>')
  1604. f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
  1605. f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
  1606. f.write('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s), %s, %s</dd>' % (getversion(), getgitversion(), getgnuplotversion()))
  1607. f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format)))
  1608. f.write('<dt>Age</dt><dd>%d days, %d active days (%3.2f%%)</dd>' % (data.getCommitDeltaDays(), total_active_days, (100.0 * total_active_days / data.getCommitDeltaDays()) if data.getCommitDeltaDays() else 0.0))
  1609. f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
  1610. # Add file statistics
  1611. try:
  1612. avg_size = data.getAverageFileSize()
  1613. f.write('<dt>Average File Size</dt><dd>%.2f bytes (%.1f KB)</dd>' % (avg_size, avg_size / 1024))
  1614. except:
  1615. pass
  1616. try:
  1617. avg_revisions = data.getAverageRevisionsPerFile()
  1618. f.write('<dt>Average Revisions per File</dt><dd>%.2f</dd>' % avg_revisions)
  1619. except:
  1620. pass
  1621. try:
  1622. repo_size = data.getRepositorySize()
  1623. if repo_size > 0:
  1624. f.write('<dt>Repository Size</dt><dd>%.1f MB</dd>' % repo_size)
  1625. except:
  1626. pass
  1627. f.write('<dt>Total Lines of Code</dt><dd>%s (%d added, %d removed)</dd>' % (data.getTotalLOC(), data.total_lines_added, data.total_lines_removed))
  1628. f.write('<dt>Source Lines of Code</dt><dd>%s (%.1f%%)</dd>' % (data.getTotalSourceLines(), (100.0 * data.getTotalSourceLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0))
  1629. f.write('<dt>Comment Lines</dt><dd>%s (%.1f%%)</dd>' % (data.getTotalCommentLines(), (100.0 * data.getTotalCommentLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0))
  1630. f.write('<dt>Blank Lines</dt><dd>%s (%.1f%%)</dd>' % (data.getTotalBlankLines(), (100.0 * data.getTotalBlankLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0))
  1631. avg_active = float(total_commits) / total_active_days if total_active_days else 0.0
  1632. avg_all = float(total_commits) / delta_days if delta_days else 0.0
  1633. f.write('<dt>Total Commits</dt><dd>%s (average %.1f commits per active day, %.1f per all days)</dd>' % (total_commits, avg_active, avg_all))
  1634. avg_per_author = float(total_commits) / total_authors if total_authors else 0.0
  1635. f.write('<dt>Authors</dt><dd>%s (average %.1f commits per author)</dd>' % (total_authors, avg_per_author))
  1636. # Branch statistics
  1637. branches = data.getBranches() if hasattr(data, 'getBranches') else {}
  1638. unmerged_branches = data.getUnmergedBranches() if hasattr(data, 'getUnmergedBranches') else []
  1639. main_branch = data.getMainBranch() if hasattr(data, 'getMainBranch') else 'master'
  1640. if branches:
  1641. f.write('<dt>Total Branches</dt><dd>%d</dd>' % len(branches))
  1642. if unmerged_branches:
  1643. f.write('<dt>Unmerged Branches</dt><dd>%d (%s)</dd>' % (len(unmerged_branches), ', '.join(unmerged_branches[:5]) + ('...' if len(unmerged_branches) > 5 else '')))
  1644. f.write('<dt>Main Branch</dt><dd>%s</dd>' % main_branch)
  1645. f.write('</dl>')
  1646. f.write('</body>\n</html>')
  1647. f.close()
  1648. ###
  1649. # Team Analysis - New comprehensive team analysis page
  1650. f = open(path + '/team_analysis.html', 'w')
  1651. self.printHeader(f)
  1652. f.write('<h1>Team Analysis</h1>')
  1653. self.printNav(f)
  1654. # Team Overview
  1655. f.write(html_header(2, 'Team Overview'))
  1656. total_authors = data.getTotalAuthors()
  1657. work_distribution = data.getTeamWorkDistribution()
  1658. f.write('<dl>')
  1659. f.write('<dt>Total Team Members</dt><dd>%d</dd>' % total_authors)
  1660. # Calculate work distribution metrics
  1661. commit_contributions = [dist['commit_percentage'] for dist in work_distribution.values()]
  1662. lines_contributions = [dist['lines_percentage'] for dist in work_distribution.values()]
  1663. if commit_contributions:
  1664. max_commit_contrib = max(commit_contributions)
  1665. min_commit_contrib = min(commit_contributions)
  1666. avg_commit_contrib = sum(commit_contributions) / len(commit_contributions)
  1667. f.write('<dt>Work Distribution (Commits)</dt><dd>Max: %.1f%%, Min: %.1f%%, Avg: %.1f%%</dd>' %
  1668. (max_commit_contrib, min_commit_contrib, avg_commit_contrib))
  1669. if lines_contributions:
  1670. max_lines_contrib = max(lines_contributions)
  1671. min_lines_contrib = min(lines_contributions)
  1672. avg_lines_contrib = sum(lines_contributions) / len(lines_contributions)
  1673. f.write('<dt>Work Distribution (Lines)</dt><dd>Max: %.1f%%, Min: %.1f%%, Avg: %.1f%%</dd>' %
  1674. (max_lines_contrib, min_lines_contrib, avg_lines_contrib))
  1675. f.write('</dl>')
  1676. # Team Performance Rankings
  1677. f.write(html_header(2, 'Team Performance Rankings'))
  1678. # Top contributors by different metrics
  1679. contrib_ranking = data.getAuthorsByContribution()
  1680. efficiency_ranking = data.getAuthorsByEfficiency()
  1681. consistency_ranking = data.getAuthorsByConsistency()
  1682. leadership_ranking = data.getAuthorsByLeadership()
  1683. f.write('<div class="rankings">')
  1684. f.write('<div class="ranking-section">')
  1685. f.write('<h3>Top Contributors (by Commit %)</h3>')
  1686. f.write('<ol>')
  1687. for author, percentage in contrib_ranking[:10]:
  1688. f.write('<li>%s (%.1f%%)</li>' % (author, percentage))
  1689. f.write('</ol>')
  1690. f.write('</div>')
  1691. f.write('<div class="ranking-section">')
  1692. f.write('<h3>Most Efficient (by Quality Score)</h3>')
  1693. f.write('<ol>')
  1694. for author, score in efficiency_ranking[:10]:
  1695. f.write('<li>%s (%.1f)</li>' % (author, score))
  1696. f.write('</ol>')
  1697. f.write('</div>')
  1698. f.write('<div class="ranking-section">')
  1699. f.write('<h3>Most Consistent</h3>')
  1700. f.write('<ol>')
  1701. for author, score in consistency_ranking[:10]:
  1702. f.write('<li>%s (%.1f)</li>' % (author, score))
  1703. f.write('</ol>')
  1704. f.write('</div>')
  1705. f.write('<div class="ranking-section">')
  1706. f.write('<h3>Leadership Score</h3>')
  1707. f.write('<ol>')
  1708. for author, score in leadership_ranking[:10]:
  1709. f.write('<li>%s (%.1f)</li>' % (author, score))
  1710. f.write('</ol>')
  1711. f.write('</div>')
  1712. f.write('</div>')
  1713. # Detailed Team Performance Table
  1714. f.write(html_header(2, 'Detailed Team Performance Analysis'))
  1715. f.write('<table class="team-performance sortable" id="team-performance">')
  1716. f.write('<tr>')
  1717. f.write('<th>Author</th>')
  1718. f.write('<th>Commits</th>')
  1719. f.write('<th>Contrib %</th>')
  1720. f.write('<th>Lines Changed</th>')
  1721. f.write('<th>Avg Commit Size</th>')
  1722. f.write('<th>Efficiency</th>')
  1723. f.write('<th>Consistency</th>')
  1724. f.write('<th>Leadership</th>')
  1725. f.write('<th>Overall Score</th>')
  1726. f.write('<th>Assessment</th>')
  1727. f.write('</tr>')
  1728. team_performance = data.getTeamPerformance()
  1729. commit_patterns = data.getCommitPatterns()
  1730. # Sort by overall score
  1731. sorted_authors = sorted(team_performance.items(), key=lambda x: x[1].get('overall_score', 0), reverse=True)
  1732. for author, perf in sorted_authors:
  1733. author_info = data.getAuthorInfo(author)
  1734. patterns = commit_patterns.get(author, {})
  1735. commits = author_info.get('commits', 0)
  1736. lines_changed = author_info.get('lines_added', 0) + author_info.get('lines_removed', 0)
  1737. contrib_pct = perf.get('contribution_percentage', 0)
  1738. avg_commit_size = patterns.get('avg_commit_size', 0)
  1739. efficiency = perf.get('efficiency_score', 0)
  1740. consistency = perf.get('consistency', 0)
  1741. leadership = perf.get('leadership_score', 0)
  1742. overall = perf.get('overall_score', 0)
  1743. # Generate assessment
  1744. assessment = self._generateAssessment(perf, patterns)
  1745. f.write('<tr>')
  1746. f.write('<td>%s</td>' % author)
  1747. f.write('<td>%d</td>' % commits)
  1748. f.write('<td>%.1f%%</td>' % contrib_pct)
  1749. f.write('<td>%d</td>' % lines_changed)
  1750. f.write('<td>%.1f</td>' % avg_commit_size)
  1751. f.write('<td>%.1f</td>' % efficiency)
  1752. f.write('<td>%.1f</td>' % consistency)
  1753. f.write('<td>%.1f</td>' % leadership)
  1754. f.write('<td>%.1f</td>' % overall)
  1755. f.write('<td>%s</td>' % assessment)
  1756. f.write('</tr>')
  1757. f.write('</table>')
  1758. # Commit Patterns Analysis
  1759. f.write(html_header(2, 'Commit Patterns Analysis'))
  1760. commit_size_analysis = data.getCommitSizeAnalysis()
  1761. f.write('<h3>Commit Size Distribution</h3>')
  1762. f.write('<p><strong>Small commits (&lt;10 lines):</strong> %d commits</p>' % commit_size_analysis['overall_stats']['total_small'])
  1763. f.write('<p><strong>Medium commits (10-100 lines):</strong> %d commits</p>' % commit_size_analysis['overall_stats']['total_medium'])
  1764. f.write('<p><strong>Large commits (&gt;100 lines):</strong> %d commits</p>' % commit_size_analysis['overall_stats']['total_large'])
  1765. if commit_size_analysis['small_commits_authors']:
  1766. f.write('<h4>Authors with predominantly small commits (possible commit splitting):</h4>')
  1767. f.write('<ul>')
  1768. for author, ratio in commit_size_analysis['small_commits_authors']:
  1769. f.write('<li>%s (%.1f%% small commits)</li>' % (author, ratio * 100))
  1770. f.write('</ul>')
  1771. if commit_size_analysis['large_commits_authors']:
  1772. f.write('<h4>Authors with frequent large commits:</h4>')
  1773. f.write('<ul>')
  1774. for author, ratio in commit_size_analysis['large_commits_authors']:
  1775. f.write('<li>%s (%.1f%% large commits)</li>' % (author, ratio * 100))
  1776. f.write('</ul>')
  1777. # Working Patterns Analysis
  1778. f.write(html_header(2, 'Working Time Patterns'))
  1779. working_patterns = data.getWorkingPatterns()
  1780. f.write('<table class="working-patterns sortable" id="working-patterns">')
  1781. f.write('<tr>')
  1782. f.write('<th>Author</th>')
  1783. f.write('<th>Night Worker<br>(22:00-06:00)</th>')
  1784. f.write('<th>Weekend Worker</th>')
  1785. f.write('<th>Early Bird<br>(05:00-09:00)</th>')
  1786. f.write('<th>Regular Hours<br>(09:00-17:00)</th>')
  1787. f.write('<th>Evening<br>(17:00-22:00)</th>')
  1788. f.write('<th>Peak Hour</th>')
  1789. f.write('<th>Peak Day</th>')
  1790. f.write('</tr>')
  1791. for author, patterns in working_patterns.items():
  1792. total_commits = patterns.get('total_commits', 1)
  1793. night_pct = (patterns.get('night_commits', 0) / total_commits) * 100
  1794. weekend_pct = (patterns.get('weekend_commits', 0) / total_commits) * 100
  1795. early_pct = (patterns.get('early_bird', 0) / total_commits) * 100
  1796. workday_pct = (patterns.get('workday', 0) / total_commits) * 100
  1797. evening_pct = (patterns.get('evening', 0) / total_commits) * 100
  1798. # Find peak hour and day
  1799. peak_hours = patterns.get('peak_hours', {})
  1800. peak_days = patterns.get('peak_days', {})
  1801. peak_hour = max(peak_hours.keys(), key=lambda k: peak_hours[k]) if peak_hours else 'N/A'
  1802. peak_day = max(peak_days.keys(), key=lambda k: peak_days[k]) if peak_days else 'N/A'
  1803. peak_day_name = WEEKDAYS[peak_day] if isinstance(peak_day, int) and 0 <= peak_day < 7 else peak_day
  1804. f.write('<tr>')
  1805. f.write('<td>%s</td>' % author)
  1806. f.write('<td>%.1f%%</td>' % night_pct)
  1807. f.write('<td>%.1f%%</td>' % weekend_pct)
  1808. f.write('<td>%.1f%%</td>' % early_pct)
  1809. f.write('<td>%.1f%%</td>' % workday_pct)
  1810. f.write('<td>%.1f%%</td>' % evening_pct)
  1811. f.write('<td>%s:00</td>' % peak_hour)
  1812. f.write('<td>%s</td>' % peak_day_name)
  1813. f.write('</tr>')
  1814. f.write('</table>')
  1815. # Impact Analysis
  1816. f.write(html_header(2, 'Impact and Quality Analysis'))
  1817. impact_analysis = data.getImpactAnalysis()
  1818. critical_files = data.getCriticalFiles()
  1819. f.write('<h3>Critical Files in Project (%d files identified)</h3>' % len(critical_files))
  1820. if critical_files:
  1821. f.write('<ul>')
  1822. for critical_file in critical_files[:20]: # Show first 20
  1823. f.write('<li>%s</li>' % critical_file)
  1824. f.write('</ul>')
  1825. if len(critical_files) > 20:
  1826. f.write('<p>... and %d more files</p>' % (len(critical_files) - 20))
  1827. f.write('<h3>Author Impact Analysis</h3>')
  1828. f.write('<table class="impact-analysis sortable" id="impact-analysis">')
  1829. f.write('<tr>')
  1830. f.write('<th>Author</th>')
  1831. f.write('<th>Impact Score</th>')
  1832. f.write('<th>Critical Files Touched</th>')
  1833. f.write('<th>Bug Potential</th>')
  1834. f.write('<th>High Impact Files</th>')
  1835. f.write('<th>Assessment</th>')
  1836. f.write('</tr>')
  1837. # Sort by impact score
  1838. sorted_impact = sorted(impact_analysis.items(), key=lambda x: x[1].get('impact_score', 0), reverse=True)
  1839. for author, impact in sorted_impact:
  1840. impact_score = impact.get('impact_score', 0)
  1841. critical_files_touched = len(impact.get('critical_files', []))
  1842. bug_potential = impact.get('bug_potential', 0)
  1843. high_impact_files = len(impact.get('high_impact_files', []))
  1844. # Generate impact assessment
  1845. if impact_score > 200:
  1846. impact_assessment = "Very High Impact"
  1847. elif impact_score > 100:
  1848. impact_assessment = "High Impact"
  1849. elif impact_score > 50:
  1850. impact_assessment = "Medium Impact"
  1851. else:
  1852. impact_assessment = "Low Impact"
  1853. if bug_potential > 30:
  1854. impact_assessment += " (High Bug Risk)"
  1855. elif bug_potential > 15:
  1856. impact_assessment += " (Medium Bug Risk)"
  1857. f.write('<tr>')
  1858. f.write('<td>%s</td>' % author)
  1859. f.write('<td>%.1f</td>' % impact_score)
  1860. f.write('<td>%d</td>' % critical_files_touched)
  1861. f.write('<td>%.1f%%</td>' % bug_potential)
  1862. f.write('<td>%d</td>' % high_impact_files)
  1863. f.write('<td>%s</td>' % impact_assessment)
  1864. f.write('</tr>')
  1865. f.write('</table>')
  1866. # Team Collaboration Analysis
  1867. f.write(html_header(2, 'Team Collaboration Analysis'))
  1868. collaboration_data = data.getTeamCollaboration()
  1869. f.write('<table class="collaboration sortable" id="collaboration">')
  1870. f.write('<tr>')
  1871. f.write('<th>Author</th>')
  1872. f.write('<th>Files Owned</th>')
  1873. f.write('<th>Collaborators</th>')
  1874. f.write('<th>Shared Files</th>')
  1875. f.write('<th>Top Collaborations</th>')
  1876. f.write('</tr>')
  1877. for author, collab in collaboration_data.items():
  1878. files_owned = len(collab.get('file_ownership', {}))
  1879. worked_with = collab.get('worked_with', {})
  1880. collaborators_count = len(worked_with)
  1881. # Count total shared files
  1882. shared_files = 0
  1883. top_collabs = []
  1884. for other_author, shared_files_dict in worked_with.items():
  1885. shared_count = len(shared_files_dict)
  1886. shared_files += shared_count
  1887. top_collabs.append((other_author, shared_count))
  1888. # Sort and take top 3 collaborations
  1889. top_collabs.sort(key=lambda x: x[1], reverse=True)
  1890. top_collabs_str = ', '.join([f"{author}({count})" for author, count in top_collabs[:3]])
  1891. f.write('<tr>')
  1892. f.write('<td>%s</td>' % author)
  1893. f.write('<td>%d</td>' % files_owned)
  1894. f.write('<td>%d</td>' % collaborators_count)
  1895. f.write('<td>%d</td>' % shared_files)
  1896. f.write('<td>%s</td>' % top_collabs_str)
  1897. f.write('</tr>')
  1898. f.write('</table>')
  1899. f.write('</body></html>')
  1900. f.close()
  1901. ###
  1902. # Activity
  1903. f = open(path + '/activity.html', 'w')
  1904. self.printHeader(f)
  1905. f.write('<h1>Activity</h1>')
  1906. self.printNav(f)
  1907. # Last 30 days
  1908. f.write(html_header(2, 'Last 30 Days'))
  1909. last_30_days = data.getLast30DaysActivity()
  1910. f.write('<dl>')
  1911. f.write('<dt>Commits</dt><dd>%d</dd>' % last_30_days['commits'])
  1912. f.write('<dt>Lines added</dt><dd>%d</dd>' % last_30_days['lines_added'])
  1913. f.write('<dt>Lines removed</dt><dd>%d</dd>' % last_30_days['lines_removed'])
  1914. f.write('<dt>Net lines</dt><dd>%d</dd>' % (last_30_days['lines_added'] - last_30_days['lines_removed']))
  1915. f.write('</dl>')
  1916. # Last 12 months
  1917. f.write(html_header(2, 'Last 12 Months'))
  1918. last_12_months = data.getLast12MonthsActivity()
  1919. if last_12_months['commits']:
  1920. f.write('<table class="sortable" id="last12months">')
  1921. f.write('<tr><th>Month</th><th>Commits</th><th>Lines Added</th><th>Lines Removed</th><th>Net Lines</th></tr>')
  1922. # Sort months in reverse chronological order
  1923. sorted_months = sorted(last_12_months['commits'].keys(), reverse=True)
  1924. for month in sorted_months:
  1925. commits = last_12_months['commits'][month]
  1926. lines_added = last_12_months['lines_added'].get(month, 0)
  1927. lines_removed = last_12_months['lines_removed'].get(month, 0)
  1928. net_lines = lines_added - lines_removed
  1929. f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td><td>%d</td></tr>' %
  1930. (month, commits, lines_added, lines_removed, net_lines))
  1931. f.write('</table>')
  1932. else:
  1933. f.write('<p>No activity in the last 12 months.</p>')
  1934. # Pace of Changes
  1935. f.write(html_header(2, 'Pace of Changes'))
  1936. f.write('<p>Number of line changes (additions + deletions) over time</p>')
  1937. pace_data = data.getPaceOfChanges()
  1938. if pace_data:
  1939. f.write('<img src="pace_of_changes.png" alt="Pace of Changes">')
  1940. # Generate pace of changes data file
  1941. fg = open(path + '/pace_of_changes.dat', 'w')
  1942. for stamp in sorted(pace_data.keys()):
  1943. fg.write('%d %d\n' % (stamp, pace_data[stamp]))
  1944. fg.close()
  1945. else:
  1946. f.write('<p>No pace data available.</p>')
  1947. # Weekly activity
  1948. WEEKS = 32
  1949. f.write(html_header(2, 'Weekly activity'))
  1950. f.write('<p>Last %d weeks</p>' % WEEKS)
  1951. # generate weeks to show (previous N weeks from now)
  1952. now = datetime.datetime.now()
  1953. deltaweek = datetime.timedelta(7)
  1954. weeks = []
  1955. stampcur = now
  1956. for i in range(0, WEEKS):
  1957. weeks.insert(0, stampcur.strftime('%Y-%W'))
  1958. stampcur -= deltaweek
  1959. # top row: commits & bar
  1960. f.write('<table class="noborders"><tr>')
  1961. for i in range(0, WEEKS):
  1962. commits = 0
  1963. if weeks[i] in data.activity_by_year_week:
  1964. commits = data.activity_by_year_week[weeks[i]]
  1965. percentage = 0
  1966. if weeks[i] in data.activity_by_year_week:
  1967. percentage = float(data.activity_by_year_week[weeks[i]]) / data.activity_by_year_week_peak
  1968. height = max(1, int(200 * percentage))
  1969. f.write('<td style="text-align: center; vertical-align: bottom">%d<div style="display: block; background-color: red; width: 20px; height: %dpx"></div></td>' % (commits, height))
  1970. # bottom row: year/week
  1971. f.write('</tr><tr>')
  1972. for i in range(0, WEEKS):
  1973. f.write('<td>%s</td>' % (WEEKS - i))
  1974. f.write('</tr></table>')
  1975. # Hour of Day
  1976. f.write(html_header(2, 'Hour of Day'))
  1977. hour_of_day = data.getActivityByHourOfDay()
  1978. f.write('<table><tr><th>Hour</th>')
  1979. for i in range(0, 24):
  1980. f.write('<th>%d</th>' % i)
  1981. f.write('</tr>\n<tr><th>Commits</th>')
  1982. fp = open(path + '/hour_of_day.dat', 'w')
  1983. for i in range(0, 24):
  1984. if i in hour_of_day:
  1985. r = 127 + int((float(hour_of_day[i]) / hour_of_day_busiest) * 128)
  1986. f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
  1987. fp.write('%d %d\n' % (i, hour_of_day[i]))
  1988. else:
  1989. f.write('<td>0</td>')
  1990. fp.write('%d 0\n' % i)
  1991. fp.close()
  1992. f.write('</tr>\n<tr><th>%</th>')
  1993. totalcommits = total_commits
  1994. for i in range(0, 24):
  1995. if i in hour_of_day:
  1996. r = 127 + int((float(hour_of_day[i]) / hour_of_day_busiest) * 128)
  1997. percent = (100.0 * hour_of_day[i]) / totalcommits if totalcommits else 0.0
  1998. f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, percent))
  1999. else:
  2000. f.write('<td>0.00</td>')
  2001. f.write('</tr></table>')
  2002. f.write('<img src="hour_of_day.png" alt="Hour of Day">')
  2003. fg = open(path + '/hour_of_day.dat', 'w')
  2004. for i in range(0, 24):
  2005. if i in hour_of_day:
  2006. fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
  2007. else:
  2008. fg.write('%d 0\n' % (i + 1))
  2009. fg.close()
  2010. # Day of Week
  2011. f.write(html_header(2, 'Day of Week'))
  2012. day_of_week = data.getActivityByDayOfWeek()
  2013. f.write('<div class="vtable"><table>')
  2014. f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
  2015. fp = open(path + '/day_of_week.dat', 'w')
  2016. for d in range(0, 7):
  2017. commits = 0
  2018. if d in day_of_week:
  2019. commits = day_of_week[d]
  2020. fp.write('%d %s %d\n' % (d + 1, WEEKDAYS[d], commits))
  2021. f.write('<tr>')
  2022. f.write('<th>%s</th>' % (WEEKDAYS[d]))
  2023. if d in day_of_week:
  2024. percent = (100.0 * day_of_week[d]) / totalcommits if totalcommits else 0.0
  2025. f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], percent))
  2026. else:
  2027. f.write('<td>0</td>')
  2028. f.write('</tr>')
  2029. f.write('</table></div>')
  2030. f.write('<img src="day_of_week.png" alt="Day of Week">')
  2031. fp.close()
  2032. # Hour of Week
  2033. f.write(html_header(2, 'Hour of Week'))
  2034. f.write('<table>')
  2035. f.write('<tr><th>Weekday</th>')
  2036. for hour in range(0, 24):
  2037. f.write('<th>%d</th>' % (hour))
  2038. f.write('</tr>')
  2039. for weekday in range(0, 7):
  2040. f.write('<tr><th>%s</th>' % (WEEKDAYS[weekday]))
  2041. for hour in range(0, 24):
  2042. try:
  2043. commits = data.activity_by_hour_of_week[weekday][hour]
  2044. except KeyError:
  2045. commits = 0
  2046. if commits != 0:
  2047. f.write('<td')
  2048. r = 127 + int((float(commits) / data.activity_by_hour_of_week_busiest) * 128)
  2049. f.write(' style="background-color: rgb(%d, 0, 0)"' % r)
  2050. f.write('>%d</td>' % commits)
  2051. else:
  2052. f.write('<td></td>')
  2053. f.write('</tr>')
  2054. f.write('</table>')
  2055. # Month of Year
  2056. f.write(html_header(2, 'Month of Year'))
  2057. f.write('<div class="vtable"><table>')
  2058. f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
  2059. fp = open (path + '/month_of_year.dat', 'w')
  2060. for mm in range(1, 13):
  2061. commits = 0
  2062. if mm in data.activity_by_month_of_year:
  2063. commits = data.activity_by_month_of_year[mm]
  2064. percent = (100.0 * commits) / total_commits if total_commits else 0.0
  2065. f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, percent))
  2066. fp.write('%d %d\n' % (mm, commits))
  2067. fp.close()
  2068. f.write('</table></div>')
  2069. f.write('<img src="month_of_year.png" alt="Month of Year">')
  2070. # Commits by year/month
  2071. f.write(html_header(2, 'Commits by year/month'))
  2072. f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th><th>Lines added</th><th>Lines removed</th></tr>')
  2073. for yymm in reversed(sorted(data.commits_by_month.keys())):
  2074. f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td></tr>' % (yymm, data.commits_by_month.get(yymm,0), data.lines_added_by_month.get(yymm,0), data.lines_removed_by_month.get(yymm,0)))
  2075. f.write('</table></div>')
  2076. f.write('<img src="commits_by_year_month.png" alt="Commits by year/month">')
  2077. fg = open(path + '/commits_by_year_month.dat', 'w')
  2078. for yymm in sorted(data.commits_by_month.keys()):
  2079. fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
  2080. fg.close()
  2081. # Commits by year
  2082. f.write(html_header(2, 'Commits by Year'))
  2083. f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th><th>Lines added</th><th>Lines removed</th></tr>')
  2084. for yy in reversed(sorted(data.commits_by_year.keys())):
  2085. commits = data.commits_by_year.get(yy, 0)
  2086. percent = (100.0 * commits) / total_commits if total_commits else 0.0
  2087. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td></tr>' % (yy, commits, percent, data.lines_added_by_year.get(yy,0), data.lines_removed_by_year.get(yy,0)))
  2088. f.write('</table></div>')
  2089. f.write('<img src="commits_by_year.png" alt="Commits by Year">')
  2090. fg = open(path + '/commits_by_year.dat', 'w')
  2091. for yy in sorted(data.commits_by_year.keys()):
  2092. fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
  2093. fg.close()
  2094. # Commits by timezone
  2095. f.write(html_header(2, 'Commits by Timezone'))
  2096. f.write('<table><tr>')
  2097. f.write('<th>Timezone</th><th>Commits</th>')
  2098. f.write('</tr>')
  2099. max_commits_on_tz = max(data.commits_by_timezone.values()) if data.commits_by_timezone else 1
  2100. for i in sorted(data.commits_by_timezone.keys(), key = lambda n : int(n)):
  2101. commits = data.commits_by_timezone[i]
  2102. r = 127 + int((float(commits) / max_commits_on_tz) * 128)
  2103. f.write('<tr><th>%s</th><td style="background-color: rgb(%d, 0, 0)">%d</td></tr>' % (i, r, commits))
  2104. f.write('</table>')
  2105. f.write('</body></html>')
  2106. f.close()
  2107. ###
  2108. # Authors
  2109. f = open(path + '/authors.html', 'w')
  2110. self.printHeader(f)
  2111. f.write('<h1>Authors</h1>')
  2112. self.printNav(f)
  2113. # Authors :: List of authors
  2114. f.write(html_header(2, 'List of Authors'))
  2115. f.write('<table class="authors sortable" id="authors">')
  2116. f.write('<tr><th>Author</th><th>Commits (%)</th><th>+ lines</th><th>- lines</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th>Active days</th><th># by commits</th></tr>')
  2117. for author in data.getAuthors(conf['max_authors']):
  2118. info = data.getAuthorInfo(author)
  2119. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td><td>%d</td></tr>' % (author, info['commits'], info['commits_frac'], info['lines_added'], info['lines_removed'], info['date_first'], info['date_last'], info['timedelta'], len(info['active_days']), info['place_by_commits']))
  2120. f.write('</table>')
  2121. allauthors = data.getAuthors()
  2122. if len(allauthors) > conf['max_authors']:
  2123. rest = allauthors[conf['max_authors']:]
  2124. f.write('<p class="moreauthors">These didn\'t make it to the top: %s</p>' % ', '.join(rest))
  2125. f.write(html_header(2, 'Cumulated Added Lines of Code per Author'))
  2126. f.write('<img src="lines_of_code_by_author.png" alt="Lines of code per Author">')
  2127. if len(allauthors) > conf['max_authors']:
  2128. f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])
  2129. f.write(html_header(2, 'Commits per Author'))
  2130. f.write('<img src="commits_by_author.png" alt="Commits per Author">')
  2131. if len(allauthors) > conf['max_authors']:
  2132. f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])
  2133. fgl = open(path + '/lines_of_code_by_author.dat', 'w')
  2134. fgc = open(path + '/commits_by_author.dat', 'w')
  2135. lines_by_authors = {} # cumulated added lines by
  2136. # author. to save memory,
  2137. # changes_by_date_by_author[stamp][author] is defined
  2138. # only at points where author commits.
  2139. # lines_by_authors allows us to generate all the
  2140. # points in the .dat file.
  2141. # Don't rely on getAuthors to give the same order each
  2142. # time. Be robust and keep the list in a variable.
  2143. commits_by_authors = {} # cumulated added lines by
  2144. self.authors_to_plot = data.getAuthors(conf['max_authors'])
  2145. for author in self.authors_to_plot:
  2146. lines_by_authors[author] = 0
  2147. commits_by_authors[author] = 0
  2148. for stamp in sorted(data.changes_by_date_by_author.keys()):
  2149. fgl.write('%d' % stamp)
  2150. fgc.write('%d' % stamp)
  2151. for author in self.authors_to_plot:
  2152. if author in data.changes_by_date_by_author[stamp]:
  2153. lines_by_authors[author] = data.changes_by_date_by_author[stamp][author]['lines_added']
  2154. commits_by_authors[author] = data.changes_by_date_by_author[stamp][author]['commits']
  2155. fgl.write(' %d' % lines_by_authors[author])
  2156. fgc.write(' %d' % commits_by_authors[author])
  2157. fgl.write('\n')
  2158. fgc.write('\n')
  2159. fgl.close()
  2160. fgc.close()
  2161. # Authors :: Author of Month
  2162. f.write(html_header(2, 'Author of Month'))
  2163. f.write('<table class="sortable" id="aom">')
  2164. f.write('<tr><th>Month</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
  2165. for yymm in reversed(sorted(data.author_of_month.keys())):
  2166. authordict = data.author_of_month[yymm]
  2167. authors = getkeyssortedbyvalues(authordict)
  2168. authors.reverse()
  2169. commits = data.author_of_month[yymm][authors[0]]
  2170. next = ', '.join(authors[1:conf['authors_top']+1])
  2171. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yymm, authors[0], commits, (100.0 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], next, len(authors)))
  2172. f.write('</table>')
  2173. f.write(html_header(2, 'Author of Year'))
  2174. f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
  2175. for yy in reversed(sorted(data.author_of_year.keys())):
  2176. authordict = data.author_of_year[yy]
  2177. authors = getkeyssortedbyvalues(authordict)
  2178. authors.reverse()
  2179. commits = data.author_of_year[yy][authors[0]]
  2180. next = ', '.join(authors[1:conf['authors_top']+1])
  2181. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yy, authors[0], commits, (100.0 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next, len(authors)))
  2182. f.write('</table>')
  2183. # Domains
  2184. f.write(html_header(2, 'Commits by Domains'))
  2185. domains_by_commits = getkeyssortedbyvaluekey(data.domains, 'commits')
  2186. domains_by_commits.reverse() # most first
  2187. f.write('<div class="vtable"><table>')
  2188. f.write('<tr><th>Domains</th><th>Total (%)</th></tr>')
  2189. fp = open(path + '/domains.dat', 'w')
  2190. n = 0
  2191. for domain in domains_by_commits:
  2192. if n == conf['max_domains']:
  2193. break
  2194. commits = 0
  2195. n += 1
  2196. info = data.getDomainInfo(domain)
  2197. fp.write('%s %d %d\n' % (domain, n , info['commits']))
  2198. percent = (100.0 * info['commits'] / total_commits) if total_commits else 0.0
  2199. f.write('<tr><th>%s</th><td>%d (%.2f%%)</td></tr>' % (domain, info['commits'], percent))
  2200. f.write('</table></div>')
  2201. f.write('<img src="domains.png" alt="Commits by Domains">')
  2202. fp.close()
  2203. f.write('</body></html>')
  2204. f.close()
  2205. ###
  2206. # Branches
  2207. f = open(path + '/branches.html', 'w')
  2208. self.printHeader(f)
  2209. f.write('<h1>Branches</h1>')
  2210. self.printNav(f)
  2211. # Branch summary
  2212. branches = data.getBranches() if hasattr(data, 'getBranches') else {}
  2213. unmerged_branches = data.getUnmergedBranches() if hasattr(data, 'getUnmergedBranches') else []
  2214. main_branch = data.getMainBranch() if hasattr(data, 'getMainBranch') else 'master'
  2215. f.write('<dl>')
  2216. f.write('<dt>Total Branches</dt><dd>%d</dd>' % len(branches))
  2217. if unmerged_branches:
  2218. f.write('<dt>Unmerged Branches</dt><dd>%d</dd>' % len(unmerged_branches))
  2219. f.write('<dt>Main Branch</dt><dd>%s</dd>' % main_branch)
  2220. f.write('</dl>')
  2221. if branches:
  2222. # Branches :: All Branches
  2223. f.write(html_header(2, 'All Branches'))
  2224. f.write('<table class="branches sortable" id="branches">')
  2225. f.write('<tr><th>Branch</th><th>Status</th><th>Commits</th><th>Lines Added</th><th>Lines Removed</th><th>Total Changes</th><th>Authors</th></tr>')
  2226. # Sort branches by total changes (lines added + removed)
  2227. sorted_branches = sorted(branches.items(),
  2228. key=lambda x: x[1].get('lines_added', 0) + x[1].get('lines_removed', 0),
  2229. reverse=True)
  2230. for branch_name, branch_info in sorted_branches:
  2231. status = 'Merged' if branch_info.get('is_merged', True) else 'Unmerged'
  2232. commits = branch_info.get('commits', 0)
  2233. lines_added = branch_info.get('lines_added', 0)
  2234. lines_removed = branch_info.get('lines_removed', 0)
  2235. total_changes = lines_added + lines_removed
  2236. authors_count = len(branch_info.get('authors', {}))
  2237. # Highlight unmerged branches
  2238. row_class = 'class="unmerged"' if not branch_info.get('is_merged', True) else ''
  2239. f.write('<tr %s><td>%s</td><td>%s</td><td>%d</td><td>%d</td><td>%d</td><td>%d</td><td>%d</td></tr>' %
  2240. (row_class, branch_name, status, commits, lines_added, lines_removed, total_changes, authors_count))
  2241. f.write('</table>')
  2242. # Unmerged Branches Detail
  2243. if unmerged_branches:
  2244. f.write(html_header(2, 'Unmerged Branches Detail'))
  2245. f.write('<p>These branches have not been merged into the main branch (%s) and may represent ongoing work or abandoned features.</p>' % main_branch)
  2246. f.write('<table class="unmerged-branches sortable" id="unmerged">')
  2247. f.write('<tr><th>Branch</th><th>Commits</th><th>Authors</th><th>Top Contributors</th><th>Lines Added</th><th>Lines Removed</th></tr>')
  2248. unmerged_stats = data.getUnmergedBranchStats() if hasattr(data, 'getUnmergedBranchStats') else {}
  2249. for branch_name in unmerged_branches:
  2250. if branch_name in unmerged_stats:
  2251. branch_info = unmerged_stats[branch_name]
  2252. commits = branch_info.get('commits', 0)
  2253. authors = branch_info.get('authors', {})
  2254. lines_added = branch_info.get('lines_added', 0)
  2255. lines_removed = branch_info.get('lines_removed', 0)
  2256. # Get top contributors
  2257. top_contributors = sorted(authors.items(), key=lambda x: x[1].get('commits', 0), reverse=True)[:3]
  2258. contributors_str = ', '.join([f"{author} ({info.get('commits', 0)})" for author, info in top_contributors])
  2259. f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%s</td><td>%d</td><td>%d</td></tr>' %
  2260. (branch_name, commits, len(authors), contributors_str, lines_added, lines_removed))
  2261. f.write('</table>')
  2262. # Branch Activity by Author
  2263. f.write(html_header(2, 'Branch Activity by Author'))
  2264. f.write('<p>This table shows which authors have contributed to which branches.</p>')
  2265. # Collect all unique authors across all branches
  2266. all_authors = set()
  2267. for branch_info in branches.values():
  2268. all_authors.update(branch_info.get('authors', {}).keys())
  2269. if all_authors and len(branches) > 1:
  2270. f.write('<table class="branch-authors sortable" id="branch-authors">')
  2271. header = '<tr><th>Author</th>'
  2272. for branch_name in sorted(branches.keys()):
  2273. header += '<th>%s</th>' % branch_name
  2274. header += '<th>Total Branches</th></tr>'
  2275. f.write(header)
  2276. for author in sorted(all_authors):
  2277. row = '<tr><td>%s</td>' % author
  2278. branch_count = 0
  2279. for branch_name in sorted(branches.keys()):
  2280. branch_authors = branches[branch_name].get('authors', {})
  2281. if author in branch_authors:
  2282. commits = branch_authors[author].get('commits', 0)
  2283. row += '<td>%d</td>' % commits
  2284. branch_count += 1
  2285. else:
  2286. row += '<td>-</td>'
  2287. row += '<td>%d</td></tr>' % branch_count
  2288. f.write(row)
  2289. f.write('</table>')
  2290. f.write('</body></html>')
  2291. f.close()
  2292. ###
  2293. # Files
  2294. f = open(path + '/files.html', 'w')
  2295. self.printHeader(f)
  2296. f.write('<h1>Files</h1>')
  2297. self.printNav(f)
  2298. f.write('<dl>\n')
  2299. f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
  2300. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  2301. try:
  2302. avg_size = data.getAverageFileSize()
  2303. f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % avg_size)
  2304. except (AttributeError, ZeroDivisionError):
  2305. # Fallback to old calculation if new method fails
  2306. avg_size = float(data.getTotalSize()) / data.getTotalFiles() if data.getTotalFiles() else 0.0
  2307. f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % avg_size)
  2308. try:
  2309. avg_revisions = data.getAverageRevisionsPerFile()
  2310. f.write('<dt>Average revisions per file</dt><dd>%.2f</dd>' % avg_revisions)
  2311. except AttributeError:
  2312. pass
  2313. f.write('</dl>\n')
  2314. # Files :: File count by date
  2315. f.write(html_header(2, 'File count by date'))
  2316. # use set to get rid of duplicate/unnecessary entries
  2317. files_by_date = set()
  2318. for stamp in sorted(data.files_by_stamp.keys()):
  2319. files_by_date.add('%s %d' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
  2320. fg = open(path + '/files_by_date.dat', 'w')
  2321. for line in sorted(list(files_by_date)):
  2322. fg.write('%s\n' % line)
  2323. #for stamp in sorted(data.files_by_stamp.keys()):
  2324. # fg.write('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
  2325. fg.close()
  2326. f.write('<img src="files_by_date.png" alt="Files by Date">')
  2327. #f.write('<h2>Average file size by date</h2>')
  2328. # Files :: Extensions
  2329. f.write(html_header(2, 'Extensions'))
  2330. f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
  2331. for ext in sorted(data.extensions.keys()):
  2332. files = data.extensions[ext]['files']
  2333. lines = data.extensions[ext]['lines']
  2334. loc_percentage = (100.0 * lines) / data.getTotalLOC() if data.getTotalLOC() else 0.0
  2335. files_percentage = (100.0 * files) / data.getTotalFiles() if data.getTotalFiles() else 0.0
  2336. lines_per_file = (lines // files) if files else 0
  2337. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (ext, files, files_percentage, lines, loc_percentage, lines_per_file))
  2338. f.write('</table>')
  2339. # SLOC Breakdown by Extension
  2340. f.write(html_header(2, 'Source Lines of Code (SLOC) Breakdown'))
  2341. f.write('<table class="sortable" id="sloc"><tr><th>Extension</th><th>Source Lines (%)</th><th>Comment Lines (%)</th><th>Blank Lines (%)</th><th>Total Lines</th></tr>')
  2342. sloc_data = data.getSLOCByExtension()
  2343. for ext in sorted(sloc_data.keys()):
  2344. if sloc_data[ext]['total'] == 0:
  2345. continue
  2346. source = sloc_data[ext]['source']
  2347. comments = sloc_data[ext]['comments']
  2348. blank = sloc_data[ext]['blank']
  2349. total = sloc_data[ext]['total']
  2350. source_pct = (100.0 * source / total) if total else 0.0
  2351. comment_pct = (100.0 * comments / total) if total else 0.0
  2352. blank_pct = (100.0 * blank / total) if total else 0.0
  2353. f.write('<tr><td>%s</td><td>%d (%.1f%%)</td><td>%d (%.1f%%)</td><td>%d (%.1f%%)</td><td>%d</td></tr>' %
  2354. (ext, source, source_pct, comments, comment_pct, blank, blank_pct, total))
  2355. f.write('</table>')
  2356. # Largest Files
  2357. try:
  2358. largest_files = data.getLargestFiles(15)
  2359. if largest_files:
  2360. f.write(html_header(2, 'Largest Files'))
  2361. f.write('<table class="sortable" id="largest_files"><tr><th>File</th><th>Size (bytes)</th><th>Size (KB)</th></tr>')
  2362. for filepath, size in largest_files:
  2363. size_kb = size / 1024.0
  2364. f.write('<tr><td>%s</td><td>%d</td><td>%.1f</td></tr>' % (filepath, size, size_kb))
  2365. f.write('</table>')
  2366. except (AttributeError, TypeError):
  2367. pass
  2368. # Files with Most Revisions (Hotspots)
  2369. try:
  2370. hotspot_files = data.getFilesWithMostRevisions(15)
  2371. if hotspot_files:
  2372. f.write(html_header(2, 'Files with Most Revisions (Hotspots)'))
  2373. f.write('<table class="sortable" id="hotspot_files"><tr><th>File</th><th>Revisions</th><th>% of Total Commits</th></tr>')
  2374. total_commits = data.getTotalCommits()
  2375. for filepath, revisions in hotspot_files:
  2376. revision_pct = (100.0 * revisions / total_commits) if total_commits else 0.0
  2377. f.write('<tr><td>%s</td><td>%d</td><td>%.2f%%</td></tr>' % (filepath, revisions, revision_pct))
  2378. f.write('</table>')
  2379. except (AttributeError, TypeError):
  2380. pass
  2381. # Directory Activity
  2382. try:
  2383. active_directories = data.getDirectoriesByActivity(15)
  2384. if active_directories:
  2385. f.write(html_header(2, 'Most Active Directories'))
  2386. f.write('<table class="sortable" id="active_directories"><tr><th>Directory</th><th>Total Lines Changed</th><th>Lines Added</th><th>Lines Removed</th><th>Files</th></tr>')
  2387. for directory, total_lines, lines_added, lines_removed, file_count in active_directories:
  2388. directory_display = directory if directory != '.' else '(root)'
  2389. f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td><td>%d</td></tr>' % (directory_display, total_lines, lines_added, lines_removed, file_count))
  2390. f.write('</table>')
  2391. except (AttributeError, TypeError):
  2392. pass
  2393. f.write('</body></html>')
  2394. f.close()
  2395. ###
  2396. # Lines
  2397. f = open(path + '/lines.html', 'w')
  2398. self.printHeader(f)
  2399. f.write('<h1>Lines</h1>')
  2400. self.printNav(f)
  2401. f.write('<dl>\n')
  2402. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  2403. f.write('<dt>Source lines</dt><dd>%d (%.1f%%)</dd>' % (
  2404. data.getTotalSourceLines(),
  2405. (100.0 * data.getTotalSourceLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0
  2406. ))
  2407. f.write('<dt>Comment lines</dt><dd>%d (%.1f%%)</dd>' % (
  2408. data.getTotalCommentLines(),
  2409. (100.0 * data.getTotalCommentLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0
  2410. ))
  2411. f.write('<dt>Blank lines</dt><dd>%d (%.1f%%)</dd>' % (
  2412. data.getTotalBlankLines(),
  2413. (100.0 * data.getTotalBlankLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0
  2414. ))
  2415. f.write('</dl>\n')
  2416. f.write(html_header(2, 'Lines of Code'))
  2417. f.write('<p>This chart shows the total lines of code over time, including source code, comments, and blank lines.</p>')
  2418. f.write('<img src="lines_of_code.png" alt="Lines of Code">')
  2419. fg = open(path + '/lines_of_code.dat', 'w')
  2420. for stamp in sorted(data.changes_by_date.keys()):
  2421. fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
  2422. fg.close()
  2423. # Add SLOC composition chart data
  2424. f.write(html_header(2, 'Source Lines of Code (SLOC) Composition'))
  2425. f.write('<p>Breakdown of code composition by file type and content type:</p>')
  2426. sloc_data = data.getSLOCByExtension()
  2427. if sloc_data:
  2428. f.write('<table class="sortable" id="sloc_breakdown">')
  2429. f.write('<tr><th>Extension</th><th>Source Lines</th><th>Comment Lines</th><th>Blank Lines</th><th>Total</th><th>Source %</th><th>Comment %</th></tr>')
  2430. sorted_sloc = sorted(sloc_data.items(), key=lambda x: x[1]['total'], reverse=True)
  2431. for ext, sloc_info in sorted_sloc[:15]: # Top 15 extensions
  2432. if sloc_info['total'] == 0:
  2433. continue
  2434. ext_display = ext if ext else '(no extension)'
  2435. source_pct = (100.0 * sloc_info['source'] / sloc_info['total']) if sloc_info['total'] else 0.0
  2436. comment_pct = (100.0 * sloc_info['comments'] / sloc_info['total']) if sloc_info['total'] else 0.0
  2437. f.write('<tr>')
  2438. f.write('<td>%s</td>' % ext_display)
  2439. f.write('<td>%d</td>' % sloc_info['source'])
  2440. f.write('<td>%d</td>' % sloc_info['comments'])
  2441. f.write('<td>%d</td>' % sloc_info['blank'])
  2442. f.write('<td>%d</td>' % sloc_info['total'])
  2443. f.write('<td>%.1f%%</td>' % source_pct)
  2444. f.write('<td>%.1f%%</td>' % comment_pct)
  2445. f.write('</tr>')
  2446. f.write('</table>')
  2447. else:
  2448. f.write('<p>No SLOC data available.</p>')
  2449. f.write('</body></html>')
  2450. f.close()
  2451. ###
  2452. # tags.html
  2453. f = open(path + '/tags.html', 'w')
  2454. self.printHeader(f)
  2455. f.write('<h1>Tags</h1>')
  2456. self.printNav(f)
  2457. f.write('<dl>')
  2458. f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
  2459. if len(data.tags) > 0:
  2460. f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (1.0 * data.getTotalCommits() / len(data.tags)))
  2461. f.write('</dl>')
  2462. f.write('<table class="tags">')
  2463. f.write('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
  2464. # sort the tags by date desc
  2465. tags_sorted_by_date_desc = list(map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items())))))
  2466. for tag in tags_sorted_by_date_desc:
  2467. authorinfo = []
  2468. self.authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
  2469. for i in reversed(self.authors_by_commits):
  2470. authorinfo.append('%s (%d)' % (i, data.tags[tag]['authors'][i]))
  2471. f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
  2472. f.write('</table>')
  2473. f.write('</body></html>')
  2474. f.close()
  2475. self.createGraphs(path)
  2476. def _generateAssessment(self, performance, patterns):
  2477. """Generate a text assessment for an author based on their performance metrics."""
  2478. efficiency = performance.get('efficiency_score', 0)
  2479. consistency = performance.get('consistency', 0)
  2480. leadership = performance.get('leadership_score', 0)
  2481. contribution = performance.get('contribution_percentage', 0)
  2482. small_commits_ratio = patterns.get('small_commits', 0) / max(patterns.get('total_commits', 1), 1)
  2483. large_commits_ratio = patterns.get('large_commits', 0) / max(patterns.get('total_commits', 1), 1)
  2484. assessments = []
  2485. # Contribution level
  2486. if contribution > 25:
  2487. assessments.append("Major Contributor")
  2488. elif contribution > 10:
  2489. assessments.append("Regular Contributor")
  2490. elif contribution > 2:
  2491. assessments.append("Minor Contributor")
  2492. else:
  2493. assessments.append("Occasional Contributor")
  2494. # Quality assessment
  2495. if efficiency > 80:
  2496. assessments.append("High Quality")
  2497. elif efficiency > 60:
  2498. assessments.append("Good Quality")
  2499. elif efficiency > 40:
  2500. assessments.append("Average Quality")
  2501. else:
  2502. assessments.append("Needs Improvement")
  2503. # Work pattern assessment
  2504. if small_commits_ratio > 0.7:
  2505. assessments.append("Frequent Small Commits")
  2506. elif large_commits_ratio > 0.3:
  2507. assessments.append("Prefers Large Commits")
  2508. if consistency > 80:
  2509. assessments.append("Very Consistent")
  2510. elif consistency > 60:
  2511. assessments.append("Consistent")
  2512. if leadership > 70:
  2513. assessments.append("Leadership Role")
  2514. elif leadership > 50:
  2515. assessments.append("Collaborative")
  2516. return ", ".join(assessments) if assessments else "Standard Contributor"
  2517. def createGraphs(self, path):
  2518. print('Generating graphs...')
  2519. # hour of day
  2520. f = open(path + '/hour_of_day.plot', 'w')
  2521. f.write(GNUPLOT_COMMON)
  2522. f.write(
  2523. """
  2524. set output 'hour_of_day.png'
  2525. unset key
  2526. set xrange [0.5:24.5]
  2527. set yrange [0:]
  2528. set xtics 4
  2529. set grid y
  2530. set ylabel "Commits"
  2531. plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
  2532. """)
  2533. f.close()
  2534. # day of week
  2535. f = open(path + '/day_of_week.plot', 'w')
  2536. f.write(GNUPLOT_COMMON)
  2537. f.write(
  2538. """
  2539. set output 'day_of_week.png'
  2540. unset key
  2541. set xrange [0.5:7.5]
  2542. set yrange [0:]
  2543. set xtics 1
  2544. set grid y
  2545. set ylabel "Commits"
  2546. plot 'day_of_week.dat' using 1:3:(0.5):xtic(2) w boxes fs solid
  2547. """)
  2548. f.close()
  2549. # Domains
  2550. f = open(path + '/domains.plot', 'w')
  2551. f.write(GNUPLOT_COMMON)
  2552. f.write(
  2553. """
  2554. set output 'domains.png'
  2555. unset key
  2556. unset xtics
  2557. set yrange [0:]
  2558. set grid y
  2559. set ylabel "Commits"
  2560. plot 'domains.dat' using 2:3:(0.5) with boxes fs solid, '' using 2:3:1 with labels rotate by 45 offset 0,1
  2561. """)
  2562. f.close()
  2563. # Month of Year
  2564. f = open(path + '/month_of_year.plot', 'w')
  2565. f.write(GNUPLOT_COMMON)
  2566. f.write(
  2567. """
  2568. set output 'month_of_year.png'
  2569. unset key
  2570. set xrange [0.5:12.5]
  2571. set yrange [0:]
  2572. set xtics 1
  2573. set grid y
  2574. set ylabel "Commits"
  2575. plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
  2576. """)
  2577. f.close()
  2578. # commits_by_year_month
  2579. f = open(path + '/commits_by_year_month.plot', 'w')
  2580. f.write(GNUPLOT_COMMON)
  2581. f.write(
  2582. """
  2583. set output 'commits_by_year_month.png'
  2584. unset key
  2585. set yrange [0:]
  2586. set xdata time
  2587. set timefmt "%Y-%m"
  2588. set format x "%Y-%m"
  2589. set xtics rotate
  2590. set bmargin 5
  2591. set grid y
  2592. set ylabel "Commits"
  2593. plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
  2594. """)
  2595. f.close()
  2596. # commits_by_year
  2597. f = open(path + '/commits_by_year.plot', 'w')
  2598. f.write(GNUPLOT_COMMON)
  2599. f.write(
  2600. """
  2601. set output 'commits_by_year.png'
  2602. unset key
  2603. set yrange [0:]
  2604. set xtics 1 rotate
  2605. set grid y
  2606. set ylabel "Commits"
  2607. set yrange [0:]
  2608. plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
  2609. """)
  2610. f.close()
  2611. # Files by date
  2612. f = open(path + '/files_by_date.plot', 'w')
  2613. f.write(GNUPLOT_COMMON)
  2614. f.write(
  2615. """
  2616. set output 'files_by_date.png'
  2617. unset key
  2618. set yrange [0:]
  2619. set xdata time
  2620. set timefmt "%Y-%m-%d"
  2621. set format x "%Y-%m-%d"
  2622. set grid y
  2623. set ylabel "Files"
  2624. set xtics rotate
  2625. set ytics autofreq
  2626. set bmargin 6
  2627. plot 'files_by_date.dat' using 1:2 w steps
  2628. """)
  2629. f.close()
  2630. # Lines of Code
  2631. f = open(path + '/lines_of_code.plot', 'w')
  2632. f.write(GNUPLOT_COMMON)
  2633. f.write(
  2634. """
  2635. set output 'lines_of_code.png'
  2636. unset key
  2637. set yrange [0:]
  2638. set xdata time
  2639. set timefmt "%s"
  2640. set format x "%Y-%m-%d"
  2641. set grid y
  2642. set ylabel "Lines"
  2643. set xtics rotate
  2644. set bmargin 6
  2645. plot 'lines_of_code.dat' using 1:2 w lines
  2646. """)
  2647. f.close()
  2648. # Lines of Code Added per author
  2649. f = open(path + '/lines_of_code_by_author.plot', 'w')
  2650. f.write(GNUPLOT_COMMON)
  2651. f.write(
  2652. """
  2653. set terminal png transparent size 640,480
  2654. set output 'lines_of_code_by_author.png'
  2655. set key left top
  2656. set yrange [0:]
  2657. set xdata time
  2658. set timefmt "%s"
  2659. set format x "%Y-%m-%d"
  2660. set grid y
  2661. set ylabel "Lines"
  2662. set xtics rotate
  2663. set bmargin 6
  2664. plot """
  2665. )
  2666. i = 1
  2667. plots = []
  2668. for a in self.authors_to_plot:
  2669. i = i + 1
  2670. author = a.replace("\"", "\\\"").replace("`", "")
  2671. plots.append("""'lines_of_code_by_author.dat' using 1:%d title "%s" w lines""" % (i, author))
  2672. f.write(", ".join(plots))
  2673. f.write('\n')
  2674. f.close()
  2675. # Commits per author
  2676. f = open(path + '/commits_by_author.plot', 'w')
  2677. f.write(GNUPLOT_COMMON)
  2678. f.write(
  2679. """
  2680. set terminal png transparent size 640,480
  2681. set output 'commits_by_author.png'
  2682. set key left top
  2683. set yrange [0:]
  2684. set xdata time
  2685. set timefmt "%s"
  2686. set format x "%Y-%m-%d"
  2687. set grid y
  2688. set ylabel "Commits"
  2689. set xtics rotate
  2690. set bmargin 6
  2691. plot """
  2692. )
  2693. i = 1
  2694. plots = []
  2695. for a in self.authors_to_plot:
  2696. i = i + 1
  2697. author = a.replace("\"", "\\\"").replace("`", "")
  2698. plots.append("""'commits_by_author.dat' using 1:%d title "%s" w lines""" % (i, author))
  2699. f.write(", ".join(plots))
  2700. f.write('\n')
  2701. f.close()
  2702. # Pace of Changes plot
  2703. f = open(path + '/pace_of_changes.plot', 'w')
  2704. f.write(GNUPLOT_COMMON)
  2705. f.write(
  2706. """
  2707. set output 'pace_of_changes.png'
  2708. unset key
  2709. set yrange [0:]
  2710. set xdata time
  2711. set timefmt "%s"
  2712. set format x "%Y-%m-%d"
  2713. set grid y
  2714. set ylabel "Line Changes (Additions + Deletions)"
  2715. set xtics rotate
  2716. set bmargin 6
  2717. plot 'pace_of_changes.dat' using 1:2 w lines lw 2
  2718. """)
  2719. f.close()
  2720. os.chdir(path)
  2721. files = glob.glob(path + '/*.plot')
  2722. for f in files:
  2723. out = getpipeoutput([gnuplot_cmd + ' "%s"' % f])
  2724. if len(out) > 0:
  2725. print(out)
  2726. def printHeader(self, f, title = ''):
  2727. f.write(
  2728. """<!DOCTYPE html>
  2729. <html>
  2730. <head>
  2731. <meta charset="UTF-8">
  2732. <title>GitStats - %s</title>
  2733. <link rel="stylesheet" href="%s" type="text/css">
  2734. <meta name="generator" content="GitStats %s">
  2735. <script type="text/javascript" src="sortable.js"></script>
  2736. </head>
  2737. <body>
  2738. """ % (self.title, conf['style'], getversion()))
  2739. def printNav(self, f):
  2740. f.write("""
  2741. <div class="nav">
  2742. <ul>
  2743. <li><a href="index.html">General</a></li>
  2744. <li><a href="activity.html">Activity</a></li>
  2745. <li><a href="authors.html">Authors</a></li>
  2746. <li><a href="team_analysis.html">Team Analysis</a></li>
  2747. <li><a href="branches.html">Branches</a></li>
  2748. <li><a href="files.html">Files</a></li>
  2749. <li><a href="lines.html">Lines</a></li>
  2750. <li><a href="tags.html">Tags</a></li>
  2751. </ul>
  2752. </div>
  2753. """)
  2754. class PDFReportCreator(ReportCreator):
  2755. """Creates PDF reports using fpdf2 library with embedded charts and tab-based structure."""
  2756. def __init__(self):
  2757. ReportCreator.__init__(self)
  2758. self.pdf = None
  2759. self.output_path = None
  2760. # Define color schemes for better visual appeal
  2761. self.colors = {
  2762. 'header': (41, 128, 185), # Blue
  2763. 'text': (0, 0, 0), # Black
  2764. 'table_header': (52, 152, 219), # Light blue
  2765. 'table_alt': (245, 245, 245) # Light gray
  2766. }
  2767. def _set_color(self, color_type='text', fill=False):
  2768. """Set text or fill color using predefined color scheme."""
  2769. if color_type in self.colors:
  2770. r, g, b = self.colors[color_type]
  2771. if fill:
  2772. self.pdf.set_fill_color(r, g, b)
  2773. else:
  2774. self.pdf.set_text_color(r, g, b)
  2775. def _add_section_header(self, title, level=1):
  2776. """Add a standardized section header with consistent formatting."""
  2777. # Add some space before header
  2778. self.pdf.ln(h=10)
  2779. # Set header color and font
  2780. self._set_color('header')
  2781. if level == 1:
  2782. self.pdf.set_font('helvetica', 'B', 20)
  2783. height = 15
  2784. elif level == 2:
  2785. self.pdf.set_font('helvetica', 'B', 16)
  2786. height = 12
  2787. else:
  2788. self.pdf.set_font('helvetica', 'B', 14)
  2789. height = 10
  2790. # Add the header
  2791. self.pdf.cell(0, height, title, 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  2792. # Reset color to text
  2793. self._set_color('text')
  2794. self.pdf.ln(h=5) # Small gap after header
  2795. def _create_table_header(self, headers, widths=None, font_size=9):
  2796. """Create a standardized table header with consistent formatting."""
  2797. if widths is None:
  2798. # Auto-calculate widths if not provided
  2799. total_width = 180 # Reasonable default
  2800. widths = [total_width // len(headers)] * len(headers)
  2801. # Set header styling
  2802. self._set_color('table_header')
  2803. self._set_color('table_header', fill=True)
  2804. self.pdf.set_font('helvetica', 'B', font_size)
  2805. # Create header cells
  2806. for i, (header, width) in enumerate(zip(headers, widths)):
  2807. is_last = (i == len(headers) - 1)
  2808. new_x = XPos.LMARGIN if is_last else XPos.RIGHT
  2809. new_y = YPos.NEXT if is_last else YPos.TOP
  2810. self.pdf.cell(width, 8, str(header), 1,
  2811. new_x=new_x, new_y=new_y, align='C', fill=True)
  2812. # Reset styling for table content
  2813. self._set_color('text')
  2814. self.pdf.set_font('helvetica', '', font_size - 1)
  2815. def _create_table_row(self, values, widths, alternate_row=False, font_size=8):
  2816. """Create a table row with optional alternating background."""
  2817. if alternate_row:
  2818. self._set_color('table_alt', fill=True)
  2819. for i, (value, width) in enumerate(zip(values, widths)):
  2820. is_last = (i == len(values) - 1)
  2821. new_x = XPos.LMARGIN if is_last else XPos.RIGHT
  2822. new_y = YPos.NEXT if is_last else YPos.TOP
  2823. # Truncate long values to fit
  2824. str_value = str(value)
  2825. if len(str_value) > width // 3: # Rough character width estimation
  2826. str_value = str_value[:width//3-2] + '...'
  2827. self.pdf.cell(width, 6, str_value, 1,
  2828. new_x=new_x, new_y=new_y, align='C', fill=alternate_row)
  2829. def create(self, data, path):
  2830. ReportCreator.create(self, data, path)
  2831. self.title = data.projectname
  2832. self.output_path = path
  2833. # Initialize PDF document with fpdf2 features
  2834. self.pdf = FPDF()
  2835. self.pdf.set_auto_page_break(auto=True, margin=15)
  2836. # Set metadata for better PDF properties
  2837. self.pdf.set_title(f"GitStats Report - {data.projectname}")
  2838. self.pdf.set_author("GitStats")
  2839. self.pdf.set_subject(f"Git repository analysis for {data.projectname}")
  2840. self.pdf.set_creator("GitStats with fpdf2")
  2841. self.pdf.set_keywords("git,statistics,analysis,repository")
  2842. # Create all pages (tabs)
  2843. self._create_title_page(data)
  2844. self._create_general_page(data)
  2845. self._create_activity_page(data)
  2846. self._create_authors_page(data)
  2847. self._create_team_analysis_page(data)
  2848. self._create_files_page(data)
  2849. self._create_lines_page(data)
  2850. self._create_tags_page(data)
  2851. self._create_branches_page(data)
  2852. # Save PDF with fpdf2's enhanced output method
  2853. pdf_path = os.path.join(path, f"gitstats_{data.projectname.replace(' ', '_')}.pdf")
  2854. # Use fpdf2's output method with proper file handling
  2855. try:
  2856. self.pdf.output(pdf_path)
  2857. print(f"PDF report saved to: {pdf_path}")
  2858. # Verify file was created and has content
  2859. if os.path.exists(pdf_path) and os.path.getsize(pdf_path) > 0:
  2860. print(f"PDF file size: {os.path.getsize(pdf_path)} bytes")
  2861. else:
  2862. print("Warning: PDF file was not created properly")
  2863. except Exception as e:
  2864. print(f"Error saving PDF: {e}")
  2865. raise
  2866. def _add_chart_if_exists(self, chart_filename, width=None, height=None):
  2867. """Add a chart image to the PDF if it exists, with improved fpdf2 handling."""
  2868. chart_path = os.path.join(self.output_path, chart_filename)
  2869. if os.path.exists(chart_path):
  2870. try:
  2871. # Get current position
  2872. x = self.pdf.get_x()
  2873. y = self.pdf.get_y()
  2874. # Calculate dimensions with better defaults
  2875. if width is None:
  2876. width = 150 # Default width
  2877. if height is None:
  2878. height = 80 # Default height
  2879. # Get page dimensions for better space calculation
  2880. page_width = self.pdf.w
  2881. page_height = self.pdf.h
  2882. margin = 15 # Same as auto_page_break margin
  2883. # Check if there's enough space on current page
  2884. if y + height > (page_height - margin):
  2885. self.pdf.add_page()
  2886. x = self.pdf.get_x()
  2887. y = self.pdf.get_y()
  2888. # Add image with fpdf2's enhanced image handling
  2889. # fpdf2 automatically handles different image formats
  2890. self.pdf.image(chart_path, x=x, y=y, w=width, h=height)
  2891. # Move cursor below image with better spacing
  2892. self.pdf.set_y(y + height + 8) # Increased spacing for better layout
  2893. return True
  2894. except Exception as e:
  2895. print(f"Warning: Could not add chart {chart_filename}: {e}")
  2896. return False
  2897. return False
  2898. def _create_title_page(self, data):
  2899. """Create the title page of the PDF report."""
  2900. self.pdf.add_page()
  2901. self.pdf.set_font('helvetica', 'B', 24)
  2902. self.pdf.cell(0, 20, f'GitStats Report - {data.projectname}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  2903. self.pdf.ln(h=10)
  2904. self.pdf.set_font('helvetica', '', 12)
  2905. format = '%Y-%m-%d %H:%M:%S'
  2906. # Report generation info
  2907. self.pdf.cell(0, 10, f'Generated: {datetime.datetime.now().strftime(format)}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
  2908. self.pdf.cell(0, 10, f'Generator: GitStats (version {getversion()})', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
  2909. self.pdf.cell(0, 10, f'Git Version: {getgitversion()}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
  2910. if getgnuplotversion():
  2911. self.pdf.cell(0, 10, f'Gnuplot Version: {getgnuplotversion()}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
  2912. self.pdf.ln(h=10)
  2913. self.pdf.cell(0, 10, f'Report Period: {data.getFirstCommitDate().strftime(format)} to {data.getLastCommitDate().strftime(format)}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
  2914. # Table of contents
  2915. self.pdf.ln(h=15)
  2916. self.pdf.set_font('helvetica', 'B', 16)
  2917. self.pdf.cell(0, 10, 'Table of Contents', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  2918. self.pdf.set_font('helvetica', '', 12)
  2919. sections = [
  2920. '1. General Statistics',
  2921. '2. Activity Statistics',
  2922. '3. Authors Statistics',
  2923. '4. Team Analysis',
  2924. '5. Files Statistics',
  2925. '6. Lines of Code Statistics',
  2926. '7. Tags Statistics',
  2927. '8. Branches Statistics'
  2928. ]
  2929. for section in sections:
  2930. self.pdf.cell(0, 8, section, 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  2931. def _create_general_page(self, data):
  2932. """Create the general statistics page (mirrors index.html)."""
  2933. self.pdf.add_page()
  2934. self.pdf.set_font('helvetica', 'B', 20)
  2935. self.pdf.cell(0, 15, '1. General Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  2936. self.pdf.set_font('helvetica', '', 12)
  2937. # Calculate basic stats
  2938. total_commits = data.getTotalCommits()
  2939. total_active_days = len(data.getActiveDays()) if hasattr(data, 'getActiveDays') else 0
  2940. delta_days = data.getCommitDeltaDays() if hasattr(data, 'getCommitDeltaDays') else 0
  2941. total_authors = data.getTotalAuthors()
  2942. # General statistics (matching index.html exactly)
  2943. stats = [
  2944. ('Project name', data.projectname),
  2945. ('Generated', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
  2946. ('Report Period', f"{data.getFirstCommitDate().strftime('%Y-%m-%d %H:%M:%S')} to {data.getLastCommitDate().strftime('%Y-%m-%d %H:%M:%S')}"),
  2947. ('Age', f"{delta_days} days, {total_active_days} active days ({(100.0 * total_active_days / delta_days) if delta_days else 0.0:.2f}%)"),
  2948. ('Total Files', str(data.getTotalFiles())),
  2949. ('Total Lines of Code', f"{data.getTotalLOC()} ({data.total_lines_added} added, {data.total_lines_removed} removed)"),
  2950. ('Source Lines of Code', f"{data.getTotalSourceLines()} ({(100.0 * data.getTotalSourceLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0:.1f}%)"),
  2951. ('Comment Lines', f"{data.getTotalCommentLines()} ({(100.0 * data.getTotalCommentLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0:.1f}%)"),
  2952. ('Blank Lines', f"{data.getTotalBlankLines()} ({(100.0 * data.getTotalBlankLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0:.1f}%)"),
  2953. ('Total Commits', f"{total_commits} (average {(float(total_commits) / total_active_days) if total_active_days else 0.0:.1f} commits per active day, {(float(total_commits) / delta_days) if delta_days else 0.0:.1f} per all days)"),
  2954. ('Authors', f"{total_authors} (average {(float(total_commits) / total_authors) if total_authors else 0.0:.1f} commits per author)"),
  2955. ('Total Branches', str(len(data.getBranches()))),
  2956. ('Unmerged Branches', str(len(data.getUnmergedBranches()))),
  2957. ('Main Branch', data.main_branch if hasattr(data, 'main_branch') else 'N/A')
  2958. ]
  2959. # Display stats
  2960. for label, value in stats:
  2961. self.pdf.cell(50, 8, f"{label}:", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  2962. self.pdf.cell(0, 8, str(value), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  2963. self.pdf.ln(h=10)
  2964. def _create_activity_page(self, data):
  2965. """Create the activity statistics page with charts (mirrors activity.html)."""
  2966. self.pdf.add_page()
  2967. self.pdf.set_font('helvetica', 'B', 20)
  2968. self.pdf.cell(0, 15, '2. Activity Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  2969. # Weekly activity section
  2970. self.pdf.set_font('helvetica', 'B', 14)
  2971. self.pdf.cell(0, 10, 'Weekly Activity', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  2972. self.pdf.set_font('helvetica', '', 10)
  2973. self.pdf.cell(0, 6, 'Last 32 weeks activity (see chart below)', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  2974. self.pdf.ln(h=5)
  2975. # Hour of Day section
  2976. self.pdf.set_font('helvetica', 'B', 14)
  2977. self.pdf.cell(0, 10, 'Hour of Day', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  2978. self.pdf.set_font('helvetica', '', 10)
  2979. hour_of_day = data.getActivityByHourOfDay()
  2980. total_commits = data.getTotalCommits()
  2981. # Create hour of day table
  2982. self.pdf.set_font('helvetica', 'B', 8)
  2983. self.pdf.cell(20, 6, 'Hour', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  2984. for h in range(0, 24):
  2985. self.pdf.cell(7, 6, str(h), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  2986. self.pdf.ln()
  2987. self.pdf.cell(20, 6, 'Commits', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  2988. for h in range(0, 24):
  2989. commits = hour_of_day.get(h, 0)
  2990. self.pdf.cell(7, 6, str(commits), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  2991. self.pdf.ln()
  2992. self.pdf.cell(20, 6, '%', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  2993. for h in range(0, 24):
  2994. commits = hour_of_day.get(h, 0)
  2995. percent = (100.0 * commits / total_commits) if total_commits else 0.0
  2996. self.pdf.cell(7, 6, f"{percent:.1f}", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  2997. self.pdf.ln(h=10)
  2998. # Add hour of day chart
  2999. self._add_chart_if_exists('hour_of_day.png', 180, 90)
  3000. # Day of Week section
  3001. self.pdf.set_font('helvetica', 'B', 14)
  3002. self.pdf.cell(0, 10, 'Day of Week', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3003. self.pdf.set_font('helvetica', '', 10)
  3004. day_of_week = data.getActivityByDayOfWeek()
  3005. # Create day of week table
  3006. self.pdf.set_font('helvetica', 'B', 10)
  3007. self.pdf.cell(30, 8, 'Day', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3008. self.pdf.cell(30, 8, 'Total (%)', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3009. self.pdf.set_font('helvetica', '', 10)
  3010. for d in range(0, 7):
  3011. day_name = WEEKDAYS[d]
  3012. commits = day_of_week.get(d, 0)
  3013. percent = (100.0 * commits / total_commits) if total_commits else 0.0
  3014. self.pdf.cell(30, 6, day_name, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  3015. self.pdf.cell(30, 6, f"{commits} ({percent:.2f}%)", 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3016. self.pdf.ln(h=5)
  3017. self._add_chart_if_exists('day_of_week.png', 180, 90)
  3018. # Month of Year section
  3019. if hasattr(data, 'activity_by_month_of_year'):
  3020. self.pdf.set_font('helvetica', 'B', 14)
  3021. self.pdf.cell(0, 10, 'Month of Year', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3022. self.pdf.set_font('helvetica', 'B', 10)
  3023. self.pdf.cell(30, 8, 'Month', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3024. self.pdf.cell(40, 8, 'Commits (%)', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3025. self.pdf.set_font('helvetica', '', 10)
  3026. for mm in range(1, 13):
  3027. commits = data.activity_by_month_of_year.get(mm, 0)
  3028. percent = (100.0 * commits / total_commits) if total_commits else 0.0
  3029. self.pdf.cell(30, 6, str(mm), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3030. self.pdf.cell(40, 6, f"{commits} ({percent:.2f} %)", 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3031. self.pdf.ln(h=5)
  3032. self._add_chart_if_exists('month_of_year.png', 180, 90)
  3033. # Add page break for next major chart
  3034. if self.pdf.get_y() > 200:
  3035. self.pdf.add_page()
  3036. # Commits by year/month chart
  3037. self.pdf.set_font('helvetica', 'B', 14)
  3038. self.pdf.cell(0, 10, 'Commits by Year/Month', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3039. self._add_chart_if_exists('commits_by_year_month.png', 180, 100)
  3040. # Commits by year chart
  3041. self.pdf.set_font('helvetica', 'B', 14)
  3042. self.pdf.cell(0, 10, 'Commits by Year', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3043. self._add_chart_if_exists('commits_by_year.png', 180, 100)
  3044. def _create_authors_page(self, data):
  3045. """Create the authors statistics page with charts (mirrors authors.html)."""
  3046. self.pdf.add_page()
  3047. self.pdf.set_font('helvetica', 'B', 20)
  3048. self.pdf.cell(0, 15, '3. Authors Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3049. # List of Authors table
  3050. self.pdf.set_font('helvetica', 'B', 14)
  3051. self.pdf.cell(0, 10, 'List of Authors', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3052. authors = data.getAuthors(conf['max_authors'])
  3053. # Table header
  3054. self.pdf.set_font('helvetica', 'B', 8)
  3055. self.pdf.cell(35, 6, 'Author', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3056. self.pdf.cell(20, 6, 'Commits (%)', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3057. self.pdf.cell(15, 6, '+ lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3058. self.pdf.cell(15, 6, '- lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3059. self.pdf.cell(25, 6, 'First commit', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3060. self.pdf.cell(25, 6, 'Last commit', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3061. self.pdf.cell(20, 6, 'Age', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3062. self.pdf.cell(15, 6, 'Active days', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3063. # Table data
  3064. self.pdf.set_font('helvetica', '', 7)
  3065. for author in authors[:20]: # Top 20 authors
  3066. info = data.getAuthorInfo(author)
  3067. # Truncate long author names
  3068. display_author = author[:18] + "..." if len(author) > 21 else author
  3069. self.pdf.cell(35, 5, display_author, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  3070. self.pdf.cell(20, 5, f"{info['commits']} ({info['commits_frac']:.1f}%)", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3071. self.pdf.cell(15, 5, str(info['lines_added']), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3072. self.pdf.cell(15, 5, str(info['lines_removed']), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3073. self.pdf.cell(25, 5, info['date_first'][:10], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3074. self.pdf.cell(25, 5, info['date_last'][:10], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3075. # Calculate age
  3076. try:
  3077. age_days = (datetime.datetime.strptime(info['date_last'][:10], '%Y-%m-%d') -
  3078. datetime.datetime.strptime(info['date_first'][:10], '%Y-%m-%d')).days
  3079. age_text = f"{age_days} days" if age_days > 0 else "1 day"
  3080. except:
  3081. age_text = "N/A"
  3082. active_days = len(info.get('active_days', [0])) if 'active_days' in info else 1
  3083. self.pdf.cell(20, 5, age_text[:12], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3084. self.pdf.cell(15, 5, str(active_days), 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3085. self.pdf.ln(h=10)
  3086. # Lines of code by author chart
  3087. self.pdf.set_font('helvetica', 'B', 14)
  3088. self.pdf.cell(0, 10, 'Cumulated Added Lines of Code per Author', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3089. self._add_chart_if_exists('lines_of_code_by_author.png', 180, 110)
  3090. # Commits per author chart
  3091. self.pdf.set_font('helvetica', 'B', 14)
  3092. self.pdf.cell(0, 10, 'Commits per Author', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3093. self._add_chart_if_exists('commits_by_author.png', 180, 110)
  3094. # Commits by domains chart
  3095. self.pdf.set_font('helvetica', 'B', 14)
  3096. self.pdf.cell(0, 10, 'Commits by Domains', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3097. self._add_chart_if_exists('domains.png', 180, 100)
  3098. def _create_team_analysis_page(self, data):
  3099. """Create the team analysis page for comprehensive team evaluation (new feature)."""
  3100. self.pdf.add_page()
  3101. self.pdf.set_font('helvetica', 'B', 20)
  3102. self.pdf.cell(0, 15, '4. Team Analysis', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3103. # Team Overview
  3104. self.pdf.set_font('helvetica', 'B', 14)
  3105. self.pdf.cell(0, 10, 'Team Overview', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3106. self.pdf.set_font('helvetica', '', 12)
  3107. total_authors = data.getTotalAuthors()
  3108. work_distribution = data.getTeamWorkDistribution()
  3109. self.pdf.cell(50, 8, 'Total Team Members:', 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  3110. self.pdf.cell(0, 8, str(total_authors), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3111. # Calculate work distribution metrics
  3112. commit_contributions = [dist['commit_percentage'] for dist in work_distribution.values()]
  3113. if commit_contributions:
  3114. max_contrib = max(commit_contributions)
  3115. min_contrib = min(commit_contributions)
  3116. avg_contrib = sum(commit_contributions) / len(commit_contributions)
  3117. self.pdf.cell(50, 8, 'Work Distribution:', 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  3118. self.pdf.cell(0, 8, f'Max: {max_contrib:.1f}%, Min: {min_contrib:.1f}%, Avg: {avg_contrib:.1f}%', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3119. self.pdf.ln(h=10)
  3120. # Team Performance Rankings
  3121. self.pdf.set_font('helvetica', 'B', 14)
  3122. self.pdf.cell(0, 10, 'Team Performance Rankings', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3123. # Top Contributors
  3124. contrib_ranking = data.getAuthorsByContribution()
  3125. efficiency_ranking = data.getAuthorsByEfficiency()
  3126. self.pdf.set_font('helvetica', 'B', 12)
  3127. self.pdf.cell(0, 8, 'Top 10 Contributors (by commit percentage):', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3128. self.pdf.set_font('helvetica', '', 10)
  3129. for i, (author, percentage) in enumerate(contrib_ranking[:10], 1):
  3130. display_author = author[:30] + "..." if len(author) > 33 else author
  3131. self.pdf.cell(0, 6, f'{i}. {display_author} ({percentage:.1f}%)', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3132. self.pdf.ln(h=5)
  3133. # Team Performance Table
  3134. self.pdf.set_font('helvetica', 'B', 14)
  3135. self.pdf.cell(0, 10, 'Detailed Performance Analysis', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3136. team_performance = data.getTeamPerformance()
  3137. commit_patterns = data.getCommitPatterns()
  3138. # Table header
  3139. self.pdf.set_font('helvetica', 'B', 8)
  3140. self.pdf.cell(35, 6, 'Author', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3141. self.pdf.cell(20, 6, 'Commits', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3142. self.pdf.cell(20, 6, 'Contrib %', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3143. self.pdf.cell(25, 6, 'Efficiency', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3144. self.pdf.cell(25, 6, 'Consistency', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3145. self.pdf.cell(25, 6, 'Leadership', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3146. self.pdf.cell(25, 6, 'Overall', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3147. # Table data - show top 15 performers
  3148. self.pdf.set_font('helvetica', '', 7)
  3149. sorted_authors = sorted(team_performance.items(), key=lambda x: x[1].get('overall_score', 0), reverse=True)
  3150. for author, perf in sorted_authors[:15]:
  3151. author_info = data.getAuthorInfo(author)
  3152. commits = author_info.get('commits', 0)
  3153. contrib_pct = perf.get('contribution_percentage', 0)
  3154. efficiency = perf.get('efficiency_score', 0)
  3155. consistency = perf.get('consistency', 0)
  3156. leadership = perf.get('leadership_score', 0)
  3157. overall = perf.get('overall_score', 0)
  3158. # Truncate long author names
  3159. display_author = author[:18] + "..." if len(author) > 21 else author
  3160. self.pdf.cell(35, 5, display_author, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  3161. self.pdf.cell(20, 5, str(commits), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3162. self.pdf.cell(20, 5, f'{contrib_pct:.1f}%', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3163. self.pdf.cell(25, 5, f'{efficiency:.1f}', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3164. self.pdf.cell(25, 5, f'{consistency:.1f}', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3165. self.pdf.cell(25, 5, f'{leadership:.1f}', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3166. self.pdf.cell(25, 5, f'{overall:.1f}', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3167. self.pdf.ln(h=10)
  3168. # Team Assessment Conclusion
  3169. self.pdf.set_font('helvetica', 'B', 14)
  3170. self.pdf.cell(0, 10, 'Team Assessment Conclusion', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3171. self.pdf.set_font('helvetica', '', 10)
  3172. # Generate team insights
  3173. top_contributor = contrib_ranking[0] if contrib_ranking else ("N/A", 0)
  3174. most_efficient = efficiency_ranking[0] if efficiency_ranking else ("N/A", 0)
  3175. self.pdf.cell(0, 6, f'- Top contributor: {top_contributor[0]} ({top_contributor[1]:.1f}% of commits)', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3176. self.pdf.cell(0, 6, f'- Most efficient developer: {most_efficient[0]} (score: {most_efficient[1]:.1f})', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3177. self.pdf.cell(0, 6, f'- Team size: {total_authors} active contributors', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3178. # Work distribution assessment
  3179. if commit_contributions:
  3180. gini_coefficient = self._calculate_gini_coefficient(commit_contributions)
  3181. if gini_coefficient < 0.3:
  3182. distribution_assessment = "Well-distributed (very balanced team)"
  3183. elif gini_coefficient < 0.5:
  3184. distribution_assessment = "Moderately distributed (some imbalance)"
  3185. else:
  3186. distribution_assessment = "Highly concentrated (few dominant contributors)"
  3187. self.pdf.cell(0, 6, f'- Work distribution: {distribution_assessment}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3188. def _calculate_gini_coefficient(self, values):
  3189. """Calculate Gini coefficient for work distribution analysis."""
  3190. if not values:
  3191. return 0
  3192. sorted_values = sorted(values)
  3193. n = len(sorted_values)
  3194. cumsum = sum(sorted_values)
  3195. if cumsum == 0:
  3196. return 0
  3197. sum_of_differences = 0
  3198. for i in range(n):
  3199. for j in range(n):
  3200. sum_of_differences += abs(sorted_values[i] - sorted_values[j])
  3201. gini = sum_of_differences / (2 * n * cumsum)
  3202. return gini
  3203. def _create_files_page(self, data):
  3204. """Create the files statistics page with charts (mirrors files.html)."""
  3205. self.pdf.add_page()
  3206. self.pdf.set_font('helvetica', 'B', 20)
  3207. self.pdf.cell(0, 15, '5. Files Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3208. # Basic file stats
  3209. total_files = data.getTotalFiles()
  3210. total_loc = data.getTotalLOC()
  3211. self.pdf.set_font('helvetica', '', 12)
  3212. stats = [
  3213. ('Total files', str(total_files)),
  3214. ('Total lines', str(total_loc)),
  3215. ]
  3216. try:
  3217. avg_size = data.getAverageFileSize()
  3218. stats.append(('Average file size', f"{avg_size:.2f} bytes"))
  3219. except (AttributeError, ZeroDivisionError):
  3220. # Fallback to old calculation if new method fails
  3221. avg_size = float(data.getTotalSize()) / total_files if total_files else 0.0
  3222. stats.append(('Average file size', f"{avg_size:.2f} bytes"))
  3223. try:
  3224. avg_revisions = data.getAverageRevisionsPerFile()
  3225. stats.append(('Average revisions per file', f"{avg_revisions:.2f}"))
  3226. except AttributeError:
  3227. pass
  3228. for label, value in stats:
  3229. self.pdf.cell(50, 8, f"{label}:", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  3230. self.pdf.cell(0, 8, str(value), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3231. self.pdf.ln(h=10)
  3232. # File extensions
  3233. if hasattr(data, 'extensions') and data.extensions:
  3234. self.pdf.set_font('helvetica', 'B', 14)
  3235. self.pdf.cell(0, 10, 'File Extensions', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3236. # Table header
  3237. self.pdf.set_font('helvetica', 'B', 9)
  3238. self.pdf.cell(25, 8, 'Extension', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3239. self.pdf.cell(20, 8, 'Files', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3240. self.pdf.cell(20, 8, '% Files', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3241. self.pdf.cell(25, 8, 'Lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3242. self.pdf.cell(20, 8, '% Lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3243. self.pdf.cell(25, 8, 'Lines/File', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3244. # Table data - show top extensions
  3245. self.pdf.set_font('helvetica', '', 8)
  3246. sorted_extensions = sorted(data.extensions.items(),
  3247. key=lambda x: x[1]['files'], reverse=True)[:15]
  3248. for ext, ext_data in sorted_extensions:
  3249. files = ext_data['files']
  3250. lines = ext_data['lines']
  3251. loc_percentage = (100.0 * lines / total_loc) if total_loc else 0.0
  3252. files_percentage = (100.0 * files / total_files) if total_files else 0.0
  3253. lines_per_file = (lines // files) if files else 0
  3254. display_ext = ext if ext else '(no ext)'
  3255. self.pdf.cell(25, 6, display_ext[:12], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  3256. self.pdf.cell(20, 6, str(files), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3257. self.pdf.cell(20, 6, f"{files_percentage:.1f}%", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3258. self.pdf.cell(25, 6, str(lines), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3259. self.pdf.cell(20, 6, f"{loc_percentage:.1f}%", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3260. self.pdf.cell(25, 6, str(lines_per_file), 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3261. self.pdf.ln(h=10)
  3262. # SLOC Breakdown by Extension
  3263. sloc_data = data.getSLOCByExtension()
  3264. if sloc_data:
  3265. self.pdf.set_font('helvetica', 'B', 14)
  3266. self.pdf.cell(0, 10, 'Source Lines of Code (SLOC) Breakdown', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3267. # Table header
  3268. self.pdf.set_font('helvetica', 'B', 8)
  3269. self.pdf.cell(20, 8, 'Extension', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3270. self.pdf.cell(25, 8, 'Source Lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3271. self.pdf.cell(25, 8, 'Comment Lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3272. self.pdf.cell(25, 8, 'Blank Lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3273. self.pdf.cell(20, 8, 'Total', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3274. # Table data
  3275. self.pdf.set_font('helvetica', '', 7)
  3276. sorted_sloc = sorted(sloc_data.items(),
  3277. key=lambda x: x[1]['total'], reverse=True)[:15]
  3278. for ext, sloc_info in sorted_sloc:
  3279. if sloc_info['total'] == 0:
  3280. continue
  3281. display_ext = ext if ext else '(no ext)'
  3282. source_pct = (100.0 * sloc_info['source'] / sloc_info['total']) if sloc_info['total'] else 0.0
  3283. comment_pct = (100.0 * sloc_info['comments'] / sloc_info['total']) if sloc_info['total'] else 0.0
  3284. blank_pct = (100.0 * sloc_info['blank'] / sloc_info['total']) if sloc_info['total'] else 0.0
  3285. self.pdf.cell(20, 5, display_ext[:8], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  3286. self.pdf.cell(25, 5, f"{sloc_info['source']} ({source_pct:.1f}%)", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3287. self.pdf.cell(25, 5, f"{sloc_info['comments']} ({comment_pct:.1f}%)", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3288. self.pdf.cell(25, 5, f"{sloc_info['blank']} ({blank_pct:.1f}%)", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3289. self.pdf.cell(20, 5, str(sloc_info['total']), 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3290. self.pdf.ln(h=10)
  3291. # Add new file statistics tables
  3292. try:
  3293. # Largest Files
  3294. largest_files = data.getLargestFiles(10)
  3295. if largest_files:
  3296. self.pdf.set_font('helvetica', 'B', 14)
  3297. self.pdf.cell(0, 10, 'Largest Files', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3298. # Table header
  3299. self.pdf.set_font('helvetica', 'B', 9)
  3300. self.pdf.cell(80, 8, 'File', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3301. self.pdf.cell(30, 8, 'Size (bytes)', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3302. self.pdf.cell(30, 8, 'Size (KB)', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3303. # Table data
  3304. self.pdf.set_font('helvetica', '', 8)
  3305. for filepath, size in largest_files:
  3306. size_kb = size / 1024.0
  3307. display_path = filepath[:40] + '...' if len(filepath) > 40 else filepath
  3308. self.pdf.cell(80, 6, display_path, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  3309. self.pdf.cell(30, 6, str(size), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3310. self.pdf.cell(30, 6, f"{size_kb:.1f}", 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3311. except (AttributeError, TypeError):
  3312. pass
  3313. try:
  3314. # Files with Most Revisions (Hotspots)
  3315. hotspot_files = data.getFilesWithMostRevisions(10)
  3316. if hotspot_files:
  3317. self.pdf.ln(h=10)
  3318. self.pdf.set_font('helvetica', 'B', 14)
  3319. self.pdf.cell(0, 10, 'Files with Most Revisions (Hotspots)', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3320. # Table header
  3321. self.pdf.set_font('helvetica', 'B', 9)
  3322. self.pdf.cell(80, 8, 'File', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3323. self.pdf.cell(30, 8, 'Revisions', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3324. self.pdf.cell(30, 8, '% of Commits', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3325. # Table data
  3326. self.pdf.set_font('helvetica', '', 8)
  3327. total_commits = data.getTotalCommits()
  3328. for filepath, revisions in hotspot_files:
  3329. revision_pct = (100.0 * revisions / total_commits) if total_commits else 0.0
  3330. display_path = filepath[:40] + '...' if len(filepath) > 40 else filepath
  3331. self.pdf.cell(80, 6, display_path, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  3332. self.pdf.cell(30, 6, str(revisions), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3333. self.pdf.cell(30, 6, f"{revision_pct:.2f}%", 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3334. except (AttributeError, TypeError):
  3335. pass
  3336. self.pdf.ln(h=10)
  3337. # Files by date chart
  3338. self.pdf.set_font('helvetica', 'B', 14)
  3339. self.pdf.cell(0, 10, 'Files by Date', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3340. self._add_chart_if_exists('files_by_date.png', 180, 100)
  3341. def _create_lines_page(self, data):
  3342. """Create the lines of code statistics page with charts (mirrors lines.html)."""
  3343. self.pdf.add_page()
  3344. self.pdf.set_font('helvetica', 'B', 20)
  3345. self.pdf.cell(0, 15, '6. Lines of Code Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3346. # Basic line stats
  3347. self.pdf.set_font('helvetica', '', 12)
  3348. stats = [
  3349. ('Total lines', str(data.getTotalLOC())),
  3350. ('Lines added', str(data.total_lines_added)),
  3351. ('Lines removed', str(data.total_lines_removed)),
  3352. ('Net lines', str(data.total_lines_added - data.total_lines_removed)),
  3353. ]
  3354. for label, value in stats:
  3355. self.pdf.cell(50, 8, f"{label}:", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  3356. self.pdf.cell(0, 8, str(value), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3357. self.pdf.ln(h=10)
  3358. # Lines by year
  3359. if hasattr(data, 'commits_by_year') and data.commits_by_year:
  3360. self.pdf.set_font('helvetica', 'B', 14)
  3361. self.pdf.cell(0, 10, 'Activity by Year', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3362. # Table header
  3363. self.pdf.set_font('helvetica', 'B', 10)
  3364. self.pdf.cell(25, 8, 'Year', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3365. self.pdf.cell(30, 8, 'Commits', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3366. self.pdf.cell(30, 8, '% of Total', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3367. self.pdf.cell(35, 8, 'Lines Added', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3368. self.pdf.cell(35, 8, 'Lines Removed', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3369. # Table data
  3370. self.pdf.set_font('helvetica', '', 9)
  3371. total_commits = data.getTotalCommits()
  3372. for yy in sorted(data.commits_by_year.keys(), reverse=True):
  3373. commits = data.commits_by_year.get(yy, 0)
  3374. percent = (100.0 * commits / total_commits) if total_commits else 0.0
  3375. lines_added = data.lines_added_by_year.get(yy, 0) if hasattr(data, 'lines_added_by_year') else 0
  3376. lines_removed = data.lines_removed_by_year.get(yy, 0) if hasattr(data, 'lines_removed_by_year') else 0
  3377. self.pdf.cell(25, 6, str(yy), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3378. self.pdf.cell(30, 6, str(commits), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3379. self.pdf.cell(30, 6, f"{percent:.1f}%", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3380. self.pdf.cell(35, 6, str(lines_added), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3381. self.pdf.cell(35, 6, str(lines_removed), 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3382. self.pdf.ln(h=10)
  3383. # Lines of code chart
  3384. self.pdf.set_font('helvetica', 'B', 14)
  3385. self.pdf.cell(0, 10, 'Lines of Code Over Time', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3386. self._add_chart_if_exists('lines_of_code.png', 180, 100)
  3387. def _create_tags_page(self, data):
  3388. """Create the tags statistics page (mirrors tags.html)."""
  3389. self.pdf.add_page()
  3390. self.pdf.set_font('helvetica', 'B', 20)
  3391. self.pdf.cell(0, 15, '7. Tags Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3392. self.pdf.set_font('helvetica', '', 12)
  3393. if not hasattr(data, 'tags') or not data.tags:
  3394. self.pdf.cell(0, 10, 'No tags found in repository.', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3395. return
  3396. # Basic tag stats
  3397. total_tags = len(data.tags)
  3398. avg_commits_per_tag = (1.0 * data.getTotalCommits() / total_tags) if total_tags else 0.0
  3399. stats = [
  3400. ('Total tags', str(total_tags)),
  3401. ('Average commits per tag', f"{avg_commits_per_tag:.2f}"),
  3402. ]
  3403. for label, value in stats:
  3404. self.pdf.cell(50, 8, f"{label}:", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  3405. self.pdf.cell(0, 8, str(value), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3406. self.pdf.ln(h=10)
  3407. # Tags table
  3408. if hasattr(data, 'tags') and data.tags:
  3409. self.pdf.set_font('helvetica', 'B', 12)
  3410. self.pdf.cell(0, 10, 'List of Tags', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3411. # Table header
  3412. self.pdf.set_font('helvetica', 'B', 10)
  3413. self.pdf.cell(40, 8, 'Tag', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3414. self.pdf.cell(30, 8, 'Date', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3415. self.pdf.cell(30, 8, 'Commits', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3416. self.pdf.cell(50, 8, 'Author', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3417. # Table data
  3418. self.pdf.set_font('helvetica', '', 9)
  3419. tag_list = sorted(data.tags.items(), key=lambda x: x[1]['date'], reverse=True)
  3420. for tag, tag_data in tag_list[:20]: # Show top 20 tags
  3421. self.pdf.cell(40, 6, tag[:20], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  3422. self.pdf.cell(30, 6, tag_data.get('date', 'N/A')[:10], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3423. self.pdf.cell(30, 6, str(tag_data.get('commits', 0)), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3424. author = tag_data.get('author', 'N/A')[:25]
  3425. self.pdf.cell(50, 6, author, 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3426. # Tags table
  3427. self.pdf.set_font('helvetica', 'B', 14)
  3428. self.pdf.cell(0, 10, 'Recent Tags', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3429. # Table header
  3430. self.pdf.set_font('helvetica', 'B', 10)
  3431. self.pdf.cell(40, 8, 'Tag Name', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3432. self.pdf.cell(30, 8, 'Date', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3433. self.pdf.cell(25, 8, 'Commits', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3434. self.pdf.cell(80, 8, 'Top Authors', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3435. # Sort tags by date (most recent first)
  3436. tags_sorted_by_date_desc = list(map(lambda el : el[1],
  3437. reversed(sorted(map(lambda el : (el[1]['date'], el[0]),
  3438. data.tags.items())))))
  3439. # Show up to 20 most recent tags
  3440. self.pdf.set_font('helvetica', '', 8)
  3441. for tag in tags_sorted_by_date_desc[:20]:
  3442. tag_info = data.tags[tag]
  3443. # Get top authors for this tag
  3444. if 'authors' in tag_info:
  3445. authors = sorted(tag_info['authors'].items(),
  3446. key=lambda x: x[1], reverse=True)[:3]
  3447. author_list = ', '.join([f"{author}({commits})" for author, commits in authors])
  3448. else:
  3449. author_list = ''
  3450. # Truncate long names
  3451. display_tag = tag[:18] + "..." if len(tag) > 21 else tag
  3452. display_authors = author_list[:35] + "..." if len(author_list) > 38 else author_list
  3453. self.pdf.cell(40, 6, display_tag, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  3454. self.pdf.cell(30, 6, tag_info['date'][:10], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3455. self.pdf.cell(25, 6, str(tag_info['commits']), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3456. self.pdf.cell(80, 6, display_authors, 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3457. def _create_branches_page(self, data):
  3458. """Create the branches statistics page (mirrors branches.html)."""
  3459. self.pdf.add_page()
  3460. self.pdf.set_font('helvetica', 'B', 20)
  3461. self.pdf.cell(0, 15, '8. Branches Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3462. self.pdf.set_font('helvetica', '', 12)
  3463. if not hasattr(data, 'branches') or not data.branches:
  3464. self.pdf.cell(0, 10, 'No branches found in repository.', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3465. return
  3466. # Basic branch stats
  3467. total_branches = len(data.getBranches())
  3468. unmerged_branches = data.getUnmergedBranches()
  3469. total_unmerged = len(unmerged_branches)
  3470. main_branch = data.main_branch if hasattr(data, 'main_branch') else 'N/A'
  3471. stats = [
  3472. ('Total branches', str(total_branches)),
  3473. ('Unmerged branches', str(total_unmerged)),
  3474. ('Main branch', main_branch),
  3475. ]
  3476. for label, value in stats:
  3477. self.pdf.cell(50, 8, f"{label}:", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  3478. self.pdf.cell(0, 8, str(value), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3479. self.pdf.ln(h=10)
  3480. # Branches summary table
  3481. self.pdf.set_font('helvetica', 'B', 12)
  3482. self.pdf.cell(0, 10, 'All Branches', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3483. # Table header
  3484. self.pdf.set_font('helvetica', 'B', 9)
  3485. self.pdf.cell(35, 8, 'Branch Name', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3486. self.pdf.cell(20, 8, 'Status', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3487. self.pdf.cell(20, 8, 'Commits', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3488. self.pdf.cell(25, 8, 'Lines Added', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3489. self.pdf.cell(25, 8, 'Lines Removed', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3490. self.pdf.cell(20, 8, 'Authors', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3491. self.pdf.cell(45, 8, 'First Author', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
  3492. # Table data - sort by commits descending
  3493. self.pdf.set_font('helvetica', '', 8)
  3494. branches_sorted = sorted(data.branches.items(),
  3495. key=lambda x: x[1].get('commits', 0), reverse=True)
  3496. for branch_name, branch_data in branches_sorted:
  3497. # Determine status
  3498. status = 'Unmerged' if branch_name in [b for b in unmerged_branches] else 'Merged'
  3499. # Get branch statistics
  3500. commits = branch_data.get('commits', 0)
  3501. lines_added = branch_data.get('lines_added', 0)
  3502. lines_removed = branch_data.get('lines_removed', 0)
  3503. authors_count = len(branch_data.get('authors', {}))
  3504. # Get first/main author
  3505. authors = branch_data.get('authors', {})
  3506. if authors:
  3507. first_author = max(authors.items(), key=lambda x: x[1])[0]
  3508. first_author = first_author[:20] + "..." if len(first_author) > 23 else first_author
  3509. else:
  3510. first_author = 'N/A'
  3511. # Truncate branch name if too long
  3512. display_branch = branch_name[:18] + "..." if len(branch_name) > 21 else branch_name
  3513. self.pdf.cell(35, 6, display_branch, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
  3514. self.pdf.cell(20, 6, status, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3515. self.pdf.cell(20, 6, str(commits), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3516. self.pdf.cell(25, 6, str(lines_added), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3517. self.pdf.cell(25, 6, str(lines_removed), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3518. self.pdf.cell(20, 6, str(authors_count), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
  3519. self.pdf.cell(45, 6, first_author, 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3520. # Unmerged branches detail section
  3521. if total_unmerged > 0:
  3522. self.pdf.ln(h=10)
  3523. self.pdf.set_font('helvetica', 'B', 14)
  3524. self.pdf.cell(0, 10, f'Unmerged Branches Details ({total_unmerged})', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3525. self.pdf.set_font('helvetica', '', 10)
  3526. for branch_name in unmerged_branches:
  3527. if branch_name in data.branches:
  3528. branch_data = data.branches[branch_name]
  3529. self.pdf.set_font('helvetica', 'B', 10)
  3530. self.pdf.cell(0, 8, f"Branch: {branch_name}", 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3531. self.pdf.set_font('helvetica', '', 9)
  3532. self.pdf.cell(20, 6, f" Commits: {branch_data.get('commits', 0)}", 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3533. self.pdf.cell(20, 6, f" Lines: +{branch_data.get('lines_added', 0)} -{branch_data.get('lines_removed', 0)}", 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3534. # Show authors
  3535. authors = branch_data.get('authors', {})
  3536. if authors:
  3537. author_list = sorted(authors.items(), key=lambda x: x[1], reverse=True)
  3538. author_str = ', '.join([f"{author}({commits})" for author, commits in author_list[:3]])
  3539. if len(author_list) > 3:
  3540. author_str += f" and {len(author_list) - 3} more"
  3541. self.pdf.cell(20, 6, f" Authors: {author_str}", 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
  3542. self.pdf.ln(h=2)
  3543. def is_git_repository(path):
  3544. """Check if a directory is a valid git repository."""
  3545. if not os.path.exists(path) or not os.path.isdir(path):
  3546. return False
  3547. git_dir = os.path.join(path, '.git')
  3548. return os.path.exists(git_dir)
  3549. def discover_repositories(scan_path):
  3550. """Discover all git repositories in a directory.
  3551. Returns a list of tuples: (repo_name, repo_path)
  3552. where repo_name matches the regex pattern and repo_path is the full path.
  3553. """
  3554. repositories = []
  3555. if not os.path.exists(scan_path) or not os.path.isdir(scan_path):
  3556. return repositories
  3557. try:
  3558. for item in os.listdir(scan_path):
  3559. item_path = os.path.join(scan_path, item)
  3560. if os.path.isdir(item_path) and is_git_repository(item_path):
  3561. # Use directory name as repository name
  3562. repo_name = item
  3563. repositories.append((repo_name, item_path))
  3564. if conf['verbose']:
  3565. print(f' Found repository: {repo_name} at {item_path}')
  3566. except (PermissionError, OSError) as e:
  3567. print(f'Warning: Could not scan directory {scan_path}: {e}')
  3568. return repositories
  3569. def usage():
  3570. print("""
  3571. Usage: gitstats [options] <gitpath..> <outputpath>
  3572. gitstats [options] --multi-repo <scan-folder> <outputpath>
  3573. Options:
  3574. -c key=value Override configuration value
  3575. --debug Enable debug output
  3576. --verbose Enable verbose output
  3577. --multi-repo Scan folder for multiple repositories and generate reports for each
  3578. -h, --help Show this help message
  3579. Note: GitStats always generates both HTML and PDF reports.
  3580. Examples:
  3581. gitstats repo output # Generates both HTML and PDF reports
  3582. gitstats --verbose repo output # With verbose output
  3583. gitstats --multi-repo /path/to/repos output # Generate reports for all repos in folder
  3584. gitstats --debug -c max_authors=50 repo output
  3585. With --multi-repo mode:
  3586. - Scans the specified folder for git repositories
  3587. - Creates a report for each repository in a subfolder named <reponame>_report
  3588. - Only processes directories that are valid git repositories
  3589. Default config values:
  3590. %s
  3591. Please see the manual page for more details.
  3592. """ % conf)
  3593. class GitStats:
  3594. def run(self, args_orig):
  3595. multi_repo_mode = False
  3596. optlist, args = getopt.getopt(args_orig, 'hc:', ["help", "debug", "verbose", "multi-repo"])
  3597. for o,v in optlist:
  3598. if o == '-c':
  3599. if '=' not in v:
  3600. print(f'FATAL: Invalid configuration format. Use key=value: {v}')
  3601. sys.exit(1)
  3602. key, value = v.split('=', 1)
  3603. if key not in conf:
  3604. raise KeyError('no such key "%s" in config' % key)
  3605. # Validate configuration values
  3606. try:
  3607. if isinstance(conf[key], int):
  3608. new_value = int(value)
  3609. if key in ['max_authors', 'max_domains'] and new_value < 1:
  3610. print(f'FATAL: {key} must be a positive integer, got: {new_value}')
  3611. sys.exit(1)
  3612. conf[key] = new_value
  3613. elif isinstance(conf[key], bool):
  3614. conf[key] = value.lower() in ('true', '1', 'yes', 'on')
  3615. else:
  3616. conf[key] = value
  3617. except ValueError as e:
  3618. print(f'FATAL: Invalid value for {key}: {value} ({e})')
  3619. sys.exit(1)
  3620. elif o == '--debug':
  3621. conf['debug'] = True
  3622. conf['verbose'] = True # Debug implies verbose
  3623. elif o == '--verbose':
  3624. conf['verbose'] = True
  3625. elif o == '--multi-repo':
  3626. multi_repo_mode = True
  3627. elif o in ('-h', '--help'):
  3628. usage()
  3629. sys.exit()
  3630. if multi_repo_mode:
  3631. if len(args) != 2:
  3632. print('FATAL: --multi-repo requires exactly two arguments: <scan-folder> <outputpath>')
  3633. usage()
  3634. sys.exit(1)
  3635. scan_folder = os.path.abspath(args[0])
  3636. outputpath = os.path.abspath(args[1])
  3637. # Validate scan folder
  3638. if not os.path.exists(scan_folder):
  3639. print(f'FATAL: Scan folder does not exist: {scan_folder}')
  3640. sys.exit(1)
  3641. if not os.path.isdir(scan_folder):
  3642. print(f'FATAL: Scan folder is not a directory: {scan_folder}')
  3643. sys.exit(1)
  3644. # Discover repositories
  3645. print(f'Scanning folder for git repositories: {scan_folder}')
  3646. repositories = discover_repositories(scan_folder)
  3647. if not repositories:
  3648. print(f'No git repositories found in: {scan_folder}')
  3649. sys.exit(0)
  3650. print(f'Found {len(repositories)} git repositories:')
  3651. for repo_name, repo_path in repositories:
  3652. print(f' - {repo_name}')
  3653. # Generate reports for each repository
  3654. self.run_multi_repo(repositories, outputpath)
  3655. else:
  3656. # Original single/multiple repository mode
  3657. if len(args) < 2:
  3658. usage()
  3659. sys.exit(0)
  3660. self.run_single_mode(args)
  3661. def run_multi_repo(self, repositories, base_outputpath):
  3662. """Generate reports for multiple repositories."""
  3663. rundir = os.getcwd()
  3664. # Validate and create base output directory
  3665. try:
  3666. os.makedirs(base_outputpath, exist_ok=True)
  3667. except PermissionError:
  3668. print(f'FATAL: Permission denied creating output directory: {base_outputpath}')
  3669. sys.exit(1)
  3670. except OSError as e:
  3671. print(f'FATAL: Error creating output directory {base_outputpath}: {e}')
  3672. sys.exit(1)
  3673. if not os.path.isdir(base_outputpath):
  3674. print('FATAL: Output path is not a directory or does not exist')
  3675. sys.exit(1)
  3676. # Check write permissions
  3677. if not os.access(base_outputpath, os.W_OK):
  3678. print(f'FATAL: No write permission for output directory: {base_outputpath}')
  3679. sys.exit(1)
  3680. if not getgnuplotversion():
  3681. print('gnuplot not found')
  3682. sys.exit(1)
  3683. if conf['verbose']:
  3684. print('Configuration:')
  3685. for key, value in conf.items():
  3686. print(f' {key}: {value}')
  3687. print()
  3688. print(f'Base output path: {base_outputpath}')
  3689. successful_reports = 0
  3690. failed_reports = []
  3691. for repo_name, repo_path in repositories:
  3692. print(f'\n{"="*60}')
  3693. print(f'Processing repository: {repo_name}')
  3694. print(f'Repository path: {repo_path}')
  3695. # Create repository-specific output directory with pattern: repositoryname_report
  3696. repo_output_path = os.path.join(base_outputpath, f'{repo_name}_report')
  3697. try:
  3698. os.makedirs(repo_output_path, exist_ok=True)
  3699. print(f'Report output path: {repo_output_path}')
  3700. # Process this repository
  3701. self.process_single_repository(repo_path, repo_output_path, rundir)
  3702. successful_reports += 1
  3703. print(f'✓ Successfully generated report for {repo_name}')
  3704. except Exception as e:
  3705. failed_reports.append((repo_name, str(e)))
  3706. print(f'✗ Failed to generate report for {repo_name}: {e}')
  3707. if conf['debug']:
  3708. import traceback
  3709. traceback.print_exc()
  3710. # Summary
  3711. print(f'\n{"="*60}')
  3712. print(f'Multi-repository report generation complete!')
  3713. print(f'Successfully processed: {successful_reports}/{len(repositories)} repositories')
  3714. if failed_reports:
  3715. print(f'\nFailed repositories:')
  3716. for repo_name, error in failed_reports:
  3717. print(f' - {repo_name}: {error}')
  3718. if successful_reports > 0:
  3719. print(f'\nReports generated in: {base_outputpath}')
  3720. print('Repository reports:')
  3721. for repo_name, repo_path in repositories:
  3722. if (repo_name, f'Error processing {repo_name}') not in failed_reports:
  3723. report_path = os.path.join(base_outputpath, f'{repo_name}_report')
  3724. print(f' - {repo_name}: {report_path}/index.html')
  3725. def run_single_mode(self, args):
  3726. """Original single/multiple repository mode."""
  3727. outputpath = os.path.abspath(args[-1])
  3728. rundir = os.getcwd()
  3729. # Validate git paths
  3730. git_paths = args[0:-1]
  3731. for gitpath in git_paths:
  3732. if not os.path.exists(gitpath):
  3733. print(f'FATAL: Git repository path does not exist: {gitpath}')
  3734. sys.exit(1)
  3735. if not os.path.isdir(gitpath):
  3736. print(f'FATAL: Git repository path is not a directory: {gitpath}')
  3737. sys.exit(1)
  3738. git_dir = os.path.join(gitpath, '.git')
  3739. if not os.path.exists(git_dir):
  3740. print(f'FATAL: Path is not a git repository (no .git directory found): {gitpath}')
  3741. sys.exit(1)
  3742. # Validate and create output directory
  3743. try:
  3744. os.makedirs(outputpath, exist_ok=True)
  3745. except PermissionError:
  3746. print(f'FATAL: Permission denied creating output directory: {outputpath}')
  3747. sys.exit(1)
  3748. except OSError as e:
  3749. print(f'FATAL: Error creating output directory {outputpath}: {e}')
  3750. sys.exit(1)
  3751. if not os.path.isdir(outputpath):
  3752. print('FATAL: Output path is not a directory or does not exist')
  3753. sys.exit(1)
  3754. # Check write permissions
  3755. if not os.access(outputpath, os.W_OK):
  3756. print(f'FATAL: No write permission for output directory: {outputpath}')
  3757. sys.exit(1)
  3758. if not getgnuplotversion():
  3759. print('gnuplot not found')
  3760. sys.exit(1)
  3761. if conf['verbose']:
  3762. print('Configuration:')
  3763. for key, value in conf.items():
  3764. print(f' {key}: {value}')
  3765. print()
  3766. print('Output path: %s' % outputpath)
  3767. cachefile = os.path.join(outputpath, 'gitstats.cache')
  3768. data = GitDataCollector()
  3769. data.loadCache(cachefile)
  3770. for gitpath in git_paths:
  3771. print('Git path: %s' % gitpath)
  3772. prevdir = os.getcwd()
  3773. os.chdir(gitpath)
  3774. print('Collecting data...')
  3775. data.collect(gitpath)
  3776. os.chdir(prevdir)
  3777. print('Refining data...')
  3778. data.saveCache(cachefile)
  3779. data.refine()
  3780. os.chdir(rundir)
  3781. print('Generating report...')
  3782. # Always generate both HTML and PDF reports
  3783. print('Creating HTML report...')
  3784. html_report = HTMLReportCreator()
  3785. html_report.create(data, outputpath)
  3786. print('Creating PDF report...')
  3787. pdf_report = PDFReportCreator()
  3788. pdf_report.create(data, outputpath)
  3789. time_end = time.time()
  3790. exectime_internal = time_end - time_start
  3791. external_percentage = (100.0 * exectime_external) / exectime_internal if exectime_internal > 0 else 0.0
  3792. print('Execution time %.5f secs, %.5f secs (%.2f %%) in external commands)' % (exectime_internal, exectime_external, external_percentage))
  3793. if sys.stdin.isatty():
  3794. print('You may now run:')
  3795. print()
  3796. print(' sensible-browser \'%s\'' % os.path.join(outputpath, 'index.html').replace("'", "'\\''"))
  3797. pdf_filename = f"gitstats_{data.projectname.replace(' ', '_')}.pdf"
  3798. print(' PDF report: \'%s\'' % os.path.join(outputpath, pdf_filename).replace("'", "'\\''"))
  3799. print()
  3800. def process_single_repository(self, repo_path, output_path, rundir):
  3801. """Process a single repository and generate its report."""
  3802. cachefile = os.path.join(output_path, 'gitstats.cache')
  3803. data = GitDataCollector()
  3804. data.loadCache(cachefile)
  3805. print(f' Collecting data from: {repo_path}')
  3806. prevdir = os.getcwd()
  3807. os.chdir(repo_path)
  3808. data.collect(repo_path)
  3809. os.chdir(prevdir)
  3810. print(' Refining data...')
  3811. data.saveCache(cachefile)
  3812. data.refine()
  3813. os.chdir(rundir)
  3814. print(' Generating report...')
  3815. # Always generate both HTML and PDF reports
  3816. print(' Creating HTML report...')
  3817. html_report = HTMLReportCreator()
  3818. html_report.create(data, output_path)
  3819. print(' Creating PDF report...')
  3820. pdf_report = PDFReportCreator()
  3821. pdf_report.create(data, output_path)
  3822. print(f' Report generated in: {output_path}')
  3823. if __name__=='__main__':
  3824. try:
  3825. g = GitStats()
  3826. g.run(sys.argv[1:])
  3827. except KeyboardInterrupt:
  3828. print('\nInterrupted by user')
  3829. sys.exit(1)
  3830. except KeyError as e:
  3831. print(f'FATAL: Configuration error: {e}')
  3832. sys.exit(1)
  3833. except Exception as e:
  3834. print(f'FATAL: Unexpected error: {e}')
  3835. if conf.get('debug', False):
  3836. import traceback
  3837. traceback.print_exc()
  3838. sys.exit(1)