- import datetime
- import getopt
- import glob
- import os
- import pickle
- import platform
- import re
- import shutil
- import subprocess
- import sys
- import time
- import zlib
- from collections import defaultdict
- from fpdf import FPDF
- from fpdf.enums import XPos, YPos
-
- if sys.version_info < (3, 6):
- print("Python 3.6 or higher is required for gitstats", file=sys.stderr)
- sys.exit(1)
-
- from multiprocessing import Pool
-
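- # Force the C locale so git's output is stable and locale-independent to parse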
- os.environ['LC_ALL'] = 'C'
-
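- # Common gnuplot preamble: transparent 640x240 PNG output, full-size plot area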
- GNUPLOT_COMMON = 'set terminal png transparent size 640,240\nset size 1.0,1.0\n'
- ON_LINUX = (platform.system() == 'Linux')
- WEEKDAYS = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
-
- exectime_internal = 0.0
- exectime_external = 0.0
- time_start = time.time()
-
- # By default, gnuplot is looked up on PATH, but this can be overridden with
- # the environment variable "GNUPLOT"
- gnuplot_cmd = 'gnuplot'
- if 'GNUPLOT' in os.environ:
- gnuplot_cmd = os.environ['GNUPLOT']
-
- conf = {
- 'max_domains': 10,
- 'max_ext_length': 10,
- 'style': 'gitstats.css',
- 'max_authors': 20,
- 'authors_top': 5,
- 'commit_begin': '',
- 'commit_end': 'HEAD',
- 'linear_linestats': 1,
- 'project_name': '',
- 'processes': 8,
- 'start_date': '',
- 'debug': False,
- 'verbose': False
- }
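- # Hypothetical usage sketch: these defaults can be adjusted before collection,
- # e.g. conf['start_date'] = '2020-01-01' and conf['processes'] = 4 to analyze
- # only recent history with four worker processes (the option parsing that
- # normally sets these values lives elsewhere in the file).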
-
- def getpipeoutput(cmds, quiet = False):
- global exectime_external
- start = time.time()
-
- # Basic input validation: warn about obvious command-injection constructs
- for cmd in cmds:
- if not isinstance(cmd, str):
- raise TypeError("Commands must be strings")
- # Check for obvious command injection attempts
- if any(dangerous in cmd for dangerous in [';', '&&', '||', '`', '$(']):
- print(f'Warning: Potentially dangerous command detected: {cmd}')
-
- if (not quiet and ON_LINUX and os.isatty(1)) or conf['verbose']:
- print('>> ' + ' | '.join(cmds), end='')
- sys.stdout.flush()
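- # Build a shell-style pipeline: run the first command, then feed each
- # command's stdout into the next command's stdin (cmd1 | cmd2 | ...).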
- p = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
- processes=[p]
- for x in cmds[1:]:
- p = subprocess.Popen(x, stdin = p.stdout, stdout = subprocess.PIPE, shell = True)
- processes.append(p)
- output = p.communicate()[0]
- for p in processes:
- p.wait()
- end = time.time()
- if not quiet or conf['verbose'] or conf['debug']:
- if ON_LINUX and os.isatty(1):
- print('\r', end='')
- print('[%.5f] >> %s' % (end - start, ' | '.join(cmds)))
- if conf['debug']:
- print(f'DEBUG: Command output ({len(output)} bytes): {output[:200].decode("utf-8", errors="replace")}...')
- exectime_external += (end - start)
- return output.decode('utf-8', errors='replace').rstrip('\n')
-
- def getlogrange(defaultrange = 'HEAD', end_only = True):
- commit_range = getcommitrange(defaultrange, end_only)
- if len(conf['start_date']) > 0:
- return '--since="%s" "%s"' % (conf['start_date'], commit_range)
- return commit_range
-
- def getcommitrange(defaultrange = 'HEAD', end_only = False):
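- # Returns a git revision range: just conf['commit_end'] when end_only is set
- # (or no begin commit is configured), otherwise "begin..end".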
- if len(conf['commit_end']) > 0:
- if end_only or len(conf['commit_begin']) == 0:
- return conf['commit_end']
- return '%s..%s' % (conf['commit_begin'], conf['commit_end'])
- return defaultrange
-
- def getkeyssortedbyvalues(d):
- return [key for (value, key) in sorted((value, key) for (key, value) in d.items())]
-
- # e.g. d['author'] = { 'commits': 512 }; getkeyssortedbyvaluekey(d, 'commits')
- # returns the keys of d sorted by their 'commits' value
- def getkeyssortedbyvaluekey(d, key):
- return [k for (value, k) in sorted((d[k][key], k) for k in d.keys())]
-
- def getstatsummarycounts(line):
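- # Parses a "git --shortstat" summary such as
- # "3 files changed, 17 insertions(+), 5 deletions(-)"
- # and returns [files, insertions, deletions] as strings.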
- numbers = re.findall(r'\d+', line)
- if len(numbers) == 1:
- # neither insertions nor deletions: probably happens only for "0 files changed"
- numbers.append('0')
- numbers.append('0')
- elif len(numbers) == 2 and line.find('(+)') != -1:
- numbers.append('0') # only insertions were printed on the line
- elif len(numbers) == 2 and line.find('(-)') != -1:
- numbers.insert(1, '0') # only deletions were printed on the line
- return numbers
-
- VERSION = 0
- def getversion():
- global VERSION
- if VERSION == 0:
- gitstats_repo = os.path.dirname(os.path.abspath(__file__))
- VERSION = getpipeoutput(["git --git-dir=%s/.git --work-tree=%s rev-parse --short %s" %
- (gitstats_repo, gitstats_repo, getcommitrange('HEAD').split('\n')[0])])
- return VERSION
-
- def getgitversion():
- return getpipeoutput(['git --version']).split('\n')[0]
-
- def getgnuplotversion():
- return getpipeoutput(['%s --version' % gnuplot_cmd]).split('\n')[0]
-
- def getnumoffilesfromrev(time_rev):
- """
- Get number of files changed in commit
- """
- time, rev = time_rev
- return (int(time), rev, int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0]))
-
- def getnumoflinesinblob(ext_blob):
- """
- Get number of lines in blob
- """
- ext, blob_id = ext_blob
- return (ext, blob_id, int(getpipeoutput(['git cat-file blob %s' % blob_id, 'wc -l']).split()[0]))
-
- def analyzesloc(ext_blob):
- """
- Analyze source lines of code vs comments vs blank lines in a blob
- Returns (ext, blob_id, total_lines, source_lines, comment_lines, blank_lines)
- """
- ext, blob_id = ext_blob
- content = getpipeoutput(['git cat-file blob %s' % blob_id])
-
- total_lines = 0
- source_lines = 0
- comment_lines = 0
- blank_lines = 0
-
- # Define comment patterns for different file types
- comment_patterns = {
- '.py': [r'^\s*#', r'^\s*"""', r'^\s*\'\'\''],
- '.js': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
- '.ts': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
- '.java': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
- '.cpp': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
- '.c': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
- '.h': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
- '.css': [r'^\s*/\*', r'^\s*\*'],
- '.html': [r'^\s*<!--'],
- '.xml': [r'^\s*<!--'],
- '.sh': [r'^\s*#'],
- '.rb': [r'^\s*#'],
- '.pl': [r'^\s*#'],
- '.php': [r'^\s*//', r'^\s*/\*', r'^\s*\*', r'^\s*#'],
- }
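- # Note: this is a line-based heuristic. Only lines that *start* with a comment
- # marker are counted as comments, so the interior of multi-line block comments
- # (lines without a leading marker such as '*') is counted as source.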
-
- patterns = comment_patterns.get(ext, [])
-
- for line in content.split('\n'):
- total_lines += 1
- line_stripped = line.strip()
-
- if not line_stripped:
- blank_lines += 1
- elif any(re.match(pattern, line) for pattern in patterns):
- comment_lines += 1
- else:
- source_lines += 1
-
- return (ext, blob_id, total_lines, source_lines, comment_lines, blank_lines)
-
- class DataCollector:
- """Manages data collection from a revision control repository."""
- def __init__(self):
- self.stamp_created = time.time()
- self.cache = {}
- self.total_authors = 0
- self.activity_by_hour_of_day = defaultdict(int) # hour -> commits
- self.activity_by_day_of_week = defaultdict(int) # day -> commits
- self.activity_by_month_of_year = defaultdict(int) # month [1-12] -> commits
- self.activity_by_hour_of_week = defaultdict(lambda: defaultdict(int)) # weekday -> hour -> commits
- self.activity_by_hour_of_day_busiest = 0
- self.activity_by_hour_of_week_busiest = 0
- self.activity_by_year_week = defaultdict(int) # 'YYYY-WW' -> commits
- self.activity_by_year_week_peak = 0
-
- self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed}
-
- self.total_commits = 0
- self.total_files = 0
- self.authors_by_commits = 0
-
- # domains
- self.domains = defaultdict(lambda: defaultdict(int)) # domain -> {'commits': count, ...}
-
- # author of the month
- self.author_of_month = defaultdict(lambda: defaultdict(int)) # month -> author -> commits
- self.author_of_year = defaultdict(lambda: defaultdict(int)) # year -> author -> commits
- self.commits_by_month = defaultdict(int) # month -> commits
- self.commits_by_year = defaultdict(int) # year -> commits
- self.lines_added_by_month = defaultdict(int) # month -> lines added
- self.lines_added_by_year = defaultdict(int) # year -> lines added
- self.lines_removed_by_month = defaultdict(int) # month -> lines removed
- self.lines_removed_by_year = defaultdict(int) # year -> lines removed
- self.first_commit_stamp = 0
- self.last_commit_stamp = 0
- self.last_active_day = None
- self.active_days = set()
-
- # lines
- self.total_lines = 0
- self.total_lines_added = 0
- self.total_lines_removed = 0
-
- # SLOC (Source Lines of Code) analysis
- self.total_source_lines = 0
- self.total_comment_lines = 0
- self.total_blank_lines = 0
- self.sloc_by_extension = {} # ext -> {'source': 0, 'comments': 0, 'blank': 0, 'total': 0}
-
- # File size and revision tracking
- self.file_sizes = {} # filepath -> size in bytes
- self.file_revisions = {} # filepath -> revision count
-
- # Directory activity tracking
- self.directories = defaultdict(lambda: {'commits': 0, 'lines_added': 0, 'lines_removed': 0, 'files': set()})
- self.directory_revisions = defaultdict(int) # directory -> total file revisions in directory
-
- # size
- self.total_size = 0
-
- # timezone
- self.commits_by_timezone = defaultdict(int) # timezone -> commits
-
- # tags
- self.tags = {}
-
- self.files_by_stamp = {} # stamp -> files
-
- # extensions
- self.extensions = {} # extension -> files, lines
-
- # line statistics
- self.changes_by_date = {} # stamp -> { files, ins, del }
-
- # Pace of Changes tracking (number of line changes happening over time)
- self.pace_of_changes = {} # stamp -> total_line_changes (ins + del)
-
- # Last 30 days activity
- self.last_30_days_commits = 0
- self.last_30_days_lines_added = 0
- self.last_30_days_lines_removed = 0
-
- # Last 12 months activity
- self.last_12_months_commits = defaultdict(int) # month -> commits
- self.last_12_months_lines_added = defaultdict(int) # month -> lines added
- self.last_12_months_lines_removed = defaultdict(int) # month -> lines removed
-
- # Repository size tracking
- self.repository_size_mb = 0.0
-
- # Branch analysis
- self.branches = {} # branch_name -> {'commits': 0, 'lines_added': 0, 'lines_removed': 0, 'authors': {}, 'is_merged': True, 'merge_base': '', 'unique_commits': []}
- self.unmerged_branches = [] # list of branch names that are not merged into main branch
- self.main_branch = 'master' # will be detected automatically
-
- # Team collaboration analysis
- self.author_collaboration = {} # author -> {'worked_with': {other_author: shared_files}, 'file_ownership': {file: change_count}}
- self.commit_patterns = {} # author -> {'avg_commit_size': lines, 'small_commits': count, 'large_commits': count, 'commit_frequency': commits_per_day}
- self.working_patterns = {} # author -> {'night_commits': count, 'weekend_commits': count, 'peak_hours': [hours], 'timezone_pattern': {tz: count}}
- self.impact_analysis = {} # author -> {'critical_files': [files], 'impact_score': score, 'bug_potential': score}
- self.team_performance = {} # author -> {'efficiency_score': score, 'consistency': score, 'leadership_score': score}
-
- # File importance tracking
- self.critical_files = set() # Files that are likely critical (main.py, app.py, index.html, etc.)
- self.file_impact_scores = {} # file -> impact_score based on how often it's changed and by whom
-
- # Time-based analysis
- self.commits_by_time_of_day = defaultdict(lambda: defaultdict(int)) # author -> hour -> commits
- self.commits_by_day_of_week = defaultdict(lambda: defaultdict(int)) # author -> day -> commits
- self.author_active_periods = {} # author -> {'active_days': set, 'longest_streak': days, 'avg_gap': days}
-
- # Quality indicators
- self.potential_bug_commits = [] # List of commits that might indicate bugs (reverts, fixes, etc.)
- self.refactoring_commits = [] # List of commits that appear to be refactoring
- self.feature_commits = [] # List of commits that appear to add features
-
- ##
- # This should be the main function to extract data from the repository.
- def collect(self, dir):
- self.dir = dir
- if len(conf['project_name']) == 0:
- self.projectname = os.path.basename(os.path.abspath(dir))
- else:
- self.projectname = conf['project_name']
-
- ##
- # Load cacheable data
- def loadCache(self, cachefile):
- if not os.path.exists(cachefile):
- return
- print('Loading cache...')
- try:
- with open(cachefile, 'rb') as f:
- try:
- self.cache = pickle.loads(zlib.decompress(f.read()))
- except (zlib.error, pickle.PickleError) as e:
- # temporary hack to upgrade non-compressed caches
- try:
- f.seek(0)
- self.cache = pickle.load(f)
- except (pickle.PickleError, EOFError) as e2:
- print(f'Warning: Failed to load cache file {cachefile}: {e2}')
- self.cache = {}
- except Exception as e:
- print(f'Warning: Unexpected error loading cache file {cachefile}: {e}')
- self.cache = {}
- except IOError as e:
- print(f'Warning: Could not open cache file {cachefile}: {e}')
- self.cache = {}
-
- ##
- # Produce any additional statistics from the extracted data.
- def refine(self):
- pass
-
- ##
- # Get a dictionary describing the given author
- def getAuthorInfo(self, author):
- return None
-
- def getActivityByDayOfWeek(self):
- return {}
-
- def getActivityByHourOfDay(self):
- return {}
-
- # Get a dictionary describing the given domain
- def getDomainInfo(self, domain):
- return None
-
- ##
- # Get a list of authors
- def getAuthors(self):
- return []
-
- def getFirstCommitDate(self):
- return datetime.datetime.now()
-
- def getLastCommitDate(self):
- return datetime.datetime.now()
-
- def getStampCreated(self):
- return self.stamp_created
-
- def getTags(self):
- return []
-
- def getTotalAuthors(self):
- return -1
-
- def getTotalCommits(self):
- return -1
-
- def getTotalFiles(self):
- return -1
-
- def getTotalLOC(self):
- return -1
-
- ##
- # Save cacheable data
- def saveCache(self, cachefile):
- print('Saving cache...')
- tempfile = cachefile + '.tmp'
- try:
- with open(tempfile, 'wb') as f:
- #pickle.dump(self.cache, f)
- data = zlib.compress(pickle.dumps(self.cache))
- f.write(data)
- try:
- os.remove(cachefile)
- except OSError:
- pass
- os.rename(tempfile, cachefile)
- except IOError as e:
- print(f'Warning: Could not save cache file {cachefile}: {e}')
- # Clean up temp file if it exists
- try:
- os.remove(tempfile)
- except OSError:
- pass
-
- class GitDataCollector(DataCollector):
- def collect(self, dir):
- DataCollector.collect(self, dir)
-
- self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getlogrange(), 'wc -l']))
- #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
-
- # Clear tags for each repository to avoid multirepo contamination
- if not hasattr(self, '_first_repo'):
- self._first_repo = True
- else:
- # For subsequent repos, clear tags to avoid mixing
- self.tags = {}
-
- # tags
- lines = getpipeoutput(['git show-ref --tags']).split('\n')
- for line in lines:
- if len(line) == 0:
- continue
- (sha, tag) = line.split(' ')
-
- tag = tag.replace('refs/tags/', '')
- output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % sha])
- if len(output) > 0:
- parts = output.split(' ')
- stamp = 0
- try:
- stamp = int(parts[0])
- except ValueError:
- stamp = 0
- self.tags[tag] = { 'stamp': stamp, 'hash' : sha, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} }
-
- # collect info on tags, walking from the oldest tag forward
- tags_sorted_by_date = [el[1] for el in sorted((el[1]['date'], el[0]) for el in self.tags.items())]
- prev = None
- for tag in tags_sorted_by_date:
- cmd = 'git shortlog -s "%s"' % tag
- if prev != None:
- cmd += ' "^%s"' % prev
- output = getpipeoutput([cmd])
- if len(output) == 0:
- continue
- prev = tag
- for line in output.split('\n'):
- parts = re.split(r'\s+', line.strip(), maxsplit=1)
- commits = int(parts[0])
- author = parts[1]
- self.tags[tag]['commits'] += commits
- self.tags[tag]['authors'][author] = commits
-
- # Collect revision statistics
- # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
- lines = getpipeoutput(['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split('\n')
- for line in lines:
- parts = line.split(' ', 4)
- author = ''
- try:
- stamp = int(parts[0])
- except ValueError:
- stamp = 0
- timezone = parts[3]
- author, mail = parts[4].split('<', 1)
- author = author.rstrip()
- mail = mail.rstrip('>')
- domain = '?'
- if mail.find('@') != -1:
- domain = mail.rsplit('@', 1)[1]
- date = datetime.datetime.fromtimestamp(float(stamp))
-
- # First and last commit stamp (may be in any order because of cherry-picking and patches)
- if stamp > self.last_commit_stamp:
- self.last_commit_stamp = stamp
- if self.first_commit_stamp == 0 or stamp < self.first_commit_stamp:
- self.first_commit_stamp = stamp
-
- # activity
- # hour
- hour = date.hour
- self.activity_by_hour_of_day[hour] += 1
- # most active hour?
- if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
- self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]
-
- # day of week
- day = date.weekday()
- self.activity_by_day_of_week[day] += 1
-
- # domain stats; self.domains is a defaultdict, so no explicit init is needed
- self.domains[domain]['commits'] += 1
-
- # hour of week
- self.activity_by_hour_of_week[day][hour] += 1
- # most active hour?
- if self.activity_by_hour_of_week[day][hour] > self.activity_by_hour_of_week_busiest:
- self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[day][hour]
-
- # month of year
- month = date.month
- self.activity_by_month_of_year[month] += 1
-
- # yearly/weekly activity
- yyw = date.strftime('%Y-%W')
- self.activity_by_year_week[yyw] += 1
- if self.activity_by_year_week_peak < self.activity_by_year_week[yyw]:
- self.activity_by_year_week_peak = self.activity_by_year_week[yyw]
-
- # author stats
- if author not in self.authors:
- self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
- # commits, note again that commits may be in any date order because of cherry-picking and patches
- if 'last_commit_stamp' not in self.authors[author]:
- self.authors[author]['last_commit_stamp'] = stamp
- if stamp > self.authors[author]['last_commit_stamp']:
- self.authors[author]['last_commit_stamp'] = stamp
- if 'first_commit_stamp' not in self.authors[author]:
- self.authors[author]['first_commit_stamp'] = stamp
- if stamp < self.authors[author]['first_commit_stamp']:
- self.authors[author]['first_commit_stamp'] = stamp
-
- # author of the month/year
- yymm = date.strftime('%Y-%m')
- self.author_of_month[yymm][author] += 1
- self.commits_by_month[yymm] += 1
-
- yy = date.year
- self.author_of_year[yy][author] += 1
- self.commits_by_year[yy] += 1
-
- # authors: active days
- yymmdd = date.strftime('%Y-%m-%d')
- if 'last_active_day' not in self.authors[author]:
- self.authors[author]['last_active_day'] = yymmdd
- self.authors[author]['active_days'] = set([yymmdd])
- elif yymmdd != self.authors[author]['last_active_day']:
- self.authors[author]['last_active_day'] = yymmdd
- self.authors[author]['active_days'].add(yymmdd)
-
- # project: active days
- if yymmdd != self.last_active_day:
- self.last_active_day = yymmdd
- self.active_days.add(yymmdd)
-
- # timezone
- self.commits_by_timezone[timezone] += 1
-
- # outputs "<stamp> <tree hash>" for each revision; file counts per tree are computed below
- revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
- lines = []
- revs_to_read = []
- time_rev_count = []
- # Look up each rev in the cache and take its info from there if found;
- # if not, append the rev to the list of revs to read from the repo
- for revline in revlines:
- stamp, rev = revline.split(' ')
- if 'files_in_tree' not in self.cache:
- revs_to_read.append((stamp, rev))
- continue
- if rev in self.cache['files_in_tree']:
- lines.append('%d %d' % (int(stamp), self.cache['files_in_tree'][rev]))
- else:
- revs_to_read.append((stamp, rev))
-
- #Read revisions from repo
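- # Fan the uncached revs out to a worker pool; each job shells out to
- # "git ls-tree | wc -l", so the work is I/O-bound and parallelizes well.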
- pool = Pool(processes=conf['processes'])
- time_rev_count = pool.map(getnumoffilesfromrev, revs_to_read)
- pool.terminate()
- pool.join()
-
- # Update cache with new revisions and append them to the general list
- for (stamp, rev, count) in time_rev_count:
- if 'files_in_tree' not in self.cache:
- self.cache['files_in_tree'] = {}
- self.cache['files_in_tree'][rev] = count
- lines.append('%d %d' % (int(stamp), count))
-
- self.total_commits += len(lines)
- for line in lines:
- parts = line.split(' ')
- if len(parts) != 2:
- continue
- (stamp, files) = parts[0:2]
- try:
- self.files_by_stamp[int(stamp)] = int(files)
- except ValueError:
- print('Warning: failed to parse line "%s"' % line)
-
- # extensions and size of files
- lines = getpipeoutput(['git ls-tree -r -l -z %s' % getcommitrange('HEAD', end_only = True)]).split('\000')
- blobs_to_read = []
- for line in lines:
- if len(line) == 0:
- continue
- parts = re.split(r'\s+', line, maxsplit=4)
- if parts[0] == '160000' and parts[3] == '-':
- # skip submodules
- continue
- blob_id = parts[2]
- size = int(parts[3])
- fullpath = parts[4]
-
- self.total_size += size
- self.total_files += 1
-
- # Track individual file sizes
- self.file_sizes[fullpath] = size
-
- filename = fullpath.split('/')[-1] # strip directories
- if filename.find('.') == -1 or filename.rfind('.') == 0:
- ext = ''
- else:
- ext = filename[(filename.rfind('.') + 1):]
- if len(ext) > conf['max_ext_length']:
- ext = ''
- if ext not in self.extensions:
- self.extensions[ext] = {'files': 0, 'lines': 0}
- self.extensions[ext]['files'] += 1
- # if the cache is empty, add ext and blob id to the list of new blobs;
- # otherwise try to read the needed info from the cache
- if 'lines_in_blob' not in self.cache:
- blobs_to_read.append((ext,blob_id))
- continue
- if blob_id in self.cache['lines_in_blob']:
- self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
- else:
- blobs_to_read.append((ext,blob_id))
-
- # Get line counts for new blobs that weren't found in the cache
- pool = Pool(processes=conf['processes'])
- ext_blob_linecount = pool.map(getnumoflinesinblob, blobs_to_read)
- pool.terminate()
- pool.join()
-
- # Also get SLOC analysis for the same blobs
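- # Caveat: SLOC is computed only for blobs missing from the line-count cache,
- # so these totals can undercount when running against a warm cache.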
- pool = Pool(processes=conf['processes'])
- ext_blob_sloc = pool.map(analyzesloc, blobs_to_read)
- pool.terminate()
- pool.join()
-
- # Update cache and record the line count of each new blob
- for (ext, blob_id, linecount) in ext_blob_linecount:
- if 'lines_in_blob' not in self.cache:
- self.cache['lines_in_blob'] = {}
- self.cache['lines_in_blob'][blob_id] = linecount
- self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
-
- # Update SLOC statistics
- for (ext, blob_id, total_lines, source_lines, comment_lines, blank_lines) in ext_blob_sloc:
- # Initialize extension SLOC tracking
- if ext not in self.sloc_by_extension:
- self.sloc_by_extension[ext] = {'source': 0, 'comments': 0, 'blank': 0, 'total': 0}
-
- # Update extension SLOC counts
- self.sloc_by_extension[ext]['source'] += source_lines
- self.sloc_by_extension[ext]['comments'] += comment_lines
- self.sloc_by_extension[ext]['blank'] += blank_lines
- self.sloc_by_extension[ext]['total'] += total_lines
-
- # Update global SLOC counts
- self.total_source_lines += source_lines
- self.total_comment_lines += comment_lines
- self.total_blank_lines += blank_lines
-
- # File revision counting
- print('Collecting file revision statistics...')
- revision_lines = getpipeoutput(['git log --name-only --pretty=format: %s' % getlogrange('HEAD')]).strip().split('\n')
- for line in revision_lines:
- line = line.strip()
- if len(line) > 0 and not line.startswith('commit'):
- # This is a filename
- if line not in self.file_revisions:
- self.file_revisions[line] = 0
- self.file_revisions[line] += 1
-
- # Track directory activity
- directory = os.path.dirname(line) if os.path.dirname(line) else '.'
- self.directory_revisions[directory] += 1
- self.directories[directory]['files'].add(line)
-
- # Directory activity analysis
- print('Collecting directory activity statistics...')
- numstat_lines = getpipeoutput(['git log --numstat --pretty=format:"%%at %%aN" %s' % getlogrange('HEAD')]).split('\n')
- current_author = None
- current_timestamp = None
-
- for line in numstat_lines:
- line = line.strip()
- if not line:
- continue
-
- # Check if this is a commit header line (timestamp + author)
- if line.count('\t') == 0 and ' ' in line:
- try:
- parts = line.split(' ', 1)
- current_timestamp = int(parts[0])
- current_author = parts[1]
- continue
- except (ValueError, IndexError):
- pass
-
- # Check if this is a numstat line (additions\tdeletions\tfilename)
- if line.count('\t') >= 2:
- parts = line.split('\t')
- if len(parts) >= 3:
- try:
- additions = int(parts[0]) if parts[0] != '-' else 0
- deletions = int(parts[1]) if parts[1] != '-' else 0
- filename = '\t'.join(parts[2:]) # Handle filenames with tabs
-
- # Track directory activity
- directory = os.path.dirname(filename) if os.path.dirname(filename) else '.'
- self.directories[directory]['commits'] += 1 # Will be deduplicated later
- self.directories[directory]['lines_added'] += additions
- self.directories[directory]['lines_removed'] += deletions
- self.directories[directory]['files'].add(filename)
- except ValueError:
- pass
-
- # line statistics
- # outputs:
- # N files changed, N insertions (+), N deletions(-)
- # <stamp> <author>
- self.changes_by_date = {} # stamp -> { files, ins, del }
- # computation of lines of code by date is better done
- # on a linear history.
- extra = ''
- if conf['linear_linestats']:
- extra = '--first-parent -m'
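- # --first-parent walks only the mainline of merges (with -m to still show
- # their diffs), so each change is counted once along a linear history.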
- lines = getpipeoutput(['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getlogrange('HEAD'))]).split('\n')
- lines.reverse()
- files, inserted, deleted, total_lines = 0, 0, 0, 0
- author = None
- for line in lines:
- if len(line) == 0:
- continue
-
- # <stamp> <author>
- if re.search(r'files? changed', line) is None:
- pos = line.find(' ')
- if pos != -1:
- try:
- (stamp, author) = (int(line[:pos]), line[pos+1:])
- self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }
-
- # Track pace of changes (total line changes)
- self.pace_of_changes[stamp] = inserted + deleted
-
- date = datetime.datetime.fromtimestamp(stamp)
-
- # Track last 30 days activity
- now = time.time()
- if now - stamp <= 30 * 24 * 3600: # 30 days in seconds
- self.last_30_days_commits += 1
- self.last_30_days_lines_added += inserted
- self.last_30_days_lines_removed += deleted
-
- # Track last 12 months activity
- if now - stamp <= 365 * 24 * 3600: # 12 months in seconds
- yymm = date.strftime('%Y-%m')
- self.last_12_months_commits[yymm] += 1
- self.last_12_months_lines_added[yymm] += inserted
- self.last_12_months_lines_removed[yymm] += deleted
-
- yymm = date.strftime('%Y-%m')
- self.lines_added_by_month[yymm] += inserted
- self.lines_removed_by_month[yymm] += deleted
-
- yy = date.year
- self.lines_added_by_year[yy] += inserted
- self.lines_removed_by_year[yy] += deleted
-
- files, inserted, deleted = 0, 0, 0
- except ValueError:
- print('Warning: unexpected line "%s"' % line)
- else:
- print('Warning: unexpected line "%s"' % line)
- else:
- numbers = getstatsummarycounts(line)
-
- if len(numbers) == 3:
- (files, inserted, deleted) = list(map(lambda el : int(el), numbers))
- total_lines += inserted
- total_lines -= deleted
- self.total_lines_added += inserted
- self.total_lines_removed += deleted
-
- else:
- print('Warning: failed to handle line "%s"' % line)
- (files, inserted, deleted) = (0, 0, 0)
- #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
- self.total_lines += total_lines
-
- # Per-author statistics
-
- # defined for (stamp, author) only if the author committed at this timestamp.
- self.changes_by_date_by_author = {} # stamp -> author -> lines_added
-
- # Similar to the above, but never use --first-parent
- # (we need to walk through every commit to know who
- # committed what, not just through mainline)
- lines = getpipeoutput(['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getlogrange('HEAD'))]).split('\n')
- lines.reverse()
- files, inserted, deleted = 0, 0, 0
- author = None
- stamp = 0
- for line in lines:
- if len(line) == 0:
- continue
-
- # <stamp> <author>
- if re.search(r'files? changed', line) is None:
- pos = line.find(' ')
- if pos != -1:
- try:
- oldstamp = stamp
- (stamp, author) = (int(line[:pos]), line[pos+1:])
- if oldstamp > stamp:
- # clock skew, keep old timestamp to avoid having ugly graph
- stamp = oldstamp
- if author not in self.authors:
- self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
- self.authors[author]['commits'] += 1
- self.authors[author]['lines_added'] += inserted
- self.authors[author]['lines_removed'] += deleted
- if stamp not in self.changes_by_date_by_author:
- self.changes_by_date_by_author[stamp] = {}
- if author not in self.changes_by_date_by_author[stamp]:
- self.changes_by_date_by_author[stamp][author] = {}
- self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author]['lines_added']
- self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits']
- files, inserted, deleted = 0, 0, 0
- except ValueError:
- print('Warning: unexpected line "%s"' % line)
- else:
- print('Warning: unexpected line "%s"' % line)
- else:
- numbers = getstatsummarycounts(line)
-
- if len(numbers) == 3:
- (files, inserted, deleted) = list(map(lambda el : int(el), numbers))
- else:
- print('Warning: failed to handle line "%s"' % line)
- (files, inserted, deleted) = (0, 0, 0)
-
- # Branch analysis - collect unmerged branches and per-branch statistics
- if conf['verbose']:
- print('Analyzing branches and detecting unmerged branches...')
- self._analyzeBranches()
-
- # Calculate repository size (this can be slow on large repositories)
- if conf['verbose']:
- print('Calculating repository size...')
- try:
- # Get .git directory size
- git_dir_size = getpipeoutput(['du -sm .git']).split()[0]
- self.repository_size_mb = float(git_dir_size)
- if conf['verbose']:
- print(f'Repository size: {self.repository_size_mb:.1f} MB')
- except (ValueError, IndexError):
- print('Warning: Could not calculate repository size')
- self.repository_size_mb = 0.0
-
- # Perform advanced team analysis
- self._analyzeTeamCollaboration()
- self._analyzeCommitPatterns()
- self._analyzeWorkingPatterns()
- self._analyzeImpactAndQuality()
- self._calculateTeamPerformanceMetrics()
-
- def _detectMainBranch(self):
- """Detect the main branch (master, main, develop, etc.)"""
- # Try common main branch names in order of preference
- main_branch_candidates = ['master', 'main', 'develop', 'development']
-
- # Get all local branches
- branches_output = getpipeoutput(['git branch'])
- local_branches = [line.strip().lstrip('* ') for line in branches_output.split('\n') if line.strip()]
-
- # Check if any of the common main branches exist
- for candidate in main_branch_candidates:
- if candidate in local_branches:
- self.main_branch = candidate
- return candidate
-
- # If none found, use the first branch or fall back to 'master'
- if local_branches:
- self.main_branch = local_branches[0]
- return local_branches[0]
-
- # Fall back to master
- self.main_branch = 'master'
- return 'master'
-
- def _analyzeBranches(self):
- """Analyze all branches and detect unmerged ones"""
- try:
- # Detect main branch
- main_branch = self._detectMainBranch()
- if conf['verbose']:
- print(f'Detected main branch: {main_branch}')
-
- # Get all local branches
- branches_output = getpipeoutput(['git branch'])
- all_branches = [line.strip().lstrip('* ') for line in branches_output.split('\n') if line.strip()]
-
- # Get unmerged branches (branches not merged into main)
- try:
- unmerged_output = getpipeoutput([f'git branch --no-merged {main_branch}'])
- self.unmerged_branches = [line.strip().lstrip('* ') for line in unmerged_output.split('\n')
- if line.strip() and not line.strip().startswith('*')]
- except Exception:
- # If main branch doesn't exist or command fails, assume all branches are unmerged
- self.unmerged_branches = [b for b in all_branches if b != main_branch]
-
- if conf['verbose']:
- print(f'Found {len(self.unmerged_branches)} unmerged branches: {", ".join(self.unmerged_branches)}')
-
- # Analyze each branch
- for branch in all_branches:
- if conf['verbose']:
- print(f'Analyzing branch: {branch}')
- self._analyzeBranch(branch, main_branch)
-
- except Exception as e:
- if conf['verbose'] or conf['debug']:
- print(f'Warning: Branch analysis failed: {e}')
- # Initialize empty structures if analysis fails
- self.unmerged_branches = []
- self.branches = {}
-
- def _analyzeBranch(self, branch_name, main_branch):
- """Analyze a single branch for commits, authors, and line changes"""
- try:
- # Initialize branch data
- self.branches[branch_name] = {
- 'commits': 0,
- 'lines_added': 0,
- 'lines_removed': 0,
- 'authors': {},
- 'is_merged': branch_name not in self.unmerged_branches,
- 'merge_base': '',
- 'unique_commits': []
- }
-
- # Get merge base with main branch
- try:
- merge_base = getpipeoutput([f'git merge-base {branch_name} {main_branch}']).strip()
- self.branches[branch_name]['merge_base'] = merge_base
- except Exception:
- self.branches[branch_name]['merge_base'] = ''
-
- # Get commits unique to this branch (not in main branch)
- if branch_name != main_branch:
- try:
- # Get commits that are in branch but not in main
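- # "git rev-list A ^B" lists commits reachable from A but not from B,
- # i.e. the branch's commits that haven't been merged into main.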
- unique_commits_output = getpipeoutput([f'git rev-list {branch_name} ^{main_branch}'])
- unique_commits = [line.strip() for line in unique_commits_output.split('\n') if line.strip()]
- self.branches[branch_name]['unique_commits'] = unique_commits
-
- # Analyze each unique commit
- for commit in unique_commits:
- self._analyzeBranchCommit(branch_name, commit)
-
- except Exception:
- # If command fails, analyze all commits in the branch
- try:
- all_commits_output = getpipeoutput([f'git rev-list {branch_name}'])
- all_commits = [line.strip() for line in all_commits_output.split('\n') if line.strip()]
- self.branches[branch_name]['unique_commits'] = all_commits[:50] # Limit to avoid too much data
-
- for commit in all_commits[:50]:
- self._analyzeBranchCommit(branch_name, commit)
- except Exception:
- pass
- else:
- # For main branch, count all commits
- try:
- all_commits_output = getpipeoutput([f'git rev-list {branch_name}'])
- all_commits = [line.strip() for line in all_commits_output.split('\n') if line.strip()]
- self.branches[branch_name]['commits'] = len(all_commits)
- self.branches[branch_name]['unique_commits'] = all_commits[:100] # Limit for performance
- except Exception:
- pass
-
- except Exception as e:
- if conf['debug']:
- print(f'Warning: Failed to analyze branch {branch_name}: {e}')
-
- def _analyzeBranchCommit(self, branch_name, commit_hash):
- """Analyze a single commit for branch statistics"""
- try:
- # Get commit author and timestamp
- commit_info = getpipeoutput([f'git log -1 --pretty=format:"%aN %at" {commit_hash}'])
- if not commit_info:
- return
-
- parts = commit_info.rsplit(' ', 1)
- if len(parts) != 2:
- return
-
- author = parts[0]
- try:
- timestamp = int(parts[1])
- except ValueError:
- return
-
- # Update branch commit count
- self.branches[branch_name]['commits'] += 1
-
- # Update author statistics for this branch
- if author not in self.branches[branch_name]['authors']:
- self.branches[branch_name]['authors'][author] = {
- 'commits': 0,
- 'lines_added': 0,
- 'lines_removed': 0
- }
- self.branches[branch_name]['authors'][author]['commits'] += 1
-
- # Get line changes for this commit
- try:
- numstat_output = getpipeoutput([f'git show --numstat --format="" {commit_hash}'])
- for line in numstat_output.split('\n'):
- if line.strip() and '\t' in line:
- parts = line.split('\t')
- if len(parts) >= 2:
- try:
- additions = int(parts[0]) if parts[0] != '-' else 0
- deletions = int(parts[1]) if parts[1] != '-' else 0
-
- # Update branch statistics
- self.branches[branch_name]['lines_added'] += additions
- self.branches[branch_name]['lines_removed'] += deletions
-
- # Update author statistics for this branch
- self.branches[branch_name]['authors'][author]['lines_added'] += additions
- self.branches[branch_name]['authors'][author]['lines_removed'] += deletions
-
- except ValueError:
- pass
- except Exception:
- pass
-
- except Exception as e:
- if conf['debug']:
- print(f'Warning: Failed to analyze commit {commit_hash}: {e}')
-
- def _analyzeTeamCollaboration(self):
- """Analyze how team members collaborate on files and projects"""
- if conf['verbose']:
- print('Analyzing team collaboration patterns...')
-
- try:
- # Get commit details with files changed
- commit_data = getpipeoutput(['git log --name-only --pretty=format:"COMMIT:%H:%aN:%at" %s' % getlogrange('HEAD')]).split('\n')
-
- current_commit = None
- current_author = None
- current_timestamp = None
-
- for line in commit_data:
- line = line.strip()
- if line.startswith('COMMIT:'):
- # Parse commit header: COMMIT:hash:author:timestamp
- parts = line.split(':', 3)
- if len(parts) >= 4:
- current_commit = parts[1]
- current_author = parts[2]
- try:
- current_timestamp = int(parts[3])
- except ValueError:
- current_timestamp = None
- elif line and current_author and not line.startswith('COMMIT:'):
- # This is a filename
- filename = line
-
- # Initialize author collaboration data
- if current_author not in self.author_collaboration:
- self.author_collaboration[current_author] = {
- 'worked_with': defaultdict(lambda: defaultdict(int)),
- 'file_ownership': defaultdict(int)
- }
-
- # Track file ownership
- self.author_collaboration[current_author]['file_ownership'][filename] += 1
-
- # Track who else worked on this file
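- # Note: this shells out to "git log -- <file>" once per (commit, file)
- # pair, which can be very slow on large repositories.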
- file_history = getpipeoutput([f'git log --pretty=format:"%aN" -- "{filename}"']).split('\n')
- unique_authors = set(file_history) - {current_author}
-
- for other_author in unique_authors:
- if other_author.strip():
- self.author_collaboration[current_author]['worked_with'][other_author][filename] += 1
-
- except Exception as e:
- if conf['debug']:
- print(f'Warning: Team collaboration analysis failed: {e}')
-
- def _analyzeCommitPatterns(self):
- """Analyze commit patterns to identify commit behavior (small vs large commits, frequency, etc.)"""
- if conf['verbose']:
- print('Analyzing commit patterns...')
-
- try:
- # Get detailed commit information
- commit_lines = getpipeoutput(['git log --shortstat --pretty=format:"COMMIT:%H:%aN:%at:%s" %s' % getlogrange('HEAD')]).split('\n')
-
- current_author = None
- current_timestamp = None
- current_message = None
- author_commits = defaultdict(list)
-
- for line in commit_lines:
- line = line.strip()
- if line.startswith('COMMIT:'):
- # Parse: COMMIT:hash:author:timestamp:subject
- parts = line.split(':', 4)
- if len(parts) >= 5:
- current_author = parts[2]
- try:
- current_timestamp = int(parts[3])
- current_message = parts[4]
- except ValueError:
- current_timestamp = None
- current_message = ""
- elif line and current_author and re.search(r'files? changed', line):
- # Parse shortstat line
- numbers = re.findall(r'\d+', line)
- if len(numbers) >= 1:
- files_changed = int(numbers[0])
- insertions = int(numbers[1]) if len(numbers) > 1 else 0
- deletions = int(numbers[2]) if len(numbers) > 2 else 0
- total_changes = insertions + deletions
-
- commit_info = {
- 'timestamp': current_timestamp,
- 'files_changed': files_changed,
- 'lines_changed': total_changes,
- 'insertions': insertions,
- 'deletions': deletions,
- 'message': current_message
- }
- author_commits[current_author].append(commit_info)
-
- # Analyze patterns for each author
- for author, commits in author_commits.items():
- if not commits:
- continue
-
- total_commits = len(commits)
- total_lines = sum(c['lines_changed'] for c in commits)
- avg_commit_size = total_lines / total_commits if total_commits else 0
-
- # Categorize commits by size
- small_commits = sum(1 for c in commits if c['lines_changed'] < 10)
- medium_commits = sum(1 for c in commits if 10 <= c['lines_changed'] < 100)
- large_commits = sum(1 for c in commits if c['lines_changed'] >= 100)
-
- # Calculate commit frequency (commits per day across the author's active span);
- # the empty-commits case was already filtered out by the continue above
- timestamps = [c['timestamp'] for c in commits if c['timestamp']]
- if len(timestamps) > 1:
- time_span = max(timestamps) - min(timestamps)
- days_active = time_span / (24 * 3600) if time_span > 0 else 1
- commit_frequency = total_commits / days_active
- else:
- commit_frequency = total_commits
-
- # Analyze commit messages for patterns
- bug_related = sum(1 for c in commits if any(keyword in c['message'].lower()
- for keyword in ['fix', 'bug', 'error', 'issue', 'patch', 'repair']))
- feature_related = sum(1 for c in commits if any(keyword in c['message'].lower()
- for keyword in ['add', 'new', 'feature', 'implement', 'create']))
- refactor_related = sum(1 for c in commits if any(keyword in c['message'].lower()
- for keyword in ['refactor', 'cleanup', 'reorganize', 'restructure', 'optimize']))
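- # (plain substring matching: one commit may be counted in several categories)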
-
- self.commit_patterns[author] = {
- 'total_commits': total_commits,
- 'avg_commit_size': avg_commit_size,
- 'small_commits': small_commits,
- 'medium_commits': medium_commits,
- 'large_commits': large_commits,
- 'commit_frequency': commit_frequency,
- 'bug_related_commits': bug_related,
- 'feature_related_commits': feature_related,
- 'refactor_related_commits': refactor_related,
- 'avg_files_per_commit': sum(c['files_changed'] for c in commits) / total_commits if total_commits else 0
- }
-
- except Exception as e:
- if conf['debug']:
- print(f'Warning: Commit pattern analysis failed: {e}')
-
- def _analyzeWorkingPatterns(self):
- """Analyze when authors typically work (time of day, day of week, timezone patterns)"""
- if conf['verbose']:
- print('Analyzing working time patterns...')
-
- try:
- # Get commit timestamps with timezone info
- commit_lines = getpipeoutput(['git log --pretty=format:"%aN|%at|%ai|%s" %s' % getlogrange('HEAD')]).split('\n')
-
- for line in commit_lines:
- if not line.strip():
- continue
-
- parts = line.split('|', 3)
- if len(parts) < 3:
- continue
-
- author = parts[0]
- try:
- timestamp = int(parts[1])
- date_str = parts[2] # ISO format with timezone
- message = parts[3] if len(parts) > 3 else ""
- except (ValueError, IndexError):
- continue
-
- # Parse date and time information
- date = datetime.datetime.fromtimestamp(timestamp)
- hour = date.hour
- day_of_week = date.weekday() # Monday = 0, Sunday = 6
-
- # Initialize author working patterns
- if author not in self.working_patterns:
- self.working_patterns[author] = {
- 'night_commits': 0, # 22:00 - 06:00
- 'weekend_commits': 0, # Saturday, Sunday
- 'peak_hours': defaultdict(int),
- 'peak_days': defaultdict(int),
- 'timezone_pattern': defaultdict(int),
- 'early_bird': 0, # 06:00 - 09:00
- 'workday': 0, # 09:00 - 17:00
- 'evening': 0, # 17:00 - 22:00
- 'total_commits': 0
- }
-
- self.working_patterns[author]['total_commits'] += 1
- self.working_patterns[author]['peak_hours'][hour] += 1
- self.working_patterns[author]['peak_days'][day_of_week] += 1
-
- # Extract timezone offset from the "%ai" date string (last token, e.g. "+0200");
- # checking the whole string for '+'/'-' would always match the date's dashes
- tz_part = date_str.split()[-1]
- if tz_part.startswith(('+', '-')):
- self.working_patterns[author]['timezone_pattern'][tz_part] += 1
-
- # Categorize by time of day (buckets are disjoint; the night bucket
- # claims 22:00-06:00, so early bird starts at 06:00)
- if 22 <= hour or hour < 6:
- self.working_patterns[author]['night_commits'] += 1
- elif 6 <= hour < 9:
- self.working_patterns[author]['early_bird'] += 1
- elif 9 <= hour < 17:
- self.working_patterns[author]['workday'] += 1
- elif 17 <= hour < 22:
- self.working_patterns[author]['evening'] += 1
-
- # Weekend commits (Saturday = 5, Sunday = 6)
- if day_of_week >= 5:
- self.working_patterns[author]['weekend_commits'] += 1
-
- # Classify commit types from the message keywords; the old membership
- # check compared the author string against a list of dicts and was
- # always true, so a plain append is equivalent and clearer
- if any(keyword in message.lower() for keyword in ['fix', 'bug', 'error', 'patch']):
- self.potential_bug_commits.append({'author': author, 'timestamp': timestamp, 'message': message})
- elif any(keyword in message.lower() for keyword in ['refactor', 'cleanup', 'optimize']):
- self.refactoring_commits.append({'author': author, 'timestamp': timestamp, 'message': message})
- elif any(keyword in message.lower() for keyword in ['add', 'new', 'feature', 'implement']):
- self.feature_commits.append({'author': author, 'timestamp': timestamp, 'message': message})
-
- # Calculate active periods for each author
- for author in self.authors:
- if 'active_days' in self.authors[author]:
- active_days = self.authors[author]['active_days']
- sorted_days = sorted(active_days)
-
- if len(sorted_days) > 1:
- # Calculate gaps between active days
- gaps = []
- for i in range(1, len(sorted_days)):
- prev_date = datetime.datetime.strptime(sorted_days[i-1], '%Y-%m-%d')
- curr_date = datetime.datetime.strptime(sorted_days[i], '%Y-%m-%d')
- gap = (curr_date - prev_date).days
- gaps.append(gap)
-
- avg_gap = sum(gaps) / len(gaps) if gaps else 0
-
- # Find longest streak
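- # (a gap of exactly 1 day means two consecutive active days)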
- longest_streak = 1
- current_streak = 1
- for gap in gaps:
- if gap == 1:
- current_streak += 1
- longest_streak = max(longest_streak, current_streak)
- else:
- current_streak = 1
- else:
- avg_gap = 0
- longest_streak = 1
-
- self.author_active_periods[author] = {
- 'active_days_count': len(active_days),
- 'longest_streak': longest_streak,
- 'avg_gap': avg_gap
- }
-
- except Exception as e:
- if conf['debug']:
- print(f'Warning: Working pattern analysis failed: {e}')
-
- def _analyzeImpactAndQuality(self):
- """Analyze the impact of changes and identify critical files and potential quality issues"""
- if conf['verbose']:
- print('Analyzing impact and quality indicators...')
-
- try:
- # Identify critical files based on common patterns
- all_files = getpipeoutput(['git ls-tree -r --name-only %s' % getcommitrange('HEAD', end_only=True)]).split('\n')
-
- for filepath in all_files:
- if not filepath.strip():
- continue
-
- filename = os.path.basename(filepath).lower()
-
- # Mark files as critical based on common patterns (filename was
- # lowercased above, so all patterns must be lowercase too)
- critical_patterns = [
- 'main.', 'app.', 'index.', 'config.', 'settings.',
- 'setup.', 'package.json', 'requirements.txt', 'dockerfile',
- 'makefile', 'readme', 'license', '.env'
- ]
-
- if any(pattern in filename for pattern in critical_patterns):
- self.critical_files.add(filepath)
-
- # Files in root directory are often critical
- if '/' not in filepath:
- self.critical_files.add(filepath)
-
- # Analyze file impact scores based on change frequency and author diversity
- file_authors = defaultdict(set)
- file_change_count = defaultdict(int)
-
- # Get file change history
- log_lines = getpipeoutput(['git log --name-only --pretty=format:"AUTHOR:%aN" %s' % getlogrange('HEAD')]).split('\n')
- current_author = None
-
- for line in log_lines:
- line = line.strip()
- if line.startswith('AUTHOR:'):
- current_author = line.replace('AUTHOR:', '')
- elif line and current_author and not line.startswith('AUTHOR:'):
- filename = line
- file_authors[filename].add(current_author)
- file_change_count[filename] += 1
-
- # Calculate impact scores
- for filename in file_change_count:
- change_count = file_change_count[filename]
- author_count = len(file_authors[filename])
-
- # Impact score based on change frequency and author diversity
- base_score = min(change_count * 10, 100) # Cap at 100
- diversity_bonus = min(author_count * 5, 25) # Bonus for multiple authors
- critical_bonus = 50 if filename in self.critical_files else 0
-
- impact_score = base_score + diversity_bonus + critical_bonus
- self.file_impact_scores[filename] = impact_score
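- # e.g. a critical file changed 8 times by 3 authors scores
- # min(80, 100) + min(15, 25) + 50 = 145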
-
- # Analyze author impact
- for author in self.authors:
- critical_files_touched = []
- total_impact_score = 0
-
- # Check which critical files this author touched
- for filename in self.critical_files:
- if author in file_authors.get(filename, set()):
- critical_files_touched.append(filename)
- total_impact_score += self.file_impact_scores.get(filename, 0)
-
- # Calculate bug potential based on commit messages and patterns
- author_commits = self.commit_patterns.get(author, {})
- bug_commits = author_commits.get('bug_related_commits', 0)
- total_commits = author_commits.get('total_commits', 1)
- bug_ratio = bug_commits / total_commits if total_commits > 0 else 0
-
- # Higher bug potential if author has many bug-fix commits
- bug_potential = min(bug_ratio * 100, 100)
-
- self.impact_analysis[author] = {
- 'critical_files': critical_files_touched,
- 'impact_score': total_impact_score,
- 'bug_potential': bug_potential,
- 'high_impact_files': [f for f in file_authors if author in file_authors[f] and self.file_impact_scores.get(f, 0) > 50]
- }
-
- except Exception as e:
- if conf['debug']:
- print(f'Warning: Impact analysis failed: {e}')
-
- def _calculateTeamPerformanceMetrics(self):
- """Calculate comprehensive team performance metrics"""
- if conf['verbose']:
- print('Calculating team performance metrics...')
-
- try:
- total_commits = self.getTotalCommits()
- total_lines_changed = self.total_lines_added + self.total_lines_removed
-
- for author in self.authors:
- author_info = self.authors[author]
- commit_patterns = self.commit_patterns.get(author, {})
- working_patterns = self.working_patterns.get(author, {})
- impact_info = self.impact_analysis.get(author, {})
-
- # Efficiency Score (based on lines changed per commit and commit quality)
- avg_commit_size = commit_patterns.get('avg_commit_size', 0)
- total_author_commits = author_info.get('commits', 0)
-
- # Normalize efficiency (sweet spot is around 20-50 lines per commit)
- if 20 <= avg_commit_size <= 50:
- size_efficiency = 100
- elif avg_commit_size < 20:
- size_efficiency = max(0, avg_commit_size * 5) # Penalty for too small commits
- else:
- size_efficiency = max(0, 100 - (avg_commit_size - 50) * 2) # Penalty for too large commits
-
- # Quality indicators
- bug_commits = commit_patterns.get('bug_related_commits', 0)
- feature_commits = commit_patterns.get('feature_related_commits', 0)
- refactor_commits = commit_patterns.get('refactor_related_commits', 0)
-
- quality_score = 0
- if total_author_commits > 0:
- feature_ratio = feature_commits / total_author_commits
- refactor_ratio = refactor_commits / total_author_commits
- bug_ratio = bug_commits / total_author_commits
-
- # the ratios are fractions of all commits, so these weights already
- # yield percentage points; no further scaling is needed
- quality_score = feature_ratio * 40 + refactor_ratio * 30 - bug_ratio * 20
- quality_score = max(0, min(100, quality_score))
-
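- # Efficiency blends commit sizing (60%) and message-derived quality (40%)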
- efficiency_score = (size_efficiency * 0.6 + quality_score * 0.4)
-
- # Consistency Score (based on commit frequency and working patterns)
- commit_frequency = commit_patterns.get('commit_frequency', 0)
- active_periods = self.author_active_periods.get(author, {})
- longest_streak = active_periods.get('longest_streak', 1)
- avg_gap = active_periods.get('avg_gap', 30)
-
- # Consistency based on regular commits and sustained activity
- frequency_score = min(commit_frequency * 20, 100) # Up to 5 commits per day = max score
- streak_score = min(longest_streak * 5, 100) # Longer streaks = better consistency
- gap_score = max(0, 100 - avg_gap * 3) # Smaller gaps = better consistency
-
- consistency_score = (frequency_score * 0.4 + streak_score * 0.3 + gap_score * 0.3)
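- # e.g. 2 commits/day, a 10-day streak and a 5-day average gap give
- # 40*0.4 + 50*0.3 + 85*0.3 = 56.5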
-
- # Leadership Score (based on impact on critical files, collaboration, and mentoring indicators)
- impact_score = impact_info.get('impact_score', 0)
- critical_files_count = len(impact_info.get('critical_files', []))
-
- # Collaboration score based on working with others
- collaboration_data = self.author_collaboration.get(author, {})
- worked_with_count = len(collaboration_data.get('worked_with', {}))
-
- # Normalize impact and collaboration
- impact_leadership = min(impact_score / 10, 100) # Scale impact score
- collaboration_leadership = min(worked_with_count * 10, 100) # Max score at 10 collaborators
- critical_file_leadership = min(critical_files_count * 20, 100) # Max score at 5 critical files
-
- leadership_score = (impact_leadership * 0.4 + collaboration_leadership * 0.3 + critical_file_leadership * 0.3)
-
- # Overall contribution percentage
- author_commits = author_info.get('commits', 0)
- contribution_percentage = (author_commits / total_commits * 100) if total_commits > 0 else 0
-
- # Store performance metrics
- self.team_performance[author] = {
- 'efficiency_score': efficiency_score,
- 'consistency': consistency_score,
- 'leadership_score': leadership_score,
- 'contribution_percentage': contribution_percentage,
- 'overall_score': (efficiency_score * 0.4 + consistency_score * 0.3 + leadership_score * 0.3),
- 'commit_quality_analysis': {
- 'avg_commit_size': avg_commit_size,
- 'small_commits_ratio': commit_patterns.get('small_commits', 0) / total_author_commits if total_author_commits > 0 else 0,
- 'large_commits_ratio': commit_patterns.get('large_commits', 0) / total_author_commits if total_author_commits > 0 else 0,
- 'bug_fix_ratio': bug_commits / total_author_commits if total_author_commits > 0 else 0,
- 'feature_ratio': feature_commits / total_author_commits if total_author_commits > 0 else 0
- }
- }
-
- except Exception as e:
- if conf['debug']:
- print(f'Warning: Team performance calculation failed: {e}')
-
- def refine(self):
- # authors
- # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
- self.authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
- self.authors_by_commits.reverse() # most first
- for i, name in enumerate(self.authors_by_commits):
- self.authors[name]['place_by_commits'] = i + 1
-
- for name in list(self.authors.keys()):
- a = self.authors[name]
- a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits() if self.getTotalCommits() else 0.0
- date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
- date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
- delta = date_last - date_first
- a['date_first'] = date_first.strftime('%Y-%m-%d')
- a['date_last'] = date_last.strftime('%Y-%m-%d')
- a['timedelta'] = delta
- if 'lines_added' not in a: a['lines_added'] = 0
- if 'lines_removed' not in a: a['lines_removed'] = 0
-
- def getActiveDays(self):
- return self.active_days
-
- def getActivityByDayOfWeek(self):
- return self.activity_by_day_of_week
-
- def getActivityByHourOfDay(self):
- return self.activity_by_hour_of_day
-
- def getAuthorInfo(self, author):
- return self.authors[author]
-
- def getAuthors(self, limit = None):
- res = getkeyssortedbyvaluekey(self.authors, 'commits')
- res.reverse()
- return res[:limit]
-
- def getCommitDeltaDays(self):
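- # integer-divide both stamps into UTC day buckets (86400 s) so partial
- # days at either end still count; +1 makes the range inclusive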
- return (self.last_commit_stamp // 86400 - self.first_commit_stamp // 86400) + 1
-
- def getDomainInfo(self, domain):
- return self.domains[domain]
-
- def getDomains(self):
- return list(self.domains.keys())
-
- def getFirstCommitDate(self):
- return datetime.datetime.fromtimestamp(self.first_commit_stamp)
-
- def getLastCommitDate(self):
- return datetime.datetime.fromtimestamp(self.last_commit_stamp)
-
- def getTags(self):
- lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
- return lines.split('\n')
-
- def getTagDate(self, tag):
- return self.revToDate('tags/' + tag)
-
- def getTotalAuthors(self):
- return self.total_authors
-
- def getTotalCommits(self):
- return self.total_commits
-
- def getTotalFiles(self):
- return self.total_files
-
- def getTotalLOC(self):
- return self.total_lines
-
- def getTotalSourceLines(self):
- return self.total_source_lines
-
- def getTotalCommentLines(self):
- return self.total_comment_lines
-
- def getTotalBlankLines(self):
- return self.total_blank_lines
-
- def getSLOCByExtension(self):
- return self.sloc_by_extension
-
- def getLargestFiles(self, limit=10):
- """Get the largest files by size."""
- sorted_files = sorted(self.file_sizes.items(), key=lambda x: x[1], reverse=True)
- return sorted_files[:limit]
-
- def getFilesWithMostRevisions(self, limit=10):
- """Get files with most revisions (hotspots)."""
- sorted_files = sorted(self.file_revisions.items(), key=lambda x: x[1], reverse=True)
- return sorted_files[:limit]
-
- def getAverageFileSize(self):
- """Get average file size in bytes."""
- if not self.file_sizes:
- return 0.0
- return sum(self.file_sizes.values()) / len(self.file_sizes)
-
- def getDirectoriesByActivity(self, limit=10):
- """Get directories with most lines changed (added + removed)."""
- if not hasattr(self, 'directories'):
- return []
- directory_activity = []
- for directory, stats in self.directories.items():
- total_lines = stats['lines_added'] + stats['lines_removed']
- file_count = len(stats['files'])
- directory_activity.append((directory, total_lines, stats['lines_added'], stats['lines_removed'], file_count))
- return sorted(directory_activity, key=lambda x: x[1], reverse=True)[:limit]
-
- def getDirectoriesByRevisions(self, limit=10):
- """Get directories with most file revisions."""
- if not hasattr(self, 'directory_revisions'):
- return []
- sorted_dirs = sorted(self.directory_revisions.items(), key=lambda x: x[1], reverse=True)
- return sorted_dirs[:limit]
-
- def getAverageRevisionsPerFile(self):
- """Get average number of revisions per file."""
- if not self.file_revisions:
- return 0.0
- return sum(self.file_revisions.values()) / len(self.file_revisions)
-
- def getTotalSize(self):
- return self.total_size
-
- def getLast30DaysActivity(self):
- """Get activity stats for last 30 days."""
- return {
- 'commits': self.last_30_days_commits,
- 'lines_added': self.last_30_days_lines_added,
- 'lines_removed': self.last_30_days_lines_removed
- }
-
- def getLast12MonthsActivity(self):
- """Get activity stats for last 12 months."""
- return {
- 'commits': dict(self.last_12_months_commits),
- 'lines_added': dict(self.last_12_months_lines_added),
- 'lines_removed': dict(self.last_12_months_lines_removed)
- }
-
- def getPaceOfChanges(self):
- """Get pace of changes (line changes over time)."""
- return self.pace_of_changes
-
- def getRepositorySize(self):
- """Get repository size in MB."""
- return getattr(self, 'repository_size_mb', 0.0)
-
- def getBranches(self):
- """Get all branches with their statistics."""
- return self.branches
-
- def getUnmergedBranches(self):
- """Get list of unmerged branch names."""
- return self.unmerged_branches
-
- def getMainBranch(self):
- """Get the detected main branch name."""
- return getattr(self, 'main_branch', 'master')
-
- def getBranchInfo(self, branch_name):
- """Get detailed information about a specific branch."""
- return self.branches.get(branch_name, {})
-
- def getBranchAuthors(self, branch_name):
- """Get authors who contributed to a specific branch."""
- branch_info = self.branches.get(branch_name, {})
- return branch_info.get('authors', {})
-
- def getBranchesByCommits(self, limit=None):
- """Get branches sorted by number of commits."""
- sorted_branches = sorted(self.branches.items(),
- key=lambda x: x[1].get('commits', 0),
- reverse=True)
- if limit:
- return sorted_branches[:limit]
- return sorted_branches
-
- def getBranchesByLinesChanged(self, limit=None):
- """Get branches sorted by total lines changed."""
- sorted_branches = sorted(self.branches.items(),
- key=lambda x: x[1].get('lines_added', 0) + x[1].get('lines_removed', 0),
- reverse=True)
- if limit:
- return sorted_branches[:limit]
- return sorted_branches
-
- def getUnmergedBranchStats(self):
- """Get statistics for unmerged branches only."""
- unmerged_stats = {}
- for branch_name in self.unmerged_branches:
- if branch_name in self.branches:
- unmerged_stats[branch_name] = self.branches[branch_name]
- return unmerged_stats
-
- # New methods for advanced team analysis
- def getCommitPatterns(self):
- """Get commit patterns analysis for all authors."""
- return self.commit_patterns
-
- def getCommitPatternsForAuthor(self, author):
- """Get commit patterns for a specific author."""
- return self.commit_patterns.get(author, {})
-
- def getWorkingPatterns(self):
- """Get working time patterns for all authors."""
- return self.working_patterns
-
- def getWorkingPatternsForAuthor(self, author):
- """Get working patterns for a specific author."""
- return self.working_patterns.get(author, {})
-
- def getTeamCollaboration(self):
- """Get team collaboration analysis."""
- return self.author_collaboration
-
- def getCollaborationForAuthor(self, author):
- """Get collaboration data for a specific author."""
- return self.author_collaboration.get(author, {})
-
- def getImpactAnalysis(self):
- """Get impact analysis for all authors."""
- return self.impact_analysis
-
- def getImpactAnalysisForAuthor(self, author):
- """Get impact analysis for a specific author."""
- return self.impact_analysis.get(author, {})
-
- def getTeamPerformance(self):
- """Get team performance metrics for all authors."""
- return self.team_performance
-
- def getTeamPerformanceForAuthor(self, author):
- """Get team performance metrics for a specific author."""
- return self.team_performance.get(author, {})
-
- def getCriticalFiles(self):
- """Get list of files identified as critical to the project."""
- return list(self.critical_files)
-
- def getFileImpactScores(self):
- """Get impact scores for all files."""
- return dict(self.file_impact_scores)
-
- def getTopImpactFiles(self, limit=10):
- """Get files with highest impact scores."""
- sorted_files = sorted(self.file_impact_scores.items(), key=lambda x: x[1], reverse=True)
- return sorted_files[:limit]
-
- def getBugRelatedCommits(self):
- """Get commits that appear to be bug-related."""
- return self.potential_bug_commits
-
- def getRefactoringCommits(self):
- """Get commits that appear to be refactoring."""
- return self.refactoring_commits
-
- def getFeatureCommits(self):
- """Get commits that appear to add features."""
- return self.feature_commits
-
- def getAuthorActivePeriods(self):
- """Get active periods analysis for all authors."""
- return self.author_active_periods
-
- def getAuthorsByContribution(self):
- """Get authors sorted by contribution percentage."""
- performance_data = [(author, perf.get('contribution_percentage', 0))
- for author, perf in self.team_performance.items()]
- return sorted(performance_data, key=lambda x: x[1], reverse=True)
-
- def getAuthorsByEfficiency(self):
- """Get authors sorted by efficiency score."""
- performance_data = [(author, perf.get('efficiency_score', 0))
- for author, perf in self.team_performance.items()]
- return sorted(performance_data, key=lambda x: x[1], reverse=True)
-
- def getAuthorsByConsistency(self):
- """Get authors sorted by consistency score."""
- performance_data = [(author, perf.get('consistency', 0))
- for author, perf in self.team_performance.items()]
- return sorted(performance_data, key=lambda x: x[1], reverse=True)
-
- def getAuthorsByLeadership(self):
- """Get authors sorted by leadership score."""
- performance_data = [(author, perf.get('leadership_score', 0))
- for author, perf in self.team_performance.items()]
- return sorted(performance_data, key=lambda x: x[1], reverse=True)
-
- def getTeamWorkDistribution(self):
- """Analyze work distribution across team members."""
- total_commits = self.getTotalCommits()
- total_lines = self.total_lines_added + self.total_lines_removed
-
- distribution = {}
- for author in self.authors:
- author_info = self.authors[author]
- author_commits = author_info.get('commits', 0)
- author_lines = author_info.get('lines_added', 0) + author_info.get('lines_removed', 0)
-
- distribution[author] = {
- 'commit_percentage': (author_commits / total_commits * 100) if total_commits > 0 else 0,
- 'lines_percentage': (author_lines / total_lines * 100) if total_lines > 0 else 0,
- 'commits': author_commits,
- 'lines_changed': author_lines
- }
-
- return distribution
-
- def getCommitSizeAnalysis(self):
- """Get analysis of commit sizes across the team."""
- analysis = {
- 'small_commits_authors': [], # Authors with >50% small commits
- 'large_commits_authors': [], # Authors with >20% large commits
- 'balanced_authors': [], # Authors with balanced commit sizes
- 'overall_stats': {
- 'total_small': 0,
- 'total_medium': 0,
- 'total_large': 0
- }
- }
-
- for author, patterns in self.commit_patterns.items():
- total_commits = patterns.get('total_commits', 0)
- if total_commits == 0:
- continue
-
- small_ratio = patterns.get('small_commits', 0) / total_commits
- large_ratio = patterns.get('large_commits', 0) / total_commits
-
- analysis['overall_stats']['total_small'] += patterns.get('small_commits', 0)
- analysis['overall_stats']['total_medium'] += patterns.get('medium_commits', 0)
- analysis['overall_stats']['total_large'] += patterns.get('large_commits', 0)
-
- if small_ratio > 0.5:
- analysis['small_commits_authors'].append((author, small_ratio))
- elif large_ratio > 0.2:
- analysis['large_commits_authors'].append((author, large_ratio))
- else:
- analysis['balanced_authors'].append((author, small_ratio, large_ratio))
-
- return analysis
-
- def revToDate(self, rev):
- stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
- return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
-
- class ReportCreator:
- """Creates the actual report based on given data."""
- def __init__(self):
- pass
-
- def create(self, data, path):
- self.data = data
- self.path = path
-
- def html_linkify(text):
- return text.lower().replace(' ', '_')
-
- def html_header(level, text):
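- # e.g. html_header(2, 'Commits by Year') returns
- # '\n<h2 id="commits_by_year"><a href="#commits_by_year">Commits by Year</a></h2>\n\n'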
- name = html_linkify(text)
- return '\n<h%d id="%s"><a href="#%s">%s</a></h%d>\n\n' % (level, name, name, text, level)
-
- class HTMLReportCreator(ReportCreator):
- def create(self, data, path):
- ReportCreator.create(self, data, path)
- self.title = data.projectname
-
- # Prepare safe local values to avoid division-by-zero and empty-collection errors
- total_commits = data.getTotalCommits()
- total_active_days = len(data.getActiveDays()) if hasattr(data, 'getActiveDays') else 0
- delta_days = data.getCommitDeltaDays() if hasattr(data, 'getCommitDeltaDays') else 0
- total_authors = data.getTotalAuthors()
- # busiest counters: use 1 as denominator if no activity recorded to avoid ZeroDivisionError
- hour_of_day_busiest = data.activity_by_hour_of_day_busiest if getattr(data, 'activity_by_hour_of_day_busiest', 0) > 0 else 1
- hour_of_week_busiest = data.activity_by_hour_of_week_busiest if getattr(data, 'activity_by_hour_of_week_busiest', 0) > 0 else 1
- # timezone max for coloring; default to 1 if empty
- max_commits_on_tz = max(data.commits_by_timezone.values()) if data.commits_by_timezone else 1
-
- # copy static files. Looks in the binary directory, ../share/gitstats and /usr/share/gitstats
- binarypath = os.path.dirname(os.path.abspath(__file__))
- secondarypath = os.path.join(binarypath, '..', 'share', 'gitstats')
- basedirs = [binarypath, secondarypath, '/usr/share/gitstats']
- for file in (conf['style'], 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
- for base in basedirs:
- src = base + '/' + file
- if os.path.exists(src):
- shutil.copyfile(src, path + '/' + file)
- break
- else:
- print('Warning: "%s" not found, so not copied (searched: %s)' % (file, basedirs))
-
- f = open(path + "/index.html", 'w')
- format = '%Y-%m-%d %H:%M:%S'
- self.printHeader(f)
-
- f.write('<h1>GitStats - %s</h1>' % data.projectname)
-
- self.printNav(f)
-
- f.write('<dl>')
- f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
- f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
- f.write('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s), %s, %s</dd>' % (getversion(), getgitversion(), getgnuplotversion()))
- f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format)))
- f.write('<dt>Age</dt><dd>%d days, %d active days (%3.2f%%)</dd>' % (data.getCommitDeltaDays(), total_active_days, (100.0 * total_active_days / data.getCommitDeltaDays()) if data.getCommitDeltaDays() else 0.0))
- f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
- # Add file statistics
- try:
- avg_size = data.getAverageFileSize()
- f.write('<dt>Average File Size</dt><dd>%.2f bytes (%.1f KB)</dd>' % (avg_size, avg_size / 1024))
- except (AttributeError, ZeroDivisionError):
- pass
- try:
- avg_revisions = data.getAverageRevisionsPerFile()
- f.write('<dt>Average Revisions per File</dt><dd>%.2f</dd>' % avg_revisions)
- except (AttributeError, ZeroDivisionError):
- pass
- try:
- repo_size = data.getRepositorySize()
- if repo_size > 0:
- f.write('<dt>Repository Size</dt><dd>%.1f MB</dd>' % repo_size)
- except (AttributeError, ZeroDivisionError):
- pass
- f.write('<dt>Total Lines of Code</dt><dd>%s (%d added, %d removed)</dd>' % (data.getTotalLOC(), data.total_lines_added, data.total_lines_removed))
- f.write('<dt>Source Lines of Code</dt><dd>%s (%.1f%%)</dd>' % (data.getTotalSourceLines(), (100.0 * data.getTotalSourceLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0))
- f.write('<dt>Comment Lines</dt><dd>%s (%.1f%%)</dd>' % (data.getTotalCommentLines(), (100.0 * data.getTotalCommentLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0))
- f.write('<dt>Blank Lines</dt><dd>%s (%.1f%%)</dd>' % (data.getTotalBlankLines(), (100.0 * data.getTotalBlankLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0))
- avg_active = float(total_commits) / total_active_days if total_active_days else 0.0
- avg_all = float(total_commits) / delta_days if delta_days else 0.0
- f.write('<dt>Total Commits</dt><dd>%s (average %.1f commits per active day, %.1f per all days)</dd>' % (total_commits, avg_active, avg_all))
- avg_per_author = float(total_commits) / total_authors if total_authors else 0.0
- f.write('<dt>Authors</dt><dd>%s (average %.1f commits per author)</dd>' % (total_authors, avg_per_author))
-
- # Branch statistics
- branches = data.getBranches() if hasattr(data, 'getBranches') else {}
- unmerged_branches = data.getUnmergedBranches() if hasattr(data, 'getUnmergedBranches') else []
- main_branch = data.getMainBranch() if hasattr(data, 'getMainBranch') else 'master'
-
- if branches:
- f.write('<dt>Total Branches</dt><dd>%d</dd>' % len(branches))
- if unmerged_branches:
- f.write('<dt>Unmerged Branches</dt><dd>%d (%s)</dd>' % (len(unmerged_branches), ', '.join(unmerged_branches[:5]) + ('...' if len(unmerged_branches) > 5 else '')))
- f.write('<dt>Main Branch</dt><dd>%s</dd>' % main_branch)
-
- f.write('</dl>')
-
- f.write('</body>\n</html>')
- f.close()
-
- ###
- # Team Analysis - New comprehensive team analysis page
- f = open(path + '/team_analysis.html', 'w')
- self.printHeader(f)
- f.write('<h1>Team Analysis</h1>')
- self.printNav(f)
-
- # Team Overview
- f.write(html_header(2, 'Team Overview'))
- total_authors = data.getTotalAuthors()
- work_distribution = data.getTeamWorkDistribution()
-
- f.write('<dl>')
- f.write('<dt>Total Team Members</dt><dd>%d</dd>' % total_authors)
-
- # Calculate work distribution metrics
- commit_contributions = [dist['commit_percentage'] for dist in work_distribution.values()]
- lines_contributions = [dist['lines_percentage'] for dist in work_distribution.values()]
-
- if commit_contributions:
- max_commit_contrib = max(commit_contributions)
- min_commit_contrib = min(commit_contributions)
- avg_commit_contrib = sum(commit_contributions) / len(commit_contributions)
-
- f.write('<dt>Work Distribution (Commits)</dt><dd>Max: %.1f%%, Min: %.1f%%, Avg: %.1f%%</dd>' %
- (max_commit_contrib, min_commit_contrib, avg_commit_contrib))
-
- if lines_contributions:
- max_lines_contrib = max(lines_contributions)
- min_lines_contrib = min(lines_contributions)
- avg_lines_contrib = sum(lines_contributions) / len(lines_contributions)
-
- f.write('<dt>Work Distribution (Lines)</dt><dd>Max: %.1f%%, Min: %.1f%%, Avg: %.1f%%</dd>' %
- (max_lines_contrib, min_lines_contrib, avg_lines_contrib))
-
- f.write('</dl>')
-
- # Team Performance Rankings
- f.write(html_header(2, 'Team Performance Rankings'))
-
- # Top contributors by different metrics
- contrib_ranking = data.getAuthorsByContribution()
- efficiency_ranking = data.getAuthorsByEfficiency()
- consistency_ranking = data.getAuthorsByConsistency()
- leadership_ranking = data.getAuthorsByLeadership()
-
- f.write('<div class="rankings">')
- f.write('<div class="ranking-section">')
- f.write('<h3>Top Contributors (by Commit %)</h3>')
- f.write('<ol>')
- for author, percentage in contrib_ranking[:10]:
- f.write('<li>%s (%.1f%%)</li>' % (author, percentage))
- f.write('</ol>')
- f.write('</div>')
-
- f.write('<div class="ranking-section">')
- f.write('<h3>Most Efficient (by Quality Score)</h3>')
- f.write('<ol>')
- for author, score in efficiency_ranking[:10]:
- f.write('<li>%s (%.1f)</li>' % (author, score))
- f.write('</ol>')
- f.write('</div>')
-
- f.write('<div class="ranking-section">')
- f.write('<h3>Most Consistent</h3>')
- f.write('<ol>')
- for author, score in consistency_ranking[:10]:
- f.write('<li>%s (%.1f)</li>' % (author, score))
- f.write('</ol>')
- f.write('</div>')
-
- f.write('<div class="ranking-section">')
- f.write('<h3>Leadership Score</h3>')
- f.write('<ol>')
- for author, score in leadership_ranking[:10]:
- f.write('<li>%s (%.1f)</li>' % (author, score))
- f.write('</ol>')
- f.write('</div>')
- f.write('</div>')
-
- # Detailed Team Performance Table
- f.write(html_header(2, 'Detailed Team Performance Analysis'))
- f.write('<table class="team-performance sortable" id="team-performance">')
- f.write('<tr>')
- f.write('<th>Author</th>')
- f.write('<th>Commits</th>')
- f.write('<th>Contrib %</th>')
- f.write('<th>Lines Changed</th>')
- f.write('<th>Avg Commit Size</th>')
- f.write('<th>Efficiency</th>')
- f.write('<th>Consistency</th>')
- f.write('<th>Leadership</th>')
- f.write('<th>Overall Score</th>')
- f.write('<th>Assessment</th>')
- f.write('</tr>')
-
- team_performance = data.getTeamPerformance()
- commit_patterns = data.getCommitPatterns()
-
- # Sort by overall score
- sorted_authors = sorted(team_performance.items(), key=lambda x: x[1].get('overall_score', 0), reverse=True)
-
- for author, perf in sorted_authors:
- author_info = data.getAuthorInfo(author)
- patterns = commit_patterns.get(author, {})
-
- commits = author_info.get('commits', 0)
- lines_changed = author_info.get('lines_added', 0) + author_info.get('lines_removed', 0)
- contrib_pct = perf.get('contribution_percentage', 0)
- avg_commit_size = patterns.get('avg_commit_size', 0)
- efficiency = perf.get('efficiency_score', 0)
- consistency = perf.get('consistency', 0)
- leadership = perf.get('leadership_score', 0)
- overall = perf.get('overall_score', 0)
-
- # Generate assessment
- assessment = self._generateAssessment(perf, patterns)
-
- f.write('<tr>')
- f.write('<td>%s</td>' % author)
- f.write('<td>%d</td>' % commits)
- f.write('<td>%.1f%%</td>' % contrib_pct)
- f.write('<td>%d</td>' % lines_changed)
- f.write('<td>%.1f</td>' % avg_commit_size)
- f.write('<td>%.1f</td>' % efficiency)
- f.write('<td>%.1f</td>' % consistency)
- f.write('<td>%.1f</td>' % leadership)
- f.write('<td>%.1f</td>' % overall)
- f.write('<td>%s</td>' % assessment)
- f.write('</tr>')
-
- f.write('</table>')
-
- # Commit Patterns Analysis
- f.write(html_header(2, 'Commit Patterns Analysis'))
-
- commit_size_analysis = data.getCommitSizeAnalysis()
-
- f.write('<h3>Commit Size Distribution</h3>')
- f.write('<p><strong>Small commits (<10 lines):</strong> %d commits</p>' % commit_size_analysis['overall_stats']['total_small'])
- f.write('<p><strong>Medium commits (10-100 lines):</strong> %d commits</p>' % commit_size_analysis['overall_stats']['total_medium'])
- f.write('<p><strong>Large commits (>100 lines):</strong> %d commits</p>' % commit_size_analysis['overall_stats']['total_large'])
-
- if commit_size_analysis['small_commits_authors']:
- f.write('<h4>Authors with predominantly small commits (possible commit splitting):</h4>')
- f.write('<ul>')
- for author, ratio in commit_size_analysis['small_commits_authors']:
- f.write('<li>%s (%.1f%% small commits)</li>' % (author, ratio * 100))
- f.write('</ul>')
-
- if commit_size_analysis['large_commits_authors']:
- f.write('<h4>Authors with frequent large commits:</h4>')
- f.write('<ul>')
- for author, ratio in commit_size_analysis['large_commits_authors']:
- f.write('<li>%s (%.1f%% large commits)</li>' % (author, ratio * 100))
- f.write('</ul>')
-
- # Working Patterns Analysis
- f.write(html_header(2, 'Working Time Patterns'))
-
- working_patterns = data.getWorkingPatterns()
-
- f.write('<table class="working-patterns sortable" id="working-patterns">')
- f.write('<tr>')
- f.write('<th>Author</th>')
- f.write('<th>Night Worker<br>(22:00-06:00)</th>')
- f.write('<th>Weekend Worker</th>')
- f.write('<th>Early Bird<br>(05:00-09:00)</th>')
- f.write('<th>Regular Hours<br>(09:00-17:00)</th>')
- f.write('<th>Evening<br>(17:00-22:00)</th>')
- f.write('<th>Peak Hour</th>')
- f.write('<th>Peak Day</th>')
- f.write('</tr>')
-
- for author, patterns in working_patterns.items():
- # use a distinct name here: total_commits must keep the repository-wide
- # value for the percentage columns further down this page
- author_total = patterns.get('total_commits', 1)
-
- night_pct = (patterns.get('night_commits', 0) / author_total) * 100
- weekend_pct = (patterns.get('weekend_commits', 0) / author_total) * 100
- early_pct = (patterns.get('early_bird', 0) / author_total) * 100
- workday_pct = (patterns.get('workday', 0) / author_total) * 100
- evening_pct = (patterns.get('evening', 0) / author_total) * 100
-
- # Find peak hour and day
- peak_hours = patterns.get('peak_hours', {})
- peak_days = patterns.get('peak_days', {})
-
- peak_hour = max(peak_hours.keys(), key=lambda k: peak_hours[k]) if peak_hours else 'N/A'
- peak_day = max(peak_days.keys(), key=lambda k: peak_days[k]) if peak_days else 'N/A'
- peak_day_name = WEEKDAYS[peak_day] if isinstance(peak_day, int) and 0 <= peak_day < 7 else peak_day
-
- f.write('<tr>')
- f.write('<td>%s</td>' % author)
- f.write('<td>%.1f%%</td>' % night_pct)
- f.write('<td>%.1f%%</td>' % weekend_pct)
- f.write('<td>%.1f%%</td>' % early_pct)
- f.write('<td>%.1f%%</td>' % workday_pct)
- f.write('<td>%.1f%%</td>' % evening_pct)
- f.write('<td>%s</td>' % (('%d:00' % peak_hour) if peak_hours else 'N/A'))
- f.write('<td>%s</td>' % peak_day_name)
- f.write('</tr>')
-
- f.write('</table>')
-
- # Impact Analysis
- f.write(html_header(2, 'Impact and Quality Analysis'))
-
- impact_analysis = data.getImpactAnalysis()
- critical_files = data.getCriticalFiles()
-
- f.write('<h3>Critical Files in Project (%d files identified)</h3>' % len(critical_files))
- if critical_files:
- f.write('<ul>')
- for critical_file in critical_files[:20]: # Show first 20
- f.write('<li>%s</li>' % critical_file)
- f.write('</ul>')
- if len(critical_files) > 20:
- f.write('<p>... and %d more files</p>' % (len(critical_files) - 20))
-
- f.write('<h3>Author Impact Analysis</h3>')
- f.write('<table class="impact-analysis sortable" id="impact-analysis">')
- f.write('<tr>')
- f.write('<th>Author</th>')
- f.write('<th>Impact Score</th>')
- f.write('<th>Critical Files Touched</th>')
- f.write('<th>Bug Potential</th>')
- f.write('<th>High Impact Files</th>')
- f.write('<th>Assessment</th>')
- f.write('</tr>')
-
- # Sort by impact score
- sorted_impact = sorted(impact_analysis.items(), key=lambda x: x[1].get('impact_score', 0), reverse=True)
-
- for author, impact in sorted_impact:
- impact_score = impact.get('impact_score', 0)
- critical_files_touched = len(impact.get('critical_files', []))
- bug_potential = impact.get('bug_potential', 0)
- high_impact_files = len(impact.get('high_impact_files', []))
-
- # Generate impact assessment
- if impact_score > 200:
- impact_assessment = "Very High Impact"
- elif impact_score > 100:
- impact_assessment = "High Impact"
- elif impact_score > 50:
- impact_assessment = "Medium Impact"
- else:
- impact_assessment = "Low Impact"
-
- if bug_potential > 30:
- impact_assessment += " (High Bug Risk)"
- elif bug_potential > 15:
- impact_assessment += " (Medium Bug Risk)"
-
- f.write('<tr>')
- f.write('<td>%s</td>' % author)
- f.write('<td>%.1f</td>' % impact_score)
- f.write('<td>%d</td>' % critical_files_touched)
- f.write('<td>%.1f%%</td>' % bug_potential)
- f.write('<td>%d</td>' % high_impact_files)
- f.write('<td>%s</td>' % impact_assessment)
- f.write('</tr>')
-
- f.write('</table>')
-
- # Team Collaboration Analysis
- f.write(html_header(2, 'Team Collaboration Analysis'))
-
- collaboration_data = data.getTeamCollaboration()
-
- f.write('<table class="collaboration sortable" id="collaboration">')
- f.write('<tr>')
- f.write('<th>Author</th>')
- f.write('<th>Files Owned</th>')
- f.write('<th>Collaborators</th>')
- f.write('<th>Shared Files</th>')
- f.write('<th>Top Collaborations</th>')
- f.write('</tr>')
-
- for author, collab in collaboration_data.items():
- files_owned = len(collab.get('file_ownership', {}))
- worked_with = collab.get('worked_with', {})
- collaborators_count = len(worked_with)
-
- # Count total shared files
- shared_files = 0
- top_collabs = []
-
- for other_author, shared_files_dict in worked_with.items():
- shared_count = len(shared_files_dict)
- shared_files += shared_count
- top_collabs.append((other_author, shared_count))
-
- # Sort and take top 3 collaborations
- top_collabs.sort(key=lambda x: x[1], reverse=True)
- top_collabs_str = ', '.join([f"{other}({count})" for other, count in top_collabs[:3]])  # 'other' avoids shadowing the row's author
-
- f.write('<tr>')
- f.write('<td>%s</td>' % author)
- f.write('<td>%d</td>' % files_owned)
- f.write('<td>%d</td>' % collaborators_count)
- f.write('<td>%d</td>' % shared_files)
- f.write('<td>%s</td>' % top_collabs_str)
- f.write('</tr>')
-
- f.write('</table>')
-
- f.write('</body></html>')
- f.close()
-
- ###
- # Activity
- f = open(path + '/activity.html', 'w')
- self.printHeader(f)
- f.write('<h1>Activity</h1>')
- self.printNav(f)
-
- # Last 30 days
- f.write(html_header(2, 'Last 30 Days'))
- last_30_days = data.getLast30DaysActivity()
- f.write('<dl>')
- f.write('<dt>Commits</dt><dd>%d</dd>' % last_30_days['commits'])
- f.write('<dt>Lines added</dt><dd>%d</dd>' % last_30_days['lines_added'])
- f.write('<dt>Lines removed</dt><dd>%d</dd>' % last_30_days['lines_removed'])
- f.write('<dt>Net lines</dt><dd>%d</dd>' % (last_30_days['lines_added'] - last_30_days['lines_removed']))
- f.write('</dl>')
-
- # Last 12 months
- f.write(html_header(2, 'Last 12 Months'))
- last_12_months = data.getLast12MonthsActivity()
- if last_12_months['commits']:
- f.write('<table class="sortable" id="last12months">')
- f.write('<tr><th>Month</th><th>Commits</th><th>Lines Added</th><th>Lines Removed</th><th>Net Lines</th></tr>')
-
- # Sort months in reverse chronological order
- sorted_months = sorted(last_12_months['commits'].keys(), reverse=True)
- for month in sorted_months:
- commits = last_12_months['commits'][month]
- lines_added = last_12_months['lines_added'].get(month, 0)
- lines_removed = last_12_months['lines_removed'].get(month, 0)
- net_lines = lines_added - lines_removed
-
- f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td><td>%d</td></tr>' %
- (month, commits, lines_added, lines_removed, net_lines))
-
- f.write('</table>')
- else:
- f.write('<p>No activity in the last 12 months.</p>')
-
- # Pace of Changes
- f.write(html_header(2, 'Pace of Changes'))
- f.write('<p>Number of line changes (additions + deletions) over time</p>')
- pace_data = data.getPaceOfChanges()
- if pace_data:
- f.write('<img src="pace_of_changes.png" alt="Pace of Changes">')
-
- # Generate pace of changes data file
- fg = open(path + '/pace_of_changes.dat', 'w')
- for stamp in sorted(pace_data.keys()):
- fg.write('%d %d\n' % (stamp, pace_data[stamp]))
- fg.close()
- else:
- f.write('<p>No pace data available.</p>')
-
- # Weekly activity
- WEEKS = 32
- f.write(html_header(2, 'Weekly activity'))
- f.write('<p>Last %d weeks</p>' % WEEKS)
-
- # generate weeks to show (previous N weeks from now)
- now = datetime.datetime.now()
- deltaweek = datetime.timedelta(7)
- weeks = []
- stampcur = now
- for i in range(0, WEEKS):
- weeks.insert(0, stampcur.strftime('%Y-%W'))
- stampcur -= deltaweek
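- # '%Y-%W' numbers weeks with Monday as the first day; it must match the
- # format used when building activity_by_year_week for the lookups below to hit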
-
- # top row: commits & bar
- f.write('<table class="noborders"><tr>')
- for i in range(0, WEEKS):
- commits = 0
- if weeks[i] in data.activity_by_year_week:
- commits = data.activity_by_year_week[weeks[i]]
-
- percentage = 0.0
- if weeks[i] in data.activity_by_year_week and data.activity_by_year_week_peak:
- percentage = float(data.activity_by_year_week[weeks[i]]) / data.activity_by_year_week_peak
- height = max(1, int(200 * percentage))
- f.write('<td style="text-align: center; vertical-align: bottom">%d<div style="display: block; background-color: red; width: 20px; height: %dpx"></div></td>' % (commits, height))
-
- # bottom row: weeks ago, counting back from the current week
- f.write('</tr><tr>')
- for i in range(0, WEEKS):
- f.write('<td>%s</td>' % (WEEKS - i))
- f.write('</tr></table>')
-
- # Hour of Day
- f.write(html_header(2, 'Hour of Day'))
- hour_of_day = data.getActivityByHourOfDay()
- f.write('<table><tr><th>Hour</th>')
- for i in range(0, 24):
- f.write('<th>%d</th>' % i)
- f.write('</tr>\n<tr><th>Commits</th>')
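- # color scale: map commits/busiest into the 127..255 red range so the busiest cell is brightest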
- # hour_of_day.dat is written once, after the table, by the fg block below;
- # writing it here as well would just be overwritten
- for i in range(0, 24):
- if i in hour_of_day:
- r = 127 + int((float(hour_of_day[i]) / hour_of_day_busiest) * 128)
- f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
- else:
- f.write('<td>0</td>')
- f.write('</tr>\n<tr><th>%</th>')
- totalcommits = total_commits
- for i in range(0, 24):
- if i in hour_of_day:
- r = 127 + int((float(hour_of_day[i]) / hour_of_day_busiest) * 128)
- percent = (100.0 * hour_of_day[i]) / totalcommits if totalcommits else 0.0
- f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, percent))
- else:
- f.write('<td>0.00</td>')
- f.write('</tr></table>')
- f.write('<img src="hour_of_day.png" alt="Hour of Day">')
- fg = open(path + '/hour_of_day.dat', 'w')
- for i in range(0, 24):
- if i in hour_of_day:
- fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
- else:
- fg.write('%d 0\n' % (i + 1))
- fg.close()
-
- # Day of Week
- f.write(html_header(2, 'Day of Week'))
- day_of_week = data.getActivityByDayOfWeek()
- f.write('<div class="vtable"><table>')
- f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
- fp = open(path + '/day_of_week.dat', 'w')
- for d in range(0, 7):
- commits = 0
- if d in day_of_week:
- commits = day_of_week[d]
- fp.write('%d %s %d\n' % (d + 1, WEEKDAYS[d], commits))
- f.write('<tr>')
- f.write('<th>%s</th>' % (WEEKDAYS[d]))
- if d in day_of_week:
- percent = (100.0 * day_of_week[d]) / totalcommits if totalcommits else 0.0
- f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], percent))
- else:
- f.write('<td>0</td>')
- f.write('</tr>')
- f.write('</table></div>')
- f.write('<img src="day_of_week.png" alt="Day of Week">')
- fp.close()
-
- # Hour of Week
- f.write(html_header(2, 'Hour of Week'))
- f.write('<table>')
-
- f.write('<tr><th>Weekday</th>')
- for hour in range(0, 24):
- f.write('<th>%d</th>' % (hour))
- f.write('</tr>')
-
- for weekday in range(0, 7):
- f.write('<tr><th>%s</th>' % (WEEKDAYS[weekday]))
- for hour in range(0, 24):
- try:
- commits = data.activity_by_hour_of_week[weekday][hour]
- except KeyError:
- commits = 0
- if commits != 0:
- f.write('<td')
- r = 127 + int((float(commits) / hour_of_week_busiest) * 128)  # guarded local avoids div-by-zero
- f.write(' style="background-color: rgb(%d, 0, 0)"' % r)
- f.write('>%d</td>' % commits)
- else:
- f.write('<td></td>')
- f.write('</tr>')
-
- f.write('</table>')
-
- # Month of Year
- f.write(html_header(2, 'Month of Year'))
- f.write('<div class="vtable"><table>')
- f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
- fp = open(path + '/month_of_year.dat', 'w')
- for mm in range(1, 13):
- commits = 0
- if mm in data.activity_by_month_of_year:
- commits = data.activity_by_month_of_year[mm]
- percent = (100.0 * commits) / total_commits if total_commits else 0.0
- f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, percent))
- fp.write('%d %d\n' % (mm, commits))
- fp.close()
- f.write('</table></div>')
- f.write('<img src="month_of_year.png" alt="Month of Year">')
-
- # Commits by year/month
- f.write(html_header(2, 'Commits by year/month'))
- f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th><th>Lines added</th><th>Lines removed</th></tr>')
- for yymm in reversed(sorted(data.commits_by_month.keys())):
- f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td></tr>' % (yymm, data.commits_by_month.get(yymm,0), data.lines_added_by_month.get(yymm,0), data.lines_removed_by_month.get(yymm,0)))
- f.write('</table></div>')
- f.write('<img src="commits_by_year_month.png" alt="Commits by year/month">')
- fg = open(path + '/commits_by_year_month.dat', 'w')
- for yymm in sorted(data.commits_by_month.keys()):
- fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
- fg.close()
-
- # Commits by year
- f.write(html_header(2, 'Commits by Year'))
- f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th><th>Lines added</th><th>Lines removed</th></tr>')
- for yy in reversed(sorted(data.commits_by_year.keys())):
- commits = data.commits_by_year.get(yy, 0)
- percent = (100.0 * commits) / total_commits if total_commits else 0.0
- f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td></tr>' % (yy, commits, percent, data.lines_added_by_year.get(yy,0), data.lines_removed_by_year.get(yy,0)))
- f.write('</table></div>')
- f.write('<img src="commits_by_year.png" alt="Commits by Year">')
- fg = open(path + '/commits_by_year.dat', 'w')
- for yy in sorted(data.commits_by_year.keys()):
- fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
- fg.close()
-
- # Commits by timezone
- f.write(html_header(2, 'Commits by Timezone'))
- f.write('<table><tr>')
- f.write('<th>Timezone</th><th>Commits</th>')
- f.write('</tr>')
- for i in sorted(data.commits_by_timezone.keys(), key = lambda n : int(n)):
- commits = data.commits_by_timezone[i]
- r = 127 + int((float(commits) / max_commits_on_tz) * 128)
- f.write('<tr><th>%s</th><td style="background-color: rgb(%d, 0, 0)">%d</td></tr>' % (i, r, commits))
- f.write('</table>')
-
- f.write('</body></html>')
- f.close()
-
- ###
- # Authors
- f = open(path + '/authors.html', 'w')
- self.printHeader(f)
-
- f.write('<h1>Authors</h1>')
- self.printNav(f)
-
- # Authors :: List of authors
- f.write(html_header(2, 'List of Authors'))
-
- f.write('<table class="authors sortable" id="authors">')
- f.write('<tr><th>Author</th><th>Commits (%)</th><th>+ lines</th><th>- lines</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th>Active days</th><th># by commits</th></tr>')
- for author in data.getAuthors(conf['max_authors']):
- info = data.getAuthorInfo(author)
- f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td><td>%d</td></tr>' % (author, info['commits'], info['commits_frac'], info['lines_added'], info['lines_removed'], info['date_first'], info['date_last'], info['timedelta'], len(info['active_days']), info['place_by_commits']))
- f.write('</table>')
-
- allauthors = data.getAuthors()
- if len(allauthors) > conf['max_authors']:
- rest = allauthors[conf['max_authors']:]
- f.write('<p class="moreauthors">These didn\'t make it to the top: %s</p>' % ', '.join(rest))
-
- f.write(html_header(2, 'Cumulated Added Lines of Code per Author'))
- f.write('<img src="lines_of_code_by_author.png" alt="Lines of code per Author">')
- if len(allauthors) > conf['max_authors']:
- f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])
-
- f.write(html_header(2, 'Commits per Author'))
- f.write('<img src="commits_by_author.png" alt="Commits per Author">')
- if len(allauthors) > conf['max_authors']:
- f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])
-
- fgl = open(path + '/lines_of_code_by_author.dat', 'w')
- fgc = open(path + '/commits_by_author.dat', 'w')
-
- lines_by_authors = {} # cumulated added lines by
- # author. to save memory,
- # changes_by_date_by_author[stamp][author] is defined
- # only at points where author commits.
- # lines_by_authors allows us to generate all the
- # points in the .dat file.
-
- # Don't rely on getAuthors to give the same order each
- # time. Be robust and keep the list in a variable.
- commits_by_authors = {} # cumulated commits by author, same layout as lines_by_authors
-
- self.authors_to_plot = data.getAuthors(conf['max_authors'])
- for author in self.authors_to_plot:
- lines_by_authors[author] = 0
- commits_by_authors[author] = 0
- for stamp in sorted(data.changes_by_date_by_author.keys()):
- fgl.write('%d' % stamp)
- fgc.write('%d' % stamp)
- for author in self.authors_to_plot:
- if author in data.changes_by_date_by_author[stamp]:
- lines_by_authors[author] = data.changes_by_date_by_author[stamp][author]['lines_added']
- commits_by_authors[author] = data.changes_by_date_by_author[stamp][author]['commits']
- fgl.write(' %d' % lines_by_authors[author])
- fgc.write(' %d' % commits_by_authors[author])
- fgl.write('\n')
- fgc.write('\n')
- fgl.close()
- fgc.close()
-
- # Authors :: Author of Month
- f.write(html_header(2, 'Author of Month'))
- f.write('<table class="sortable" id="aom">')
- f.write('<tr><th>Month</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
- for yymm in reversed(sorted(data.author_of_month.keys())):
- authordict = data.author_of_month[yymm]
- authors = getkeyssortedbyvalues(authordict)
- authors.reverse()
- commits = data.author_of_month[yymm][authors[0]]
- next = ', '.join(authors[1:conf['authors_top']+1])
- f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yymm, authors[0], commits, (100.0 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], next, len(authors)))
-
- f.write('</table>')
-
- f.write(html_header(2, 'Author of Year'))
- f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
- for yy in reversed(sorted(data.author_of_year.keys())):
- authordict = data.author_of_year[yy]
- authors = getkeyssortedbyvalues(authordict)
- authors.reverse()
- commits = data.author_of_year[yy][authors[0]]
- next = ', '.join(authors[1:conf['authors_top']+1])
- f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yy, authors[0], commits, (100.0 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next, len(authors)))
- f.write('</table>')
-
- # Domains
- f.write(html_header(2, 'Commits by Domains'))
- domains_by_commits = getkeyssortedbyvaluekey(data.domains, 'commits')
- domains_by_commits.reverse() # most first
- f.write('<div class="vtable"><table>')
- f.write('<tr><th>Domains</th><th>Total (%)</th></tr>')
- fp = open(path + '/domains.dat', 'w')
- n = 0
- for domain in domains_by_commits:
- if n == conf['max_domains']:
- break
- n += 1
- info = data.getDomainInfo(domain)
- fp.write('%s %d %d\n' % (domain, n, info['commits']))
- percent = (100.0 * info['commits'] / total_commits) if total_commits else 0.0
- f.write('<tr><th>%s</th><td>%d (%.2f%%)</td></tr>' % (domain, info['commits'], percent))
- f.write('</table></div>')
- f.write('<img src="domains.png" alt="Commits by Domains">')
- fp.close()
-
- f.write('</body></html>')
- f.close()
-
- ###
- # Branches
- f = open(path + '/branches.html', 'w')
- self.printHeader(f)
- f.write('<h1>Branches</h1>')
- self.printNav(f)
-
- # Branch summary
- branches = data.getBranches() if hasattr(data, 'getBranches') else {}
- unmerged_branches = data.getUnmergedBranches() if hasattr(data, 'getUnmergedBranches') else []
- main_branch = data.getMainBranch() if hasattr(data, 'getMainBranch') else 'master'
-
- f.write('<dl>')
- f.write('<dt>Total Branches</dt><dd>%d</dd>' % len(branches))
- if unmerged_branches:
- f.write('<dt>Unmerged Branches</dt><dd>%d</dd>' % len(unmerged_branches))
- f.write('<dt>Main Branch</dt><dd>%s</dd>' % main_branch)
- f.write('</dl>')
-
- if branches:
- # Branches :: All Branches
- f.write(html_header(2, 'All Branches'))
- f.write('<table class="branches sortable" id="branches">')
- f.write('<tr><th>Branch</th><th>Status</th><th>Commits</th><th>Lines Added</th><th>Lines Removed</th><th>Total Changes</th><th>Authors</th></tr>')
-
- # Sort branches by total changes (lines added + removed)
- sorted_branches = sorted(branches.items(),
- key=lambda x: x[1].get('lines_added', 0) + x[1].get('lines_removed', 0),
- reverse=True)
-
- for branch_name, branch_info in sorted_branches:
- status = 'Merged' if branch_info.get('is_merged', True) else 'Unmerged'
- commits = branch_info.get('commits', 0)
- lines_added = branch_info.get('lines_added', 0)
- lines_removed = branch_info.get('lines_removed', 0)
- total_changes = lines_added + lines_removed
- authors_count = len(branch_info.get('authors', {}))
-
- # Highlight unmerged branches
- row_class = ' class="unmerged"' if not branch_info.get('is_merged', True) else ''
- f.write('<tr%s><td>%s</td><td>%s</td><td>%d</td><td>%d</td><td>%d</td><td>%d</td><td>%d</td></tr>' %
- (row_class, branch_name, status, commits, lines_added, lines_removed, total_changes, authors_count))
- f.write('</table>')
-
- # Unmerged Branches Detail
- if unmerged_branches:
- f.write(html_header(2, 'Unmerged Branches Detail'))
- f.write('<p>These branches have not been merged into the main branch (%s) and may represent ongoing work or abandoned features.</p>' % main_branch)
-
- f.write('<table class="unmerged-branches sortable" id="unmerged">')
- f.write('<tr><th>Branch</th><th>Commits</th><th>Authors</th><th>Top Contributors</th><th>Lines Added</th><th>Lines Removed</th></tr>')
-
- unmerged_stats = data.getUnmergedBranchStats() if hasattr(data, 'getUnmergedBranchStats') else {}
-
- for branch_name in unmerged_branches:
- if branch_name in unmerged_stats:
- branch_info = unmerged_stats[branch_name]
- commits = branch_info.get('commits', 0)
- authors = branch_info.get('authors', {})
- lines_added = branch_info.get('lines_added', 0)
- lines_removed = branch_info.get('lines_removed', 0)
-
- # Get top contributors
- top_contributors = sorted(authors.items(), key=lambda x: x[1].get('commits', 0), reverse=True)[:3]
- contributors_str = ', '.join([f"{author} ({info.get('commits', 0)})" for author, info in top_contributors])
-
- f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%s</td><td>%d</td><td>%d</td></tr>' %
- (branch_name, commits, len(authors), contributors_str, lines_added, lines_removed))
- f.write('</table>')
-
- # Branch Activity by Author
- f.write(html_header(2, 'Branch Activity by Author'))
- f.write('<p>This table shows which authors have contributed to which branches.</p>')
-
- # Collect all unique authors across all branches
- all_authors = set()
- for branch_info in branches.values():
- all_authors.update(branch_info.get('authors', {}).keys())
-
- if all_authors and len(branches) > 1:
- f.write('<table class="branch-authors sortable" id="branch-authors">')
- header = '<tr><th>Author</th>'
- for branch_name in sorted(branches.keys()):
- header += '<th>%s</th>' % branch_name
- header += '<th>Total Branches</th></tr>'
- f.write(header)
-
- for author in sorted(all_authors):
- row = '<tr><td>%s</td>' % author
- branch_count = 0
- for branch_name in sorted(branches.keys()):
- branch_authors = branches[branch_name].get('authors', {})
- if author in branch_authors:
- commits = branch_authors[author].get('commits', 0)
- row += '<td>%d</td>' % commits
- branch_count += 1
- else:
- row += '<td>-</td>'
- row += '<td>%d</td></tr>' % branch_count
- f.write(row)
- f.write('</table>')
-
- f.write('</body></html>')
- f.close()
-
- ###
- # Files
- f = open(path + '/files.html', 'w')
- self.printHeader(f)
- f.write('<h1>Files</h1>')
- self.printNav(f)
-
- f.write('<dl>\n')
- f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
- f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
- try:
- avg_size = data.getAverageFileSize()
- except (AttributeError, ZeroDivisionError):
- # Fall back to the old calculation if the newer method is unavailable
- avg_size = float(data.getTotalSize()) / data.getTotalFiles() if data.getTotalFiles() else 0.0
- f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % avg_size)
- try:
- avg_revisions = data.getAverageRevisionsPerFile()
- f.write('<dt>Average revisions per file</dt><dd>%.2f</dd>' % avg_revisions)
- except AttributeError:
- pass
- f.write('</dl>\n')
-
- # Files :: File count by date
- f.write(html_header(2, 'File count by date'))
-
- # use set to get rid of duplicate/unnecessary entries
- files_by_date = set()
- for stamp in sorted(data.files_by_stamp.keys()):
- files_by_date.add('%s %d' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
-
- fg = open(path + '/files_by_date.dat', 'w')
- for line in sorted(list(files_by_date)):
- fg.write('%s\n' % line)
- fg.close()
-
- f.write('<img src="files_by_date.png" alt="Files by Date">')
-
- #f.write('<h2>Average file size by date</h2>')
-
- # Files :: Extensions
- f.write(html_header(2, 'Extensions'))
- f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
- for ext in sorted(data.extensions.keys()):
- files = data.extensions[ext]['files']
- lines = data.extensions[ext]['lines']
- loc_percentage = (100.0 * lines) / data.getTotalLOC() if data.getTotalLOC() else 0.0
- files_percentage = (100.0 * files) / data.getTotalFiles() if data.getTotalFiles() else 0.0
- lines_per_file = (lines // files) if files else 0
- f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (ext, files, files_percentage, lines, loc_percentage, lines_per_file))
- f.write('</table>')
-
- # SLOC Breakdown by Extension
- f.write(html_header(2, 'Source Lines of Code (SLOC) Breakdown'))
- f.write('<table class="sortable" id="sloc"><tr><th>Extension</th><th>Source Lines (%)</th><th>Comment Lines (%)</th><th>Blank Lines (%)</th><th>Total Lines</th></tr>')
- sloc_data = data.getSLOCByExtension()
- for ext in sorted(sloc_data.keys()):
- if sloc_data[ext]['total'] == 0:
- continue
- source = sloc_data[ext]['source']
- comments = sloc_data[ext]['comments']
- blank = sloc_data[ext]['blank']
- total = sloc_data[ext]['total']
- source_pct = (100.0 * source / total) if total else 0.0
- comment_pct = (100.0 * comments / total) if total else 0.0
- blank_pct = (100.0 * blank / total) if total else 0.0
- f.write('<tr><td>%s</td><td>%d (%.1f%%)</td><td>%d (%.1f%%)</td><td>%d (%.1f%%)</td><td>%d</td></tr>' %
- (ext, source, source_pct, comments, comment_pct, blank, blank_pct, total))
- f.write('</table>')
-
- # Largest Files
- try:
- largest_files = data.getLargestFiles(15)
- if largest_files:
- f.write(html_header(2, 'Largest Files'))
- f.write('<table class="sortable" id="largest_files"><tr><th>File</th><th>Size (bytes)</th><th>Size (KB)</th></tr>')
- for filepath, size in largest_files:
- size_kb = size / 1024.0
- f.write('<tr><td>%s</td><td>%d</td><td>%.1f</td></tr>' % (filepath, size, size_kb))
- f.write('</table>')
- except (AttributeError, TypeError):
- pass
-
- # Files with Most Revisions (Hotspots)
- try:
- hotspot_files = data.getFilesWithMostRevisions(15)
- if hotspot_files:
- f.write(html_header(2, 'Files with Most Revisions (Hotspots)'))
- f.write('<table class="sortable" id="hotspot_files"><tr><th>File</th><th>Revisions</th><th>% of Total Commits</th></tr>')
- total_commits = data.getTotalCommits()
- for filepath, revisions in hotspot_files:
- revision_pct = (100.0 * revisions / total_commits) if total_commits else 0.0
- f.write('<tr><td>%s</td><td>%d</td><td>%.2f%%</td></tr>' % (filepath, revisions, revision_pct))
- f.write('</table>')
- except (AttributeError, TypeError):
- pass
-
- # Directory Activity
- try:
- active_directories = data.getDirectoriesByActivity(15)
- if active_directories:
- f.write(html_header(2, 'Most Active Directories'))
- f.write('<table class="sortable" id="active_directories"><tr><th>Directory</th><th>Total Lines Changed</th><th>Lines Added</th><th>Lines Removed</th><th>Files</th></tr>')
- for directory, total_lines, lines_added, lines_removed, file_count in active_directories:
- directory_display = directory if directory != '.' else '(root)'
- f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td><td>%d</td></tr>' % (directory_display, total_lines, lines_added, lines_removed, file_count))
- f.write('</table>')
- except (AttributeError, TypeError):
- pass
-
- f.write('</body></html>')
- f.close()
-
- ###
- # Lines
- f = open(path + '/lines.html', 'w')
- self.printHeader(f)
- f.write('<h1>Lines</h1>')
- self.printNav(f)
-
- f.write('<dl>\n')
- f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
- f.write('<dt>Source lines</dt><dd>%d (%.1f%%)</dd>' % (
- data.getTotalSourceLines(),
- (100.0 * data.getTotalSourceLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0
- ))
- f.write('<dt>Comment lines</dt><dd>%d (%.1f%%)</dd>' % (
- data.getTotalCommentLines(),
- (100.0 * data.getTotalCommentLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0
- ))
- f.write('<dt>Blank lines</dt><dd>%d (%.1f%%)</dd>' % (
- data.getTotalBlankLines(),
- (100.0 * data.getTotalBlankLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0
- ))
- f.write('</dl>\n')
-
- f.write(html_header(2, 'Lines of Code'))
- f.write('<p>This chart shows the total lines of code over time, including source code, comments, and blank lines.</p>')
- f.write('<img src="lines_of_code.png" alt="Lines of Code">')
-
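- # lines_of_code.dat columns: unix timestamp, total lines (lines_of_code.plot reads column 1 with timefmt "%s")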
- fg = open(path + '/lines_of_code.dat', 'w')
- for stamp in sorted(data.changes_by_date.keys()):
- fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
- fg.close()
-
- # Add SLOC composition chart data
- f.write(html_header(2, 'Source Lines of Code (SLOC) Composition'))
- f.write('<p>Breakdown of code composition by file type and content type:</p>')
- sloc_data = data.getSLOCByExtension()
- if sloc_data:
- f.write('<table class="sortable" id="sloc_breakdown">')
- f.write('<tr><th>Extension</th><th>Source Lines</th><th>Comment Lines</th><th>Blank Lines</th><th>Total</th><th>Source %</th><th>Comment %</th></tr>')
-
- sorted_sloc = sorted(sloc_data.items(), key=lambda x: x[1]['total'], reverse=True)
- for ext, sloc_info in sorted_sloc[:15]: # Top 15 extensions
- if sloc_info['total'] == 0:
- continue
-
- ext_display = ext if ext else '(no extension)'
- source_pct = (100.0 * sloc_info['source'] / sloc_info['total']) if sloc_info['total'] else 0.0
- comment_pct = (100.0 * sloc_info['comments'] / sloc_info['total']) if sloc_info['total'] else 0.0
-
- f.write('<tr>')
- f.write('<td>%s</td>' % ext_display)
- f.write('<td>%d</td>' % sloc_info['source'])
- f.write('<td>%d</td>' % sloc_info['comments'])
- f.write('<td>%d</td>' % sloc_info['blank'])
- f.write('<td>%d</td>' % sloc_info['total'])
- f.write('<td>%.1f%%</td>' % source_pct)
- f.write('<td>%.1f%%</td>' % comment_pct)
- f.write('</tr>')
-
- f.write('</table>')
- else:
- f.write('<p>No SLOC data available.</p>')
-
- f.write('</body></html>')
- f.close()
-
- ###
- # tags.html
- f = open(path + '/tags.html', 'w')
- self.printHeader(f)
- f.write('<h1>Tags</h1>')
- self.printNav(f)
-
- f.write('<dl>')
- f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
- if len(data.tags) > 0:
- f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (1.0 * data.getTotalCommits() / len(data.tags)))
- f.write('</dl>')
-
- f.write('<table class="tags">')
- f.write('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
- # sort the tags by date desc
- tags_sorted_by_date_desc = [tag for tag, _ in sorted(data.tags.items(), key=lambda item: item[1]['date'], reverse=True)]
- for tag in tags_sorted_by_date_desc:
- authorinfo = []
- authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
- for i in reversed(authors_by_commits):
- authorinfo.append('%s (%d)' % (i, data.tags[tag]['authors'][i]))
- f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
- f.write('</table>')
-
- f.write('</body></html>')
- f.close()
-
- self.createGraphs(path)
-
- def _generateAssessment(self, performance, patterns):
- """Generate a text assessment for an author based on their performance metrics."""
- efficiency = performance.get('efficiency_score', 0)
- consistency = performance.get('consistency', 0)
- leadership = performance.get('leadership_score', 0)
- contribution = performance.get('contribution_percentage', 0)
-
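- # max(..., 1) guards the ratios against a zero commit count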
- small_commits_ratio = patterns.get('small_commits', 0) / max(patterns.get('total_commits', 1), 1)
- large_commits_ratio = patterns.get('large_commits', 0) / max(patterns.get('total_commits', 1), 1)
-
- assessments = []
-
- # Contribution level
- if contribution > 25:
- assessments.append("Major Contributor")
- elif contribution > 10:
- assessments.append("Regular Contributor")
- elif contribution > 2:
- assessments.append("Minor Contributor")
- else:
- assessments.append("Occasional Contributor")
-
- # Quality assessment
- if efficiency > 80:
- assessments.append("High Quality")
- elif efficiency > 60:
- assessments.append("Good Quality")
- elif efficiency > 40:
- assessments.append("Average Quality")
- else:
- assessments.append("Needs Improvement")
-
- # Work pattern assessment
- if small_commits_ratio > 0.7:
- assessments.append("Frequent Small Commits")
- elif large_commits_ratio > 0.3:
- assessments.append("Prefers Large Commits")
-
- if consistency > 80:
- assessments.append("Very Consistent")
- elif consistency > 60:
- assessments.append("Consistent")
-
- if leadership > 70:
- assessments.append("Leadership Role")
- elif leadership > 50:
- assessments.append("Collaborative")
-
- return ", ".join(assessments) if assessments else "Standard Contributor"
-
- def createGraphs(self, path):
- print('Generating graphs...')
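- # Each block below writes a gnuplot script; every *.plot file is executed in one pass at the end of this method.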
-
- # hour of day
- f = open(path + '/hour_of_day.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'hour_of_day.png'
- unset key
- set xrange [0.5:24.5]
- set yrange [0:]
- set xtics 4
- set grid y
- set ylabel "Commits"
- plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
- """)
- f.close()
-
- # day of week
- f = open(path + '/day_of_week.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'day_of_week.png'
- unset key
- set xrange [0.5:7.5]
- set yrange [0:]
- set xtics 1
- set grid y
- set ylabel "Commits"
- plot 'day_of_week.dat' using 1:3:(0.5):xtic(2) w boxes fs solid
- """)
- f.close()
-
- # Domains
- f = open(path + '/domains.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'domains.png'
- unset key
- unset xtics
- set yrange [0:]
- set grid y
- set ylabel "Commits"
- plot 'domains.dat' using 2:3:(0.5) with boxes fs solid, '' using 2:3:1 with labels rotate by 45 offset 0,1
- """)
- f.close()
-
- # Month of Year
- f = open(path + '/month_of_year.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'month_of_year.png'
- unset key
- set xrange [0.5:12.5]
- set yrange [0:]
- set xtics 1
- set grid y
- set ylabel "Commits"
- plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
- """)
- f.close()
-
- # commits_by_year_month
- f = open(path + '/commits_by_year_month.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'commits_by_year_month.png'
- unset key
- set yrange [0:]
- set xdata time
- set timefmt "%Y-%m"
- set format x "%Y-%m"
- set xtics rotate
- set bmargin 5
- set grid y
- set ylabel "Commits"
- plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
- """)
- f.close()
-
- # commits_by_year
- f = open(path + '/commits_by_year.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'commits_by_year.png'
- unset key
- set yrange [0:]
- set xtics 1 rotate
- set grid y
- set ylabel "Commits"
- plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
- """)
- f.close()
-
- # Files by date
- f = open(path + '/files_by_date.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'files_by_date.png'
- unset key
- set yrange [0:]
- set xdata time
- set timefmt "%Y-%m-%d"
- set format x "%Y-%m-%d"
- set grid y
- set ylabel "Files"
- set xtics rotate
- set ytics autofreq
- set bmargin 6
- plot 'files_by_date.dat' using 1:2 w steps
- """)
- f.close()
-
- # Lines of Code
- f = open(path + '/lines_of_code.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'lines_of_code.png'
- unset key
- set yrange [0:]
- set xdata time
- set timefmt "%s"
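- # column 1 of lines_of_code.dat is a unix epoch timestamp, matching timefmt "%s"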
- set format x "%Y-%m-%d"
- set grid y
- set ylabel "Lines"
- set xtics rotate
- set bmargin 6
- plot 'lines_of_code.dat' using 1:2 w lines
- """)
- f.close()
-
- # Lines of Code Added per author
- f = open(path + '/lines_of_code_by_author.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set terminal png transparent size 640,480
- set output 'lines_of_code_by_author.png'
- set key left top
- set yrange [0:]
- set xdata time
- set timefmt "%s"
- set format x "%Y-%m-%d"
- set grid y
- set ylabel "Lines"
- set xtics rotate
- set bmargin 6
- plot """
- )
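- # column 1 of lines_of_code_by_author.dat is the timestamp; author columns start at 2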
- plots = []
- for i, a in enumerate(self.authors_to_plot, start=2):
- author = a.replace("\"", "\\\"").replace("`", "")
- plots.append("""'lines_of_code_by_author.dat' using 1:%d title "%s" w lines""" % (i, author))
- f.write(", ".join(plots))
- f.write('\n')
-
- f.close()
-
- # Commits per author
- f = open(path + '/commits_by_author.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set terminal png transparent size 640,480
- set output 'commits_by_author.png'
- set key left top
- set yrange [0:]
- set xdata time
- set timefmt "%s"
- set format x "%Y-%m-%d"
- set grid y
- set ylabel "Commits"
- set xtics rotate
- set bmargin 6
- plot """
- )
- plots = []
- # same column mapping as lines_of_code_by_author.dat: author data starts at column 2
- for i, a in enumerate(self.authors_to_plot, start=2):
- author = a.replace("\"", "\\\"").replace("`", "")
- plots.append("""'commits_by_author.dat' using 1:%d title "%s" w lines""" % (i, author))
- f.write(", ".join(plots))
- f.write('\n')
-
- f.close()
-
- # Pace of Changes plot
- f = open(path + '/pace_of_changes.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'pace_of_changes.png'
- unset key
- set yrange [0:]
- set xdata time
- set timefmt "%s"
- set format x "%Y-%m-%d"
- set grid y
- set ylabel "Line Changes (Additions + Deletions)"
- set xtics rotate
- set bmargin 6
- plot 'pace_of_changes.dat' using 1:2 w lines lw 2
- """)
- f.close()
-
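- # Run gnuplot from the output directory so each script's relative 'set output' lands next to the report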
- os.chdir(path)
- files = glob.glob(path + '/*.plot')
- for plot_file in files:
- out = getpipeoutput([gnuplot_cmd + ' "%s"' % plot_file])
- if len(out) > 0:
- print(out)
-
- def printHeader(self, f, title = ''):
- f.write(
- """<!DOCTYPE html>
- <html>
- <head>
- <meta charset="UTF-8">
- <title>GitStats - %s</title>
- <link rel="stylesheet" href="%s" type="text/css">
- <meta name="generator" content="GitStats %s">
- <script type="text/javascript" src="sortable.js"></script>
- </head>
- <body>
- """ % (self.title, conf['style'], getversion()))
-
- def printNav(self, f):
- f.write("""
- <div class="nav">
- <ul>
- <li><a href="index.html">General</a></li>
- <li><a href="activity.html">Activity</a></li>
- <li><a href="authors.html">Authors</a></li>
- <li><a href="team_analysis.html">Team Analysis</a></li>
- <li><a href="branches.html">Branches</a></li>
- <li><a href="files.html">Files</a></li>
- <li><a href="lines.html">Lines</a></li>
- <li><a href="tags.html">Tags</a></li>
- </ul>
- </div>
- """)
-
- class PDFReportCreator(ReportCreator):
- """Creates PDF reports using fpdf2 library with embedded charts and tab-based structure."""
-
- def __init__(self):
- ReportCreator.__init__(self)
- self.pdf = None
- self.output_path = None
- # Define color schemes for better visual appeal
- self.colors = {
- 'header': (41, 128, 185), # Blue
- 'text': (0, 0, 0), # Black
- 'table_header': (52, 152, 219), # Light blue
- 'table_alt': (245, 245, 245) # Light gray
- }
-
- def _set_color(self, color_type='text', fill=False):
- """Set text or fill color using predefined color scheme."""
- if color_type in self.colors:
- r, g, b = self.colors[color_type]
- if fill:
- self.pdf.set_fill_color(r, g, b)
- else:
- self.pdf.set_text_color(r, g, b)
-
- def _add_section_header(self, title, level=1):
- """Add a standardized section header with consistent formatting."""
- # Add some space before header
- self.pdf.ln(h=10)
-
- # Set header color and font
- self._set_color('header')
- if level == 1:
- self.pdf.set_font('helvetica', 'B', 20)
- height = 15
- elif level == 2:
- self.pdf.set_font('helvetica', 'B', 16)
- height = 12
- else:
- self.pdf.set_font('helvetica', 'B', 14)
- height = 10
-
- # Add the header
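- # (fpdf2's new_x/new_y keywords replace the deprecated ln= argument of cell())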
- self.pdf.cell(0, height, title, 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Reset color to text
- self._set_color('text')
- self.pdf.ln(h=5) # Small gap after header
-
- def _create_table_header(self, headers, widths=None, font_size=9):
- """Create a standardized table header with consistent formatting."""
- if widths is None:
- # Auto-calculate widths if not provided
- total_width = 180 # Reasonable default
- widths = [total_width // len(headers)] * len(headers)
-
- # Set header styling: white text on the blue fill (setting the text to the
- # same color as the fill would render the header invisible)
- self.pdf.set_text_color(255, 255, 255)
- self._set_color('table_header', fill=True)
- self.pdf.set_font('helvetica', 'B', font_size)
-
- # Create header cells
- for i, (header, width) in enumerate(zip(headers, widths)):
- is_last = (i == len(headers) - 1)
- new_x = XPos.LMARGIN if is_last else XPos.RIGHT
- new_y = YPos.NEXT if is_last else YPos.TOP
-
- self.pdf.cell(width, 8, str(header), 1,
- new_x=new_x, new_y=new_y, align='C', fill=True)
-
- # Reset styling for table content
- self._set_color('text')
- self.pdf.set_font('helvetica', '', font_size - 1)
-
- def _create_table_row(self, values, widths, alternate_row=False, font_size=8):
- """Create a table row with optional alternating background."""
- if alternate_row:
- self._set_color('table_alt', fill=True)
-
- for i, (value, width) in enumerate(zip(values, widths)):
- is_last = (i == len(values) - 1)
- new_x = XPos.LMARGIN if is_last else XPos.RIGHT
- new_y = YPos.NEXT if is_last else YPos.TOP
-
- # Truncate long values to fit
- str_value = str(value)
- if len(str_value) > width // 3: # Rough character width estimation
- str_value = str_value[:width//3-2] + '...'
-
- self.pdf.cell(width, 6, str_value, 1,
- new_x=new_x, new_y=new_y, align='C', fill=alternate_row)
-
- def create(self, data, path):
- ReportCreator.create(self, data, path)
- self.title = data.projectname
- self.output_path = path
-
- # Initialize PDF document with fpdf2 features
- self.pdf = FPDF()
- self.pdf.set_auto_page_break(auto=True, margin=15)
-
- # Set metadata for better PDF properties
- self.pdf.set_title(f"GitStats Report - {data.projectname}")
- self.pdf.set_author("GitStats")
- self.pdf.set_subject(f"Git repository analysis for {data.projectname}")
- self.pdf.set_creator("GitStats with fpdf2")
- self.pdf.set_keywords("git,statistics,analysis,repository")
-
- # Create all pages (tabs)
- self._create_title_page(data)
- self._create_general_page(data)
- self._create_activity_page(data)
- self._create_authors_page(data)
- self._create_team_analysis_page(data)
- self._create_files_page(data)
- self._create_lines_page(data)
- self._create_tags_page(data)
- self._create_branches_page(data)
-
- # Save PDF with fpdf2's enhanced output method
- pdf_path = os.path.join(path, f"gitstats_{data.projectname.replace(' ', '_')}.pdf")
-
- # Use fpdf2's output method with proper file handling
- try:
- self.pdf.output(pdf_path)
- print(f"PDF report saved to: {pdf_path}")
- # Verify file was created and has content
- if os.path.exists(pdf_path) and os.path.getsize(pdf_path) > 0:
- print(f"PDF file size: {os.path.getsize(pdf_path)} bytes")
- else:
- print("Warning: PDF file was not created properly")
- except Exception as e:
- print(f"Error saving PDF: {e}")
- raise
-
- def _add_chart_if_exists(self, chart_filename, width=None, height=None):
- """Add a chart image to the PDF if it exists, with improved fpdf2 handling."""
- chart_path = os.path.join(self.output_path, chart_filename)
- if os.path.exists(chart_path):
- try:
- # Get current position
- x = self.pdf.get_x()
- y = self.pdf.get_y()
-
- # Calculate dimensions with better defaults
- if width is None:
- width = 150 # Default width
- if height is None:
- height = 80 # Default height
-
- # Get page height for the remaining-space calculation
- page_height = self.pdf.h
- margin = 15 # Same as the auto_page_break margin
-
- # Check if there's enough space on current page
- if y + height > (page_height - margin):
- self.pdf.add_page()
- x = self.pdf.get_x()
- y = self.pdf.get_y()
-
- # Add image with fpdf2's enhanced image handling
- # fpdf2 automatically handles different image formats
- self.pdf.image(chart_path, x=x, y=y, w=width, h=height)
-
- # Move cursor below image with better spacing
- self.pdf.set_y(y + height + 8) # Increased spacing for better layout
-
- return True
- except Exception as e:
- print(f"Warning: Could not add chart {chart_filename}: {e}")
- return False
- return False
-
- def _create_title_page(self, data):
- """Create the title page of the PDF report."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 24)
- self.pdf.cell(0, 20, f'GitStats Report - {data.projectname}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- self.pdf.ln(h=10)
- self.pdf.set_font('helvetica', '', 12)
- date_format = '%Y-%m-%d %H:%M:%S'
-
- # Report generation info
- self.pdf.cell(0, 10, f'Generated: {datetime.datetime.now().strftime(date_format)}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
- self.pdf.cell(0, 10, f'Generator: GitStats (version {getversion()})', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
- self.pdf.cell(0, 10, f'Git Version: {getgitversion()}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
- if getgnuplotversion():
- self.pdf.cell(0, 10, f'Gnuplot Version: {getgnuplotversion()}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
-
- self.pdf.ln(h=10)
- self.pdf.cell(0, 10, f'Report Period: {data.getFirstCommitDate().strftime(date_format)} to {data.getLastCommitDate().strftime(date_format)}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
-
- # Table of contents
- self.pdf.ln(h=15)
- self.pdf.set_font('helvetica', 'B', 16)
- self.pdf.cell(0, 10, 'Table of Contents', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 12)
- sections = [
- '1. General Statistics',
- '2. Activity Statistics',
- '3. Authors Statistics',
- '4. Team Analysis',
- '5. Files Statistics',
- '6. Lines of Code Statistics',
- '7. Tags Statistics',
- '8. Branches Statistics'
- ]
-
- for section in sections:
- self.pdf.cell(0, 8, section, 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- def _create_general_page(self, data):
- """Create the general statistics page (mirrors index.html)."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 20)
- self.pdf.cell(0, 15, '1. General Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 12)
-
- # Calculate basic stats
- total_commits = data.getTotalCommits()
- total_active_days = len(data.getActiveDays()) if hasattr(data, 'getActiveDays') else 0
- delta_days = data.getCommitDeltaDays() if hasattr(data, 'getCommitDeltaDays') else 0
- total_authors = data.getTotalAuthors()
-
- # General statistics (matching index.html exactly)
- stats = [
- ('Project name', data.projectname),
- ('Generated', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
- ('Report Period', f"{data.getFirstCommitDate().strftime('%Y-%m-%d %H:%M:%S')} to {data.getLastCommitDate().strftime('%Y-%m-%d %H:%M:%S')}"),
- ('Age', f"{delta_days} days, {total_active_days} active days ({(100.0 * total_active_days / delta_days) if delta_days else 0.0:.2f}%)"),
- ('Total Files', str(data.getTotalFiles())),
- ('Total Lines of Code', f"{data.getTotalLOC()} ({data.total_lines_added} added, {data.total_lines_removed} removed)"),
- ('Source Lines of Code', f"{data.getTotalSourceLines()} ({(100.0 * data.getTotalSourceLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0:.1f}%)"),
- ('Comment Lines', f"{data.getTotalCommentLines()} ({(100.0 * data.getTotalCommentLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0:.1f}%)"),
- ('Blank Lines', f"{data.getTotalBlankLines()} ({(100.0 * data.getTotalBlankLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0:.1f}%)"),
- ('Total Commits', f"{total_commits} (average {(float(total_commits) / total_active_days) if total_active_days else 0.0:.1f} commits per active day, {(float(total_commits) / delta_days) if delta_days else 0.0:.1f} per all days)"),
- ('Authors', f"{total_authors} (average {(float(total_commits) / total_authors) if total_authors else 0.0:.1f} commits per author)"),
- ('Total Branches', str(len(data.getBranches()))),
- ('Unmerged Branches', str(len(data.getUnmergedBranches()))),
- ('Main Branch', data.main_branch if hasattr(data, 'main_branch') else 'N/A')
- ]
-
- # Display stats
- for label, value in stats:
- self.pdf.cell(50, 8, f"{label}:", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(0, 8, str(value), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=10)
-
- def _create_activity_page(self, data):
- """Create the activity statistics page with charts (mirrors activity.html)."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 20)
- self.pdf.cell(0, 15, '2. Activity Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Weekly activity section
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Weekly Activity', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self.pdf.set_font('helvetica', '', 10)
- self.pdf.cell(0, 6, 'Last 32 weeks activity (see chart below)', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self.pdf.ln(h=5)
-
- # Hour of Day section
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Hour of Day', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 10)
- hour_of_day = data.getActivityByHourOfDay()
- total_commits = data.getTotalCommits()
-
- # Create hour of day table
- self.pdf.set_font('helvetica', 'B', 8)
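- # row width: 20 + 24*7 = 188 mm, which should just fit the printable width of an A4 page with default margins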
- self.pdf.cell(20, 6, 'Hour', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- for h in range(0, 24):
- self.pdf.cell(7, 6, str(h), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.ln()
-
- self.pdf.cell(20, 6, 'Commits', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- for h in range(0, 24):
- commits = hour_of_day.get(h, 0)
- self.pdf.cell(7, 6, str(commits), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.ln()
-
- self.pdf.cell(20, 6, '%', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- for h in range(0, 24):
- commits = hour_of_day.get(h, 0)
- percent = (100.0 * commits / total_commits) if total_commits else 0.0
- self.pdf.cell(7, 6, f"{percent:.1f}", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.ln(h=10)
-
- # Add hour of day chart
- self._add_chart_if_exists('hour_of_day.png', 180, 90)
-
- # Day of Week section
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Day of Week', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 10)
- day_of_week = data.getActivityByDayOfWeek()
-
- # Create day of week table
- self.pdf.set_font('helvetica', 'B', 10)
- self.pdf.cell(30, 8, 'Day', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 8, 'Total (%)', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- self.pdf.set_font('helvetica', '', 10)
- for d in range(0, 7):
- day_name = WEEKDAYS[d]
- commits = day_of_week.get(d, 0)
- percent = (100.0 * commits / total_commits) if total_commits else 0.0
- self.pdf.cell(30, 6, day_name, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(30, 6, f"{commits} ({percent:.2f}%)", 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=5)
- self._add_chart_if_exists('day_of_week.png', 180, 90)
-
- # Month of Year section
- if hasattr(data, 'activity_by_month_of_year'):
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Month of Year', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', 'B', 10)
- self.pdf.cell(30, 8, 'Month', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(40, 8, 'Commits (%)', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- self.pdf.set_font('helvetica', '', 10)
- for mm in range(1, 13):
- commits = data.activity_by_month_of_year.get(mm, 0)
- percent = (100.0 * commits / total_commits) if total_commits else 0.0
- self.pdf.cell(30, 6, str(mm), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(40, 6, f"{commits} ({percent:.2f} %)", 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=5)
- self._add_chart_if_exists('month_of_year.png', 180, 90)
-
- # Add page break for next major chart
- if self.pdf.get_y() > 200:
- self.pdf.add_page()
-
- # Commits by year/month chart
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Commits by Year/Month', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self._add_chart_if_exists('commits_by_year_month.png', 180, 100)
-
- # Commits by year chart
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Commits by Year', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self._add_chart_if_exists('commits_by_year.png', 180, 100)
-
- def _create_authors_page(self, data):
- """Create the authors statistics page with charts (mirrors authors.html)."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 20)
- self.pdf.cell(0, 15, '3. Authors Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # List of Authors table
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'List of Authors', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- authors = data.getAuthors(conf['max_authors'])
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 8)
- self.pdf.cell(35, 6, 'Author', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 6, 'Commits (%)', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(15, 6, '+ lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(15, 6, '- lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, 'First commit', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, 'Last commit', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 6, 'Age', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(15, 6, 'Active days', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Table data
- self.pdf.set_font('helvetica', '', 7)
- for author in authors[:20]: # Top 20 authors
- info = data.getAuthorInfo(author)
-
- # Truncate long author names
- display_author = author[:18] + "..." if len(author) > 21 else author
-
- self.pdf.cell(35, 5, display_author, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(20, 5, f"{info['commits']} ({info['commits_frac']:.1f}%)", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(15, 5, str(info['lines_added']), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(15, 5, str(info['lines_removed']), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 5, info['date_first'][:10], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 5, info['date_last'][:10], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
-
- # Calculate age
- try:
- age_days = (datetime.datetime.strptime(info['date_last'][:10], '%Y-%m-%d') -
- datetime.datetime.strptime(info['date_first'][:10], '%Y-%m-%d')).days
- age_text = f"{age_days} days" if age_days > 0 else "1 day"
- except (ValueError, TypeError):
- age_text = "N/A"
-
- active_days = len(info.get('active_days', [0])) # defaults to 1 when the key is absent
-
- self.pdf.cell(20, 5, age_text[:12], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(15, 5, str(active_days), 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- self.pdf.ln(h=10)
-
- # Lines of code by author chart
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Cumulated Added Lines of Code per Author', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self._add_chart_if_exists('lines_of_code_by_author.png', 180, 110)
-
- # Commits per author chart
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Commits per Author', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self._add_chart_if_exists('commits_by_author.png', 180, 110)
-
- # Commits by domains chart
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Commits by Domains', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self._add_chart_if_exists('domains.png', 180, 100)
-
- def _create_team_analysis_page(self, data):
- """Create the team analysis page for comprehensive team evaluation (new feature)."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 20)
- self.pdf.cell(0, 15, '4. Team Analysis', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Team Overview
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Team Overview', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 12)
- total_authors = data.getTotalAuthors()
- work_distribution = data.getTeamWorkDistribution()
-
- self.pdf.cell(50, 8, 'Total Team Members:', 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(0, 8, str(total_authors), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Calculate work distribution metrics
- commit_contributions = [dist['commit_percentage'] for dist in work_distribution.values()]
- if commit_contributions:
- max_contrib = max(commit_contributions)
- min_contrib = min(commit_contributions)
- avg_contrib = sum(commit_contributions) / len(commit_contributions)
-
- self.pdf.cell(50, 8, 'Work Distribution:', 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(0, 8, f'Max: {max_contrib:.1f}%, Min: {min_contrib:.1f}%, Avg: {avg_contrib:.1f}%', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=10)
-
- # Team Performance Rankings
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Team Performance Rankings', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Top Contributors
- contrib_ranking = data.getAuthorsByContribution()
- efficiency_ranking = data.getAuthorsByEfficiency()
-
- self.pdf.set_font('helvetica', 'B', 12)
- self.pdf.cell(0, 8, 'Top 10 Contributors (by commit percentage):', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self.pdf.set_font('helvetica', '', 10)
-
- for i, (author, percentage) in enumerate(contrib_ranking[:10], 1):
- display_author = author[:30] + "..." if len(author) > 33 else author
- self.pdf.cell(0, 6, f'{i}. {display_author} ({percentage:.1f}%)', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=5)
-
- # Team Performance Table
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Detailed Performance Analysis', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- team_performance = data.getTeamPerformance()
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 8)
- self.pdf.cell(35, 6, 'Author', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 6, 'Commits', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 6, 'Contrib %', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, 'Efficiency', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, 'Consistency', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, 'Leadership', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, 'Overall', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Table data - show top 15 performers
- self.pdf.set_font('helvetica', '', 7)
- sorted_authors = sorted(team_performance.items(), key=lambda x: x[1].get('overall_score', 0), reverse=True)
-
- for author, perf in sorted_authors[:15]:
- author_info = data.getAuthorInfo(author)
-
- commits = author_info.get('commits', 0)
- contrib_pct = perf.get('contribution_percentage', 0)
- efficiency = perf.get('efficiency_score', 0)
- consistency = perf.get('consistency', 0)
- leadership = perf.get('leadership_score', 0)
- overall = perf.get('overall_score', 0)
-
- # Truncate long author names
- display_author = author[:18] + "..." if len(author) > 21 else author
-
- self.pdf.cell(35, 5, display_author, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(20, 5, str(commits), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 5, f'{contrib_pct:.1f}%', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 5, f'{efficiency:.1f}', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 5, f'{consistency:.1f}', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 5, f'{leadership:.1f}', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 5, f'{overall:.1f}', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- self.pdf.ln(h=10)
-
- # Team Assessment Conclusion
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Team Assessment Conclusion', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 10)
-
- # Generate team insights
- top_contributor = contrib_ranking[0] if contrib_ranking else ("N/A", 0)
- most_efficient = efficiency_ranking[0] if efficiency_ranking else ("N/A", 0)
-
- self.pdf.cell(0, 6, f'- Top contributor: {top_contributor[0]} ({top_contributor[1]:.1f}% of commits)', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self.pdf.cell(0, 6, f'- Most efficient developer: {most_efficient[0]} (score: {most_efficient[1]:.1f})', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self.pdf.cell(0, 6, f'- Team size: {total_authors} active contributors', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Work distribution assessment
- if commit_contributions:
- gini_coefficient = self._calculate_gini_coefficient(commit_contributions)
- if gini_coefficient < 0.3:
- distribution_assessment = "Well-distributed (very balanced team)"
- elif gini_coefficient < 0.5:
- distribution_assessment = "Moderately distributed (some imbalance)"
- else:
- distribution_assessment = "Highly concentrated (few dominant contributors)"
-
- self.pdf.cell(0, 6, f'- Work distribution: {distribution_assessment}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- def _calculate_gini_coefficient(self, values):
- """Calculate Gini coefficient for work distribution analysis."""
- if not values:
- return 0
-
- sorted_values = sorted(values)
- n = len(sorted_values)
- cumsum = sum(sorted_values)
-
- if cumsum == 0:
- return 0
-
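- # Mean absolute difference formulation:
- # Gini = sum_i sum_j |x_i - x_j| / (2 * n * sum(x))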
- sum_of_differences = 0
- for i in range(n):
- for j in range(n):
- sum_of_differences += abs(sorted_values[i] - sorted_values[j])
-
- gini = sum_of_differences / (2 * n * cumsum)
- return gini
-
- def _create_files_page(self, data):
- """Create the files statistics page with charts (mirrors files.html)."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 20)
- self.pdf.cell(0, 15, '5. Files Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Basic file stats
- total_files = data.getTotalFiles()
- total_loc = data.getTotalLOC()
-
- self.pdf.set_font('helvetica', '', 12)
- stats = [
- ('Total files', str(total_files)),
- ('Total lines', str(total_loc)),
- ]
-
- try:
- avg_size = data.getAverageFileSize()
- except (AttributeError, ZeroDivisionError):
- # Fall back to the old calculation if the newer method is unavailable
- avg_size = float(data.getTotalSize()) / total_files if total_files else 0.0
- stats.append(('Average file size', f"{avg_size:.2f} bytes"))
-
- try:
- avg_revisions = data.getAverageRevisionsPerFile()
- stats.append(('Average revisions per file', f"{avg_revisions:.2f}"))
- except AttributeError:
- pass
-
- for label, value in stats:
- self.pdf.cell(50, 8, f"{label}:", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(0, 8, str(value), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=10)
-
- # File extensions
- if hasattr(data, 'extensions') and data.extensions:
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'File Extensions', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 9)
- self.pdf.cell(25, 8, 'Extension', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 8, 'Files', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 8, '% Files', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 8, 'Lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 8, '% Lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 8, 'Lines/File', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Table data - show top extensions
- self.pdf.set_font('helvetica', '', 8)
- sorted_extensions = sorted(data.extensions.items(),
- key=lambda x: x[1]['files'], reverse=True)[:15]
-
- for ext, ext_data in sorted_extensions:
- files = ext_data['files']
- lines = ext_data['lines']
- loc_percentage = (100.0 * lines / total_loc) if total_loc else 0.0
- files_percentage = (100.0 * files / total_files) if total_files else 0.0
- lines_per_file = (lines // files) if files else 0
-
- display_ext = ext if ext else '(no ext)'
-
- self.pdf.cell(25, 6, display_ext[:12], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(20, 6, str(files), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 6, f"{files_percentage:.1f}%", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, str(lines), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 6, f"{loc_percentage:.1f}%", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, str(lines_per_file), 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- self.pdf.ln(h=10)
-
- # SLOC Breakdown by Extension
- sloc_data = data.getSLOCByExtension()
- if sloc_data:
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Source Lines of Code (SLOC) Breakdown', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 8)
- self.pdf.cell(20, 8, 'Extension', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 8, 'Source Lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 8, 'Comment Lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 8, 'Blank Lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 8, 'Total', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Table data
- self.pdf.set_font('helvetica', '', 7)
- sorted_sloc = sorted(sloc_data.items(),
- key=lambda x: x[1]['total'], reverse=True)[:15]
-
- for ext, sloc_info in sorted_sloc:
- if sloc_info['total'] == 0:
- continue
-
- display_ext = ext if ext else '(no ext)'
- source_pct = (100.0 * sloc_info['source'] / sloc_info['total']) if sloc_info['total'] else 0.0
- comment_pct = (100.0 * sloc_info['comments'] / sloc_info['total']) if sloc_info['total'] else 0.0
- blank_pct = (100.0 * sloc_info['blank'] / sloc_info['total']) if sloc_info['total'] else 0.0
-
- self.pdf.cell(20, 5, display_ext[:8], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(25, 5, f"{sloc_info['source']} ({source_pct:.1f}%)", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 5, f"{sloc_info['comments']} ({comment_pct:.1f}%)", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 5, f"{sloc_info['blank']} ({blank_pct:.1f}%)", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 5, str(sloc_info['total']), 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- self.pdf.ln(h=10)
-
- # Add new file statistics tables
- try:
- # Largest Files
- largest_files = data.getLargestFiles(10)
- if largest_files:
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Largest Files', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 9)
- self.pdf.cell(80, 8, 'File', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 8, 'Size (bytes)', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 8, 'Size (KB)', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Table data
- self.pdf.set_font('helvetica', '', 8)
- for filepath, size in largest_files:
- size_kb = size / 1024.0
- display_path = filepath[:40] + '...' if len(filepath) > 40 else filepath
- self.pdf.cell(80, 6, display_path, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(30, 6, str(size), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 6, f"{size_kb:.1f}", 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
- except (AttributeError, TypeError):
- pass
-
- try:
- # Files with Most Revisions (Hotspots)
- hotspot_files = data.getFilesWithMostRevisions(10)
- if hotspot_files:
- self.pdf.ln(h=10)
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Files with Most Revisions (Hotspots)', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 9)
- self.pdf.cell(80, 8, 'File', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 8, 'Revisions', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 8, '% of Commits', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Table data
- self.pdf.set_font('helvetica', '', 8)
- total_commits = data.getTotalCommits()
- for filepath, revisions in hotspot_files:
- revision_pct = (100.0 * revisions / total_commits) if total_commits else 0.0
- display_path = filepath[:40] + '...' if len(filepath) > 40 else filepath
- self.pdf.cell(80, 6, display_path, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(30, 6, str(revisions), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 6, f"{revision_pct:.2f}%", 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
- except (AttributeError, TypeError):
- pass
-
- self.pdf.ln(h=10)
-
- # Files by date chart
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Files by Date', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self._add_chart_if_exists('files_by_date.png', 180, 100)
-
- def _create_lines_page(self, data):
- """Create the lines of code statistics page with charts (mirrors lines.html)."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 20)
- self.pdf.cell(0, 15, '6. Lines of Code Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Basic line stats
- self.pdf.set_font('helvetica', '', 12)
- stats = [
- ('Total lines', str(data.getTotalLOC())),
- ('Lines added', str(data.total_lines_added)),
- ('Lines removed', str(data.total_lines_removed)),
- ('Net lines', str(data.total_lines_added - data.total_lines_removed)),
- ]
-
- for label, value in stats:
- self.pdf.cell(50, 8, f"{label}:", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(0, 8, str(value), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=10)
-
- # Lines by year
- if hasattr(data, 'commits_by_year') and data.commits_by_year:
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Activity by Year', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 10)
- self.pdf.cell(25, 8, 'Year', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 8, 'Commits', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 8, '% of Total', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(35, 8, 'Lines Added', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(35, 8, 'Lines Removed', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Table data
- self.pdf.set_font('helvetica', '', 9)
- total_commits = data.getTotalCommits()
-
- for yy in sorted(data.commits_by_year.keys(), reverse=True):
- commits = data.commits_by_year.get(yy, 0)
- percent = (100.0 * commits / total_commits) if total_commits else 0.0
- lines_added = data.lines_added_by_year.get(yy, 0) if hasattr(data, 'lines_added_by_year') else 0
- lines_removed = data.lines_removed_by_year.get(yy, 0) if hasattr(data, 'lines_removed_by_year') else 0
-
- self.pdf.cell(25, 6, str(yy), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 6, str(commits), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 6, f"{percent:.1f}%", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(35, 6, str(lines_added), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(35, 6, str(lines_removed), 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- self.pdf.ln(h=10)
-
- # Lines of code chart
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Lines of Code Over Time', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self._add_chart_if_exists('lines_of_code.png', 180, 100)
-
- def _create_tags_page(self, data):
- """Create the tags statistics page (mirrors tags.html)."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 20)
- self.pdf.cell(0, 15, '7. Tags Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 12)
-
- if not hasattr(data, 'tags') or not data.tags:
- self.pdf.cell(0, 10, 'No tags found in repository.', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- return
-
- # Basic tag stats
- total_tags = len(data.tags)
- avg_commits_per_tag = (1.0 * data.getTotalCommits() / total_tags) if total_tags else 0.0
-
- stats = [
- ('Total tags', str(total_tags)),
- ('Average commits per tag', f"{avg_commits_per_tag:.2f}"),
- ]
-
- for label, value in stats:
- self.pdf.cell(50, 8, f"{label}:", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(0, 8, str(value), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=10)
-
- # Tags table (most recent first)
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Recent Tags', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 10)
- self.pdf.cell(40, 8, 'Tag Name', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 8, 'Date', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 8, 'Commits', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(80, 8, 'Top Authors', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Sort tags by date (most recent first)
- tags_sorted_by_date_desc = [tag for tag, _ in sorted(data.tags.items(), key=lambda item: item[1]['date'], reverse=True)]
-
- # Show up to 20 most recent tags
- self.pdf.set_font('helvetica', '', 8)
- for tag in tags_sorted_by_date_desc[:20]:
- tag_info = data.tags[tag]
-
- # Get top authors for this tag
- if 'authors' in tag_info:
- authors = sorted(tag_info['authors'].items(),
- key=lambda x: x[1], reverse=True)[:3]
- author_list = ', '.join([f"{author}({commits})" for author, commits in authors])
- else:
- author_list = ''
-
- # Truncate long names
- display_tag = tag[:18] + "..." if len(tag) > 21 else tag
- display_authors = author_list[:35] + "..." if len(author_list) > 38 else author_list
-
- self.pdf.cell(40, 6, display_tag, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(30, 6, tag_info['date'][:10], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, str(tag_info['commits']), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(80, 6, display_authors, 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- def _create_branches_page(self, data):
- """Create the branches statistics page (mirrors branches.html)."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 20)
- self.pdf.cell(0, 15, '8. Branches Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 12)
-
- if not hasattr(data, 'branches') or not data.branches:
- self.pdf.cell(0, 10, 'No branches found in repository.', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- return
-
- # Basic branch stats
- total_branches = len(data.getBranches())
- unmerged_branches = data.getUnmergedBranches()
- total_unmerged = len(unmerged_branches)
- main_branch = data.main_branch if hasattr(data, 'main_branch') else 'N/A'
-
- stats = [
- ('Total branches', str(total_branches)),
- ('Unmerged branches', str(total_unmerged)),
- ('Main branch', main_branch),
- ]
-
- for label, value in stats:
- self.pdf.cell(50, 8, f"{label}:", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(0, 8, str(value), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=10)
-
- # Branches summary table
- self.pdf.set_font('helvetica', 'B', 12)
- self.pdf.cell(0, 10, 'All Branches', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 9)
- self.pdf.cell(35, 8, 'Branch Name', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 8, 'Status', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 8, 'Commits', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 8, 'Lines Added', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 8, 'Lines Removed', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 8, 'Authors', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(45, 8, 'Top Author', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Table data - sort by commits descending
- self.pdf.set_font('helvetica', '', 8)
- branches_sorted = sorted(data.branches.items(),
- key=lambda x: x[1].get('commits', 0), reverse=True)
-
- for branch_name, branch_data in branches_sorted:
- # Determine status
- status = 'Unmerged' if branch_name in unmerged_branches else 'Merged'
-
- # Get branch statistics
- commits = branch_data.get('commits', 0)
- lines_added = branch_data.get('lines_added', 0)
- lines_removed = branch_data.get('lines_removed', 0)
- authors_count = len(branch_data.get('authors', {}))
-
- # Get the top author (the author with the most commits on this branch)
- authors = branch_data.get('authors', {})
- if authors:
- top_author = max(authors.items(), key=lambda x: x[1])[0]
- top_author = top_author[:20] + "..." if len(top_author) > 23 else top_author
- else:
- top_author = 'N/A'
-
- # Truncate branch name if too long
- display_branch = branch_name[:18] + "..." if len(branch_name) > 21 else branch_name
-
- self.pdf.cell(35, 6, display_branch, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(20, 6, status, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 6, str(commits), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, str(lines_added), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, str(lines_removed), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 6, str(authors_count), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(45, 6, top_author, 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Unmerged branches detail section
- if total_unmerged > 0:
- self.pdf.ln(h=10)
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, f'Unmerged Branches Details ({total_unmerged})', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 10)
- for branch_name in unmerged_branches:
- if branch_name in data.branches:
- branch_data = data.branches[branch_name]
-
- self.pdf.set_font('helvetica', 'B', 10)
- self.pdf.cell(0, 8, f"Branch: {branch_name}", 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 9)
- self.pdf.cell(20, 6, f" Commits: {branch_data.get('commits', 0)}", 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self.pdf.cell(20, 6, f" Lines: +{branch_data.get('lines_added', 0)} -{branch_data.get('lines_removed', 0)}", 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Show authors
- authors = branch_data.get('authors', {})
- if authors:
- author_list = sorted(authors.items(), key=lambda x: x[1], reverse=True)
- author_str = ', '.join([f"{author}({commits})" for author, commits in author_list[:3]])
- if len(author_list) > 3:
- author_str += f" and {len(author_list) - 3} more"
- self.pdf.cell(20, 6, f" Authors: {author_str}", 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=2)
-
-
-
- def is_git_repository(path):
- """Check if a directory is a valid git repository."""
- if not os.path.isdir(path):
- return False
- git_dir = os.path.join(path, '.git')
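- # In worktrees and submodules, .git is a file rather than a directory; os.path.exists matches either.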
- return os.path.exists(git_dir)
-
- def discover_repositories(scan_path):
- """Discover all git repositories in a directory.
-
- Returns a list of tuples: (repo_name, repo_path)
- where repo_name is the directory name and repo_path is the full path.
- """
- repositories = []
- if not os.path.isdir(scan_path):
- return repositories
-
- try:
- for item in os.listdir(scan_path):
- item_path = os.path.join(scan_path, item)
- if os.path.isdir(item_path) and is_git_repository(item_path):
- # Use directory name as repository name
- repo_name = item
- repositories.append((repo_name, item_path))
- if conf['verbose']:
- print(f' Found repository: {repo_name} at {item_path}')
- except (PermissionError, OSError) as e:
- print(f'Warning: Could not scan directory {scan_path}: {e}')
-
- return repositories
-
- def usage():
- print("""
- Usage: gitstats [options] <gitpath..> <outputpath>
- gitstats [options] --multi-repo <scan-folder> <outputpath>
-
- Options:
- -c key=value Override configuration value
- --debug Enable debug output
- --verbose Enable verbose output
- --multi-repo Scan folder for multiple repositories and generate reports for each
- -h, --help Show this help message
-
- Note: GitStats always generates both HTML and PDF reports.
-
- Examples:
- gitstats repo output # Generates both HTML and PDF reports
- gitstats --verbose repo output # With verbose output
- gitstats --multi-repo /path/to/repos output # Generate reports for all repos in folder
- gitstats --debug -c max_authors=50 repo output
-
- With --multi-repo mode:
- - Scans the specified folder for git repositories
- - Creates a report for each repository in a subfolder named <reponame>_report
- - Only processes directories that are valid git repositories
-
- Default config values:
- %s
-
- Please see the manual page for more details.
- """ % conf)
-
-
- class GitStats:
- def run(self, args_orig):
- multi_repo_mode = False
- optlist, args = getopt.getopt(args_orig, 'hc:', ["help", "debug", "verbose", "multi-repo"])
- for o,v in optlist:
- if o == '-c':
- if '=' not in v:
- print(f'FATAL: Invalid configuration format. Use key=value: {v}')
- sys.exit(1)
- key, value = v.split('=', 1)
- if key not in conf:
- raise KeyError('no such key "%s" in config' % key)
-
- # Validate configuration values
- try:
- # bool must be checked before int: bool is a subclass of int in Python,
- # so isinstance(True, int) is also True and int('true') would raise ValueError.
- if isinstance(conf[key], bool):
- conf[key] = value.lower() in ('true', '1', 'yes', 'on')
- elif isinstance(conf[key], int):
- new_value = int(value)
- if key in ['max_authors', 'max_domains'] and new_value < 1:
- print(f'FATAL: {key} must be a positive integer, got: {new_value}')
- sys.exit(1)
- conf[key] = new_value
- else:
- conf[key] = value
- except ValueError as e:
- print(f'FATAL: Invalid value for {key}: {value} ({e})')
- sys.exit(1)
- elif o == '--debug':
- conf['debug'] = True
- conf['verbose'] = True # Debug implies verbose
- elif o == '--verbose':
- conf['verbose'] = True
- elif o == '--multi-repo':
- multi_repo_mode = True
- elif o in ('-h', '--help'):
- usage()
- sys.exit()
-
- if multi_repo_mode:
- if len(args) != 2:
- print('FATAL: --multi-repo requires exactly two arguments: <scan-folder> <outputpath>')
- usage()
- sys.exit(1)
-
- scan_folder = os.path.abspath(args[0])
- outputpath = os.path.abspath(args[1])
-
- # Validate scan folder
- if not os.path.exists(scan_folder):
- print(f'FATAL: Scan folder does not exist: {scan_folder}')
- sys.exit(1)
- if not os.path.isdir(scan_folder):
- print(f'FATAL: Scan folder is not a directory: {scan_folder}')
- sys.exit(1)
-
- # Discover repositories
- print(f'Scanning folder for git repositories: {scan_folder}')
- repositories = discover_repositories(scan_folder)
-
- if not repositories:
- print(f'No git repositories found in: {scan_folder}')
- sys.exit(0)
-
- print(f'Found {len(repositories)} git repositories:')
- for repo_name, repo_path in repositories:
- print(f' - {repo_name}')
-
- # Generate reports for each repository
- self.run_multi_repo(repositories, outputpath)
- else:
- # Original single/multiple repository mode
- if len(args) < 2:
- usage()
- sys.exit(1)
-
- self.run_single_mode(args)
-
- def run_multi_repo(self, repositories, base_outputpath):
- """Generate reports for multiple repositories."""
- rundir = os.getcwd()
-
- # Validate and create base output directory
- try:
- os.makedirs(base_outputpath, exist_ok=True)
- except PermissionError:
- print(f'FATAL: Permission denied creating output directory: {base_outputpath}')
- sys.exit(1)
- except OSError as e:
- print(f'FATAL: Error creating output directory {base_outputpath}: {e}')
- sys.exit(1)
-
- if not os.path.isdir(base_outputpath):
- print('FATAL: Output path is not a directory or does not exist')
- sys.exit(1)
-
- # Check write permissions
- if not os.access(base_outputpath, os.W_OK):
- print(f'FATAL: No write permission for output directory: {base_outputpath}')
- sys.exit(1)
-
- if not getgnuplotversion():
- print('gnuplot not found')
- sys.exit(1)
-
- if conf['verbose']:
- print('Configuration:')
- for key, value in conf.items():
- print(f' {key}: {value}')
- print()
-
- print(f'Base output path: {base_outputpath}')
-
- successful_reports = 0
- failed_reports = []
-
- for repo_name, repo_path in repositories:
- print(f'\n{"="*60}')
- print(f'Processing repository: {repo_name}')
- print(f'Repository path: {repo_path}')
-
- # Create repository-specific output directory with pattern: repositoryname_report
- repo_output_path = os.path.join(base_outputpath, f'{repo_name}_report')
-
- try:
- os.makedirs(repo_output_path, exist_ok=True)
- print(f'Report output path: {repo_output_path}')
-
- # Process this repository
- self.process_single_repository(repo_path, repo_output_path, rundir)
- successful_reports += 1
- print(f'✓ Successfully generated report for {repo_name}')
-
- except Exception as e:
- failed_reports.append((repo_name, str(e)))
- print(f'✗ Failed to generate report for {repo_name}: {e}')
- if conf['debug']:
- import traceback
- traceback.print_exc()
-
- # Summary
- print(f'\n{"="*60}')
- print('Multi-repository report generation complete!')
- print(f'Successfully processed: {successful_reports}/{len(repositories)} repositories')
-
- if failed_reports:
- print('\nFailed repositories:')
- for repo_name, error in failed_reports:
- print(f' - {repo_name}: {error}')
-
- if successful_reports > 0:
- # Only list repositories that did not fail
- failed_names = {name for name, _ in failed_reports}
- print(f'\nReports generated in: {base_outputpath}')
- print('Repository reports:')
- for repo_name, repo_path in repositories:
- if repo_name not in failed_names:
- report_path = os.path.join(base_outputpath, f'{repo_name}_report')
- print(f' - {repo_name}: {report_path}/index.html')
-
- def run_single_mode(self, args):
- """Original single/multiple repository mode."""
- outputpath = os.path.abspath(args[-1])
- rundir = os.getcwd()
-
- # Validate git paths
- git_paths = args[0:-1]
- for gitpath in git_paths:
- if not os.path.exists(gitpath):
- print(f'FATAL: Git repository path does not exist: {gitpath}')
- sys.exit(1)
- if not os.path.isdir(gitpath):
- print(f'FATAL: Git repository path is not a directory: {gitpath}')
- sys.exit(1)
- git_dir = os.path.join(gitpath, '.git')
- if not os.path.exists(git_dir):
- print(f'FATAL: Path is not a git repository (no .git directory found): {gitpath}')
- sys.exit(1)
-
- # Validate and create output directory
- try:
- os.makedirs(outputpath, exist_ok=True)
- except PermissionError:
- print(f'FATAL: Permission denied creating output directory: {outputpath}')
- sys.exit(1)
- except OSError as e:
- print(f'FATAL: Error creating output directory {outputpath}: {e}')
- sys.exit(1)
-
- if not os.path.isdir(outputpath):
- print('FATAL: Output path is not a directory or does not exist')
- sys.exit(1)
-
- # Check write permissions
- if not os.access(outputpath, os.W_OK):
- print(f'FATAL: No write permission for output directory: {outputpath}')
- sys.exit(1)
-
- if not getgnuplotversion():
- print('gnuplot not found')
- sys.exit(1)
-
- if conf['verbose']:
- print('Configuration:')
- for key, value in conf.items():
- print(f' {key}: {value}')
- print()
-
- print('Output path: %s' % outputpath)
- cachefile = os.path.join(outputpath, 'gitstats.cache')
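- # gitstats.cache stores previously collected data so repeated runs can skip re-collecting unchanged history.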
-
- data = GitDataCollector()
- data.loadCache(cachefile)
-
- for gitpath in git_paths:
- print('Git path: %s' % gitpath)
-
- prevdir = os.getcwd()
- os.chdir(gitpath)
-
- print('Collecting data...')
- data.collect(gitpath)
-
- os.chdir(prevdir)
-
- print('Refining data...')
- data.saveCache(cachefile)
- data.refine()
-
- os.chdir(rundir)
-
- print('Generating report...')
-
- # Always generate both HTML and PDF reports
- print('Creating HTML report...')
- html_report = HTMLReportCreator()
- html_report.create(data, outputpath)
-
- print('Creating PDF report...')
- pdf_report = PDFReportCreator()
- pdf_report.create(data, outputpath)
-
- time_end = time.time()
- exectime_internal = time_end - time_start
- external_percentage = (100.0 * exectime_external) / exectime_internal if exectime_internal > 0 else 0.0
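- # exectime_external is assumed to be accumulated globally while shelling out to git (e.g. in the pipe-output helper).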
- print('Execution time %.5f secs, %.5f secs (%.2f %%) in external commands' % (exectime_internal, exectime_external, external_percentage))
-
- if sys.stdin.isatty():
- print('You may now run:')
- print()
- print(' sensible-browser \'%s\'' % os.path.join(outputpath, 'index.html').replace("'", "'\\''"))
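- # Assumed to match the filename written by PDFReportCreator.create(); keep the two in sync.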
- pdf_filename = f"gitstats_{data.projectname.replace(' ', '_')}.pdf"
- print(' PDF report: \'%s\'' % os.path.join(outputpath, pdf_filename).replace("'", "'\\''"))
- print()
-
- def process_single_repository(self, repo_path, output_path, rundir):
- """Process a single repository and generate its report."""
- cachefile = os.path.join(output_path, 'gitstats.cache')
-
- data = GitDataCollector()
- data.loadCache(cachefile)
-
- print(f' Collecting data from: {repo_path}')
-
- prevdir = os.getcwd()
- os.chdir(repo_path)
-
- data.collect(repo_path)
- os.chdir(prevdir)
-
- print(' Refining data...')
- data.saveCache(cachefile)
- data.refine()
-
- os.chdir(rundir)
-
- print(' Generating report...')
-
- # Always generate both HTML and PDF reports
- print(' Creating HTML report...')
- html_report = HTMLReportCreator()
- html_report.create(data, output_path)
-
- print(' Creating PDF report...')
- pdf_report = PDFReportCreator()
- pdf_report.create(data, output_path)
-
- print(f' Report generated in: {output_path}')
-
- if __name__ == '__main__':
- try:
- g = GitStats()
- g.run(sys.argv[1:])
- except KeyboardInterrupt:
- print('\nInterrupted by user')
- sys.exit(1)
- except KeyError as e:
- print(f'FATAL: Configuration error: {e}')
- sys.exit(1)
- except Exception as e:
- print(f'FATAL: Unexpected error: {e}')
- if conf.get('debug', False):
- import traceback
- traceback.print_exc()
- sys.exit(1)