- import datetime
- import getopt
- import glob
- import os
- import pickle
- import platform
- import re
- import shutil
- import subprocess
- import sys
- import time
- import zlib
- from collections import defaultdict
- from fpdf import FPDF
- from fpdf.enums import XPos, YPos
-
- if sys.version_info < (3, 6):
- print("Python 3.6 or higher is required for gitstats", file=sys.stderr)
- sys.exit(1)
-
- from multiprocessing import Pool
-
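- # Force the C locale so git's output is stable and locale-independent to parse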
- os.environ['LC_ALL'] = 'C'
-
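- # Common gnuplot preamble: transparent 640x240 PNG output, full-size plot area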
- GNUPLOT_COMMON = 'set terminal png transparent size 640,240\nset size 1.0,1.0\n'
- ON_LINUX = (platform.system() == 'Linux')
- WEEKDAYS = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
-
- exectime_internal = 0.0
- exectime_external = 0.0
- time_start = time.time()
-
- # By default, gnuplot is looked up on PATH, but this can be overridden with
- # the environment variable "GNUPLOT"
- gnuplot_cmd = 'gnuplot'
- if 'GNUPLOT' in os.environ:
- gnuplot_cmd = os.environ['GNUPLOT']
-
- conf = {
- 'max_domains': 10,
- 'max_ext_length': 10,
- 'style': 'gitstats.css',
- 'max_authors': 20,
- 'authors_top': 5,
- 'commit_begin': '',
- 'commit_end': 'HEAD',
- 'linear_linestats': 1,
- 'project_name': '',
- 'processes': 8,
- 'start_date': '',
- 'debug': False,
- 'verbose': False
- }
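- # Hypothetical usage sketch: these defaults can be adjusted before collection,
- # e.g. conf['start_date'] = '2020-01-01' and conf['processes'] = 4 to analyze
- # only recent history with four worker processes (the option parsing that
- # normally sets these values lives elsewhere in the file).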
-
- def getpipeoutput(cmds, quiet = False):
- global exectime_external
- start = time.time()
-
- # Basic input validation: warn about obvious command-injection constructs
- for cmd in cmds:
- if not isinstance(cmd, str):
- raise TypeError("Commands must be strings")
- # Check for obvious command injection attempts
- if any(dangerous in cmd for dangerous in [';', '&&', '||', '`', '$(']):
- print(f'Warning: Potentially dangerous command detected: {cmd}')
-
- if (not quiet and ON_LINUX and os.isatty(1)) or conf['verbose']:
- print('>> ' + ' | '.join(cmds), end='')
- sys.stdout.flush()
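- # Build a shell-style pipeline: run the first command, then feed each
- # command's stdout into the next command's stdin (cmd1 | cmd2 | ...).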
- p = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
- processes=[p]
- for x in cmds[1:]:
- p = subprocess.Popen(x, stdin = p.stdout, stdout = subprocess.PIPE, shell = True)
- processes.append(p)
- output = p.communicate()[0]
- for p in processes:
- p.wait()
- end = time.time()
- if not quiet or conf['verbose'] or conf['debug']:
- if ON_LINUX and os.isatty(1):
- print('\r', end='')
- print('[%.5f] >> %s' % (end - start, ' | '.join(cmds)))
- if conf['debug']:
- print(f'DEBUG: Command output ({len(output)} bytes): {output[:200].decode("utf-8", errors="replace")}...')
- exectime_external += (end - start)
- return output.decode('utf-8', errors='replace').rstrip('\n')
-
- def getlogrange(defaultrange = 'HEAD', end_only = True):
- commit_range = getcommitrange(defaultrange, end_only)
- if len(conf['start_date']) > 0:
- return '--since="%s" "%s"' % (conf['start_date'], commit_range)
- return commit_range
-
- def getcommitrange(defaultrange = 'HEAD', end_only = False):
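- # Returns a git revision range: just conf['commit_end'] when end_only is set
- # (or no begin commit is configured), otherwise "begin..end".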
- if len(conf['commit_end']) > 0:
- if end_only or len(conf['commit_begin']) == 0:
- return conf['commit_end']
- return '%s..%s' % (conf['commit_begin'], conf['commit_end'])
- return defaultrange
-
- def getkeyssortedbyvalues(d):
- return [key for (value, key) in sorted((value, key) for (key, value) in d.items())]
-
- # e.g. d['author'] = { 'commits': 512 }; getkeyssortedbyvaluekey(d, 'commits')
- # returns the keys of d sorted by their 'commits' value
- def getkeyssortedbyvaluekey(d, key):
- return [k for (value, k) in sorted((d[k][key], k) for k in d.keys())]
-
- def getstatsummarycounts(line):
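- # Parses a "git --shortstat" summary such as
- # "3 files changed, 17 insertions(+), 5 deletions(-)"
- # and returns [files, insertions, deletions] as strings.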
- numbers = re.findall(r'\d+', line)
- if len(numbers) == 1:
- # neither insertions nor deletions: probably happens only for "0 files changed"
- numbers.append('0')
- numbers.append('0')
- elif len(numbers) == 2 and line.find('(+)') != -1:
- numbers.append('0') # only insertions were printed on the line
- elif len(numbers) == 2 and line.find('(-)') != -1:
- numbers.insert(1, '0') # only deletions were printed on the line
- return numbers
-
- VERSION = 0
- def getversion():
- global VERSION
- if VERSION == 0:
- gitstats_repo = os.path.dirname(os.path.abspath(__file__))
- VERSION = getpipeoutput(["git --git-dir=%s/.git --work-tree=%s rev-parse --short %s" %
- (gitstats_repo, gitstats_repo, getcommitrange('HEAD').split('\n')[0])])
- return VERSION
-
- def getgitversion():
- return getpipeoutput(['git --version']).split('\n')[0]
-
- def getgnuplotversion():
- return getpipeoutput(['%s --version' % gnuplot_cmd]).split('\n')[0]
-
- def getnumoffilesfromrev(time_rev):
- """
- Get number of files changed in commit
- """
- time, rev = time_rev
- return (int(time), rev, int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0]))
-
- def getnumoflinesinblob(ext_blob):
- """
- Get number of lines in blob
- """
- ext, blob_id = ext_blob
- return (ext, blob_id, int(getpipeoutput(['git cat-file blob %s' % blob_id, 'wc -l']).split()[0]))
-
- def analyzesloc(ext_blob):
- """
- Analyze source lines of code vs comments vs blank lines in a blob
- Returns (ext, blob_id, total_lines, source_lines, comment_lines, blank_lines)
- """
- ext, blob_id = ext_blob
- content = getpipeoutput(['git cat-file blob %s' % blob_id])
-
- total_lines = 0
- source_lines = 0
- comment_lines = 0
- blank_lines = 0
-
- # Define comment patterns for different file types
- comment_patterns = {
- '.py': [r'^\s*#', r'^\s*"""', r'^\s*\'\'\''],
- '.js': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
- '.ts': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
- '.java': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
- '.cpp': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
- '.c': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
- '.h': [r'^\s*//', r'^\s*/\*', r'^\s*\*'],
- '.css': [r'^\s*/\*', r'^\s*\*'],
- '.html': [r'^\s*<!--'],
- '.xml': [r'^\s*<!--'],
- '.sh': [r'^\s*#'],
- '.rb': [r'^\s*#'],
- '.pl': [r'^\s*#'],
- '.php': [r'^\s*//', r'^\s*/\*', r'^\s*\*', r'^\s*#'],
- }
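- # Note: this is a line-based heuristic. Only lines that *start* with a comment
- # marker are counted as comments, so the interior of multi-line block comments
- # (lines without a leading marker such as '*') is counted as source.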
-
- patterns = comment_patterns.get(ext, [])
-
- for line in content.split('\n'):
- total_lines += 1
- line_stripped = line.strip()
-
- if not line_stripped:
- blank_lines += 1
- elif any(re.match(pattern, line) for pattern in patterns):
- comment_lines += 1
- else:
- source_lines += 1
-
- return (ext, blob_id, total_lines, source_lines, comment_lines, blank_lines)
-
- class DataCollector:
- """Manages data collection from a revision control repository."""
- def __init__(self):
- self.stamp_created = time.time()
- self.cache = {}
- self.total_authors = 0
- self.activity_by_hour_of_day = defaultdict(int) # hour -> commits
- self.activity_by_day_of_week = defaultdict(int) # day -> commits
- self.activity_by_month_of_year = defaultdict(int) # month [1-12] -> commits
- self.activity_by_hour_of_week = defaultdict(lambda: defaultdict(int)) # weekday -> hour -> commits
- self.activity_by_hour_of_day_busiest = 0
- self.activity_by_hour_of_week_busiest = 0
- self.activity_by_year_week = defaultdict(int) # 'YYYY-WW' -> commits
- self.activity_by_year_week_peak = 0
-
- self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed}
-
- self.total_commits = 0
- self.total_files = 0
- self.authors_by_commits = 0
-
- # domains
- self.domains = defaultdict(lambda: defaultdict(int)) # domain -> {'commits': count, ...}
-
- # author of the month
- self.author_of_month = defaultdict(lambda: defaultdict(int)) # month -> author -> commits
- self.author_of_year = defaultdict(lambda: defaultdict(int)) # year -> author -> commits
- self.commits_by_month = defaultdict(int) # month -> commits
- self.commits_by_year = defaultdict(int) # year -> commits
- self.lines_added_by_month = defaultdict(int) # month -> lines added
- self.lines_added_by_year = defaultdict(int) # year -> lines added
- self.lines_removed_by_month = defaultdict(int) # month -> lines removed
- self.lines_removed_by_year = defaultdict(int) # year -> lines removed
- self.first_commit_stamp = 0
- self.last_commit_stamp = 0
- self.last_active_day = None
- self.active_days = set()
-
- # lines
- self.total_lines = 0
- self.total_lines_added = 0
- self.total_lines_removed = 0
-
- # SLOC (Source Lines of Code) analysis
- self.total_source_lines = 0
- self.total_comment_lines = 0
- self.total_blank_lines = 0
- self.sloc_by_extension = {} # ext -> {'source': 0, 'comments': 0, 'blank': 0, 'total': 0}
-
- # File size and revision tracking
- self.file_sizes = {} # filepath -> size in bytes
- self.file_revisions = {} # filepath -> revision count
-
- # Directory activity tracking
- self.directories = defaultdict(lambda: {'commits': 0, 'lines_added': 0, 'lines_removed': 0, 'files': set()})
- self.directory_revisions = defaultdict(int) # directory -> total file revisions in directory
-
- # size
- self.total_size = 0
-
- # timezone
- self.commits_by_timezone = defaultdict(int) # timezone -> commits
-
- # tags
- self.tags = {}
-
- self.files_by_stamp = {} # stamp -> files
-
- # extensions
- self.extensions = {} # extension -> files, lines
-
- # line statistics
- self.changes_by_date = {} # stamp -> { files, ins, del }
-
- # Pace of Changes tracking (number of line changes happening over time)
- self.pace_of_changes = {} # stamp -> total_line_changes (ins + del)
-
- # Last 30 days activity
- self.last_30_days_commits = 0
- self.last_30_days_lines_added = 0
- self.last_30_days_lines_removed = 0
-
- # Last 12 months activity
- self.last_12_months_commits = defaultdict(int) # month -> commits
- self.last_12_months_lines_added = defaultdict(int) # month -> lines added
- self.last_12_months_lines_removed = defaultdict(int) # month -> lines removed
-
- # Repository size tracking
- self.repository_size_mb = 0.0
-
- # Branch analysis
- self.branches = {} # branch_name -> {'commits': 0, 'lines_added': 0, 'lines_removed': 0, 'authors': {}, 'is_merged': True, 'merge_base': '', 'unique_commits': []}
- self.unmerged_branches = [] # list of branch names that are not merged into main branch
- self.main_branch = 'master' # will be detected automatically
-
- # Team collaboration analysis
- self.author_collaboration = {} # author -> {'worked_with': {other_author: shared_files}, 'file_ownership': {file: change_count}}
- self.commit_patterns = {} # author -> {'avg_commit_size': lines, 'small_commits': count, 'large_commits': count, 'commit_frequency': commits_per_day}
- self.working_patterns = {} # author -> {'night_commits': count, 'weekend_commits': count, 'peak_hours': [hours], 'timezone_pattern': {tz: count}}
- self.impact_analysis = {} # author -> {'critical_files': [files], 'impact_score': score, 'bug_potential': score}
- self.team_performance = {} # author -> {'efficiency_score': score, 'consistency': score, 'leadership_score': score}
-
- # File importance tracking
- self.critical_files = set() # Files that are likely critical (main.py, app.py, index.html, etc.)
- self.file_impact_scores = {} # file -> impact_score based on how often it's changed and by whom
-
- # Time-based analysis
- self.commits_by_time_of_day = defaultdict(lambda: defaultdict(int)) # author -> hour -> commits
- self.commits_by_day_of_week = defaultdict(lambda: defaultdict(int)) # author -> day -> commits
- self.author_active_periods = {} # author -> {'active_days': set, 'longest_streak': days, 'avg_gap': days}
-
- # Quality indicators
- self.potential_bug_commits = [] # List of commits that might indicate bugs (reverts, fixes, etc.)
- self.refactoring_commits = [] # List of commits that appear to be refactoring
- self.feature_commits = [] # List of commits that appear to add features
-
- ##
- # This should be the main function to extract data from the repository.
- def collect(self, dir):
- self.dir = dir
- if len(conf['project_name']) == 0:
- self.projectname = os.path.basename(os.path.abspath(dir))
- else:
- self.projectname = conf['project_name']
-
- ##
- # Load cacheable data
- def loadCache(self, cachefile):
- if not os.path.exists(cachefile):
- return
- print('Loading cache...')
- try:
- with open(cachefile, 'rb') as f:
- try:
- self.cache = pickle.loads(zlib.decompress(f.read()))
- except (zlib.error, pickle.PickleError) as e:
- # temporary hack to upgrade non-compressed caches
- try:
- f.seek(0)
- self.cache = pickle.load(f)
- except (pickle.PickleError, EOFError) as e2:
- print(f'Warning: Failed to load cache file {cachefile}: {e2}')
- self.cache = {}
- except Exception as e:
- print(f'Warning: Unexpected error loading cache file {cachefile}: {e}')
- self.cache = {}
- except IOError as e:
- print(f'Warning: Could not open cache file {cachefile}: {e}')
- self.cache = {}
-
- ##
- # Produce any additional statistics from the extracted data.
- def refine(self):
- pass
-
- ##
- # Get a dictionary describing the given author
- def getAuthorInfo(self, author):
- return None
-
- def getActivityByDayOfWeek(self):
- return {}
-
- def getActivityByHourOfDay(self):
- return {}
-
- # Get a dictionary describing the given domain
- def getDomainInfo(self, domain):
- return None
-
- ##
- # Get a list of authors
- def getAuthors(self):
- return []
-
- def getFirstCommitDate(self):
- return datetime.datetime.now()
-
- def getLastCommitDate(self):
- return datetime.datetime.now()
-
- def getStampCreated(self):
- return self.stamp_created
-
- def getTags(self):
- return []
-
- def getTotalAuthors(self):
- return -1
-
- def getTotalCommits(self):
- return -1
-
- def getTotalFiles(self):
- return -1
-
- def getTotalLOC(self):
- return -1
-
- ##
- # Save cacheable data
- def saveCache(self, cachefile):
- print('Saving cache...')
- tempfile = cachefile + '.tmp'
- try:
- with open(tempfile, 'wb') as f:
- #pickle.dump(self.cache, f)
- data = zlib.compress(pickle.dumps(self.cache))
- f.write(data)
- try:
- os.remove(cachefile)
- except OSError:
- pass
- os.rename(tempfile, cachefile)
- except IOError as e:
- print(f'Warning: Could not save cache file {cachefile}: {e}')
- # Clean up temp file if it exists
- try:
- os.remove(tempfile)
- except OSError:
- pass
-
- class GitDataCollector(DataCollector):
- def collect(self, dir):
- DataCollector.collect(self, dir)
-
- self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getlogrange(), 'wc -l']))
- #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
-
- # Clear tags for each repository to avoid multirepo contamination
- if not hasattr(self, '_first_repo'):
- self._first_repo = True
- else:
- # For subsequent repos, clear tags to avoid mixing
- self.tags = {}
-
- # tags
- lines = getpipeoutput(['git show-ref --tags']).split('\n')
- for line in lines:
- if len(line) == 0:
- continue
- (sha, tag) = line.split(' ')
-
- tag = tag.replace('refs/tags/', '')
- output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % sha])
- if len(output) > 0:
- parts = output.split(' ')
- stamp = 0
- try:
- stamp = int(parts[0])
- except ValueError:
- stamp = 0
- self.tags[tag] = { 'stamp': stamp, 'hash' : sha, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} }
-
- # collect info on tags, walking from the oldest tag forward
- tags_sorted_by_date = [el[1] for el in sorted((el[1]['date'], el[0]) for el in self.tags.items())]
- prev = None
- for tag in tags_sorted_by_date:
- cmd = 'git shortlog -s "%s"' % tag
- if prev != None:
- cmd += ' "^%s"' % prev
- output = getpipeoutput([cmd])
- if len(output) == 0:
- continue
- prev = tag
- for line in output.split('\n'):
- parts = re.split(r'\s+', line.strip(), maxsplit=1)
- commits = int(parts[0])
- author = parts[1]
- self.tags[tag]['commits'] += commits
- self.tags[tag]['authors'][author] = commits
-
- # Collect revision statistics
- # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
- lines = getpipeoutput(['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split('\n')
- for line in lines:
- parts = line.split(' ', 4)
- author = ''
- try:
- stamp = int(parts[0])
- except ValueError:
- stamp = 0
- timezone = parts[3]
- author, mail = parts[4].split('<', 1)
- author = author.rstrip()
- mail = mail.rstrip('>')
- domain = '?'
- if mail.find('@') != -1:
- domain = mail.rsplit('@', 1)[1]
- date = datetime.datetime.fromtimestamp(float(stamp))
-
- # First and last commit stamp (may be in any order because of cherry-picking and patches)
- if stamp > self.last_commit_stamp:
- self.last_commit_stamp = stamp
- if self.first_commit_stamp == 0 or stamp < self.first_commit_stamp:
- self.first_commit_stamp = stamp
-
- # activity
- # hour
- hour = date.hour
- self.activity_by_hour_of_day[hour] += 1
- # most active hour?
- if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
- self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]
-
- # day of week
- day = date.weekday()
- self.activity_by_day_of_week[day] += 1
-
- # domain stats; self.domains is a defaultdict, so no explicit init is needed
- self.domains[domain]['commits'] += 1
-
- # hour of week
- self.activity_by_hour_of_week[day][hour] += 1
- # most active hour?
- if self.activity_by_hour_of_week[day][hour] > self.activity_by_hour_of_week_busiest:
- self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[day][hour]
-
- # month of year
- month = date.month
- self.activity_by_month_of_year[month] += 1
-
- # yearly/weekly activity
- yyw = date.strftime('%Y-%W')
- self.activity_by_year_week[yyw] += 1
- if self.activity_by_year_week_peak < self.activity_by_year_week[yyw]:
- self.activity_by_year_week_peak = self.activity_by_year_week[yyw]
-
- # author stats
- if author not in self.authors:
- self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
- # commits, note again that commits may be in any date order because of cherry-picking and patches
- if 'last_commit_stamp' not in self.authors[author]:
- self.authors[author]['last_commit_stamp'] = stamp
- if stamp > self.authors[author]['last_commit_stamp']:
- self.authors[author]['last_commit_stamp'] = stamp
- if 'first_commit_stamp' not in self.authors[author]:
- self.authors[author]['first_commit_stamp'] = stamp
- if stamp < self.authors[author]['first_commit_stamp']:
- self.authors[author]['first_commit_stamp'] = stamp
-
- # author of the month/year
- yymm = date.strftime('%Y-%m')
- self.author_of_month[yymm][author] += 1
- self.commits_by_month[yymm] += 1
-
- yy = date.year
- self.author_of_year[yy][author] += 1
- self.commits_by_year[yy] += 1
-
- # authors: active days
- yymmdd = date.strftime('%Y-%m-%d')
- if 'last_active_day' not in self.authors[author]:
- self.authors[author]['last_active_day'] = yymmdd
- self.authors[author]['active_days'] = set([yymmdd])
- elif yymmdd != self.authors[author]['last_active_day']:
- self.authors[author]['last_active_day'] = yymmdd
- self.authors[author]['active_days'].add(yymmdd)
-
- # project: active days
- if yymmdd != self.last_active_day:
- self.last_active_day = yymmdd
- self.active_days.add(yymmdd)
-
- # timezone
- self.commits_by_timezone[timezone] += 1
-
- # outputs "<stamp> <tree hash>" for each revision; file counts per tree are computed below
- revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
- lines = []
- revs_to_read = []
- time_rev_count = []
- # Look up each rev in the cache and take its info from there if found;
- # if not, append the rev to the list of revs to read from the repo
- for revline in revlines:
- stamp, rev = revline.split(' ')
- if 'files_in_tree' not in self.cache:
- revs_to_read.append((stamp, rev))
- continue
- if rev in self.cache['files_in_tree']:
- lines.append('%d %d' % (int(stamp), self.cache['files_in_tree'][rev]))
- else:
- revs_to_read.append((stamp, rev))
-
- #Read revisions from repo
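- # Fan the uncached revs out to a worker pool; each job shells out to
- # "git ls-tree | wc -l", so the work is I/O-bound and parallelizes well.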
- pool = Pool(processes=conf['processes'])
- time_rev_count = pool.map(getnumoffilesfromrev, revs_to_read)
- pool.terminate()
- pool.join()
-
- # Update cache with new revisions and append them to the general list
- for (stamp, rev, count) in time_rev_count:
- if 'files_in_tree' not in self.cache:
- self.cache['files_in_tree'] = {}
- self.cache['files_in_tree'][rev] = count
- lines.append('%d %d' % (int(stamp), count))
-
- self.total_commits += len(lines)
- for line in lines:
- parts = line.split(' ')
- if len(parts) != 2:
- continue
- (stamp, files) = parts[0:2]
- try:
- self.files_by_stamp[int(stamp)] = int(files)
- except ValueError:
- print('Warning: failed to parse line "%s"' % line)
-
- # extensions and size of files
- lines = getpipeoutput(['git ls-tree -r -l -z %s' % getcommitrange('HEAD', end_only = True)]).split('\000')
- blobs_to_read = []
- for line in lines:
- if len(line) == 0:
- continue
- parts = re.split(r'\s+', line, maxsplit=4)
- if parts[0] == '160000' and parts[3] == '-':
- # skip submodules
- continue
- blob_id = parts[2]
- size = int(parts[3])
- fullpath = parts[4]
-
- self.total_size += size
- self.total_files += 1
-
- # Track individual file sizes
- self.file_sizes[fullpath] = size
-
- filename = fullpath.split('/')[-1] # strip directories
- if filename.find('.') == -1 or filename.rfind('.') == 0:
- ext = ''
- else:
- ext = filename[(filename.rfind('.') + 1):]
- if len(ext) > conf['max_ext_length']:
- ext = ''
- if ext not in self.extensions:
- self.extensions[ext] = {'files': 0, 'lines': 0}
- self.extensions[ext]['files'] += 1
- # if the cache is empty, add ext and blob id to the list of new blobs;
- # otherwise try to read the needed info from the cache
- if 'lines_in_blob' not in self.cache:
- blobs_to_read.append((ext,blob_id))
- continue
- if blob_id in self.cache['lines_in_blob']:
- self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
- else:
- blobs_to_read.append((ext,blob_id))
-
- # Get line counts for new blobs that weren't found in the cache
- pool = Pool(processes=conf['processes'])
- ext_blob_linecount = pool.map(getnumoflinesinblob, blobs_to_read)
- pool.terminate()
- pool.join()
-
- # Also get SLOC analysis for the same blobs
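- # Caveat: SLOC is computed only for blobs missing from the line-count cache,
- # so these totals can undercount when running against a warm cache.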
- pool = Pool(processes=conf['processes'])
- ext_blob_sloc = pool.map(analyzesloc, blobs_to_read)
- pool.terminate()
- pool.join()
-
- # Update cache and record the line count of each new blob
- for (ext, blob_id, linecount) in ext_blob_linecount:
- if 'lines_in_blob' not in self.cache:
- self.cache['lines_in_blob'] = {}
- self.cache['lines_in_blob'][blob_id] = linecount
- self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
-
- # Update SLOC statistics
- for (ext, blob_id, total_lines, source_lines, comment_lines, blank_lines) in ext_blob_sloc:
- # Initialize extension SLOC tracking
- if ext not in self.sloc_by_extension:
- self.sloc_by_extension[ext] = {'source': 0, 'comments': 0, 'blank': 0, 'total': 0}
-
- # Update extension SLOC counts
- self.sloc_by_extension[ext]['source'] += source_lines
- self.sloc_by_extension[ext]['comments'] += comment_lines
- self.sloc_by_extension[ext]['blank'] += blank_lines
- self.sloc_by_extension[ext]['total'] += total_lines
-
- # Update global SLOC counts
- self.total_source_lines += source_lines
- self.total_comment_lines += comment_lines
- self.total_blank_lines += blank_lines
-
- # File revision counting
- print('Collecting file revision statistics...')
- revision_lines = getpipeoutput(['git log --name-only --pretty=format: %s' % getlogrange('HEAD')]).strip().split('\n')
- for line in revision_lines:
- line = line.strip()
- if len(line) > 0 and not line.startswith('commit'):
- # This is a filename
- if line not in self.file_revisions:
- self.file_revisions[line] = 0
- self.file_revisions[line] += 1
-
- # Track directory activity
- directory = os.path.dirname(line) if os.path.dirname(line) else '.'
- self.directory_revisions[directory] += 1
- self.directories[directory]['files'].add(line)
-
- # Directory activity analysis
- print('Collecting directory activity statistics...')
- numstat_lines = getpipeoutput(['git log --numstat --pretty=format:"%%at %%aN" %s' % getlogrange('HEAD')]).split('\n')
- current_author = None
- current_timestamp = None
-
- for line in numstat_lines:
- line = line.strip()
- if not line:
- continue
-
- # Check if this is a commit header line (timestamp + author)
- if line.count('\t') == 0 and ' ' in line:
- try:
- parts = line.split(' ', 1)
- current_timestamp = int(parts[0])
- current_author = parts[1]
- continue
- except (ValueError, IndexError):
- pass
-
- # Check if this is a numstat line (additions\tdeletions\tfilename)
- if line.count('\t') >= 2:
- parts = line.split('\t')
- if len(parts) >= 3:
- try:
- additions = int(parts[0]) if parts[0] != '-' else 0
- deletions = int(parts[1]) if parts[1] != '-' else 0
- filename = '\t'.join(parts[2:]) # Handle filenames with tabs
-
- # Track directory activity
- directory = os.path.dirname(filename) if os.path.dirname(filename) else '.'
- self.directories[directory]['commits'] += 1 # Will be deduplicated later
- self.directories[directory]['lines_added'] += additions
- self.directories[directory]['lines_removed'] += deletions
- self.directories[directory]['files'].add(filename)
- except ValueError:
- pass
-
- # line statistics
- # outputs:
- # N files changed, N insertions (+), N deletions(-)
- # <stamp> <author>
- self.changes_by_date = {} # stamp -> { files, ins, del }
- # computation of lines of code by date is better done
- # on a linear history.
- extra = ''
- if conf['linear_linestats']:
- extra = '--first-parent -m'
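- # --first-parent walks only the mainline of merges (with -m to still show
- # their diffs), so each change is counted once along a linear history.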
- lines = getpipeoutput(['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getlogrange('HEAD'))]).split('\n')
- lines.reverse()
- files, inserted, deleted, total_lines = 0, 0, 0, 0
- author = None
- for line in lines:
- if len(line) == 0:
- continue
-
- # <stamp> <author>
- if re.search(r'files? changed', line) is None:
- pos = line.find(' ')
- if pos != -1:
- try:
- (stamp, author) = (int(line[:pos]), line[pos+1:])
- self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }
-
- # Track pace of changes (total line changes)
- self.pace_of_changes[stamp] = inserted + deleted
-
- date = datetime.datetime.fromtimestamp(stamp)
-
- # Track last 30 days activity
- now = time.time()
- if now - stamp <= 30 * 24 * 3600: # 30 days in seconds
- self.last_30_days_commits += 1
- self.last_30_days_lines_added += inserted
- self.last_30_days_lines_removed += deleted
-
- # Track last 12 months activity
- if now - stamp <= 365 * 24 * 3600: # 12 months in seconds
- yymm = date.strftime('%Y-%m')
- self.last_12_months_commits[yymm] += 1
- self.last_12_months_lines_added[yymm] += inserted
- self.last_12_months_lines_removed[yymm] += deleted
-
- yymm = date.strftime('%Y-%m')
- self.lines_added_by_month[yymm] += inserted
- self.lines_removed_by_month[yymm] += deleted
-
- yy = date.year
- self.lines_added_by_year[yy] += inserted
- self.lines_removed_by_year[yy] += deleted
-
- files, inserted, deleted = 0, 0, 0
- except ValueError:
- print('Warning: unexpected line "%s"' % line)
- else:
- print('Warning: unexpected line "%s"' % line)
- else:
- numbers = getstatsummarycounts(line)
-
- if len(numbers) == 3:
- (files, inserted, deleted) = list(map(lambda el : int(el), numbers))
- total_lines += inserted
- total_lines -= deleted
- self.total_lines_added += inserted
- self.total_lines_removed += deleted
-
- else:
- print('Warning: failed to handle line "%s"' % line)
- (files, inserted, deleted) = (0, 0, 0)
- #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
- self.total_lines += total_lines
-
- # Per-author statistics
-
- # defined for (stamp, author) only if the author committed at this timestamp.
- self.changes_by_date_by_author = {} # stamp -> author -> lines_added
-
- # Similar to the above, but never use --first-parent
- # (we need to walk through every commit to know who
- # committed what, not just through mainline)
- lines = getpipeoutput(['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getlogrange('HEAD'))]).split('\n')
- lines.reverse()
- files, inserted, deleted = 0, 0, 0
- author = None
- stamp = 0
- for line in lines:
- if len(line) == 0:
- continue
-
- # <stamp> <author>
- if re.search(r'files? changed', line) is None:
- pos = line.find(' ')
- if pos != -1:
- try:
- oldstamp = stamp
- (stamp, author) = (int(line[:pos]), line[pos+1:])
- if oldstamp > stamp:
- # clock skew, keep old timestamp to avoid having ugly graph
- stamp = oldstamp
- if author not in self.authors:
- self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
- self.authors[author]['commits'] += 1
- self.authors[author]['lines_added'] += inserted
- self.authors[author]['lines_removed'] += deleted
- if stamp not in self.changes_by_date_by_author:
- self.changes_by_date_by_author[stamp] = {}
- if author not in self.changes_by_date_by_author[stamp]:
- self.changes_by_date_by_author[stamp][author] = {}
- self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author]['lines_added']
- self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits']
- files, inserted, deleted = 0, 0, 0
- except ValueError:
- print('Warning: unexpected line "%s"' % line)
- else:
- print('Warning: unexpected line "%s"' % line)
- else:
- numbers = getstatsummarycounts(line)
-
- if len(numbers) == 3:
- (files, inserted, deleted) = list(map(lambda el : int(el), numbers))
- else:
- print('Warning: failed to handle line "%s"' % line)
- (files, inserted, deleted) = (0, 0, 0)
-
- # Branch analysis - collect unmerged branches and per-branch statistics
- if conf['verbose']:
- print('Analyzing branches and detecting unmerged branches...')
- self._analyzeBranches()
-
- # Calculate repository size (this can be slow on large repositories)
- if conf['verbose']:
- print('Calculating repository size...')
- try:
- # Get .git directory size
- git_dir_size = getpipeoutput(['du -sm .git']).split()[0]
- self.repository_size_mb = float(git_dir_size)
- if conf['verbose']:
- print(f'Repository size: {self.repository_size_mb:.1f} MB')
- except (ValueError, IndexError):
- print('Warning: Could not calculate repository size')
- self.repository_size_mb = 0.0
-
- # Perform advanced team analysis
- self._analyzeTeamCollaboration()
- self._analyzeCommitPatterns()
- self._analyzeWorkingPatterns()
- self._analyzeImpactAndQuality()
- self._calculateTeamPerformanceMetrics()
-
- def _detectMainBranch(self):
- """Detect the main branch (master, main, develop, etc.)"""
- # Try common main branch names in order of preference
- main_branch_candidates = ['master', 'main', 'develop', 'development']
-
- # Get all local branches
- branches_output = getpipeoutput(['git branch'])
- local_branches = [line.strip().lstrip('* ') for line in branches_output.split('\n') if line.strip()]
-
- # Check if any of the common main branches exist
- for candidate in main_branch_candidates:
- if candidate in local_branches:
- self.main_branch = candidate
- return candidate
-
- # If none found, use the first branch or fall back to 'master'
- if local_branches:
- self.main_branch = local_branches[0]
- return local_branches[0]
-
- # Fall back to master
- self.main_branch = 'master'
- return 'master'
-
- def _analyzeBranches(self):
- """Analyze all branches and detect unmerged ones"""
- try:
- # Detect main branch
- main_branch = self._detectMainBranch()
- if conf['verbose']:
- print(f'Detected main branch: {main_branch}')
-
- # Get all local branches
- branches_output = getpipeoutput(['git branch'])
- all_branches = [line.strip().lstrip('* ') for line in branches_output.split('\n') if line.strip()]
-
- # Get unmerged branches (branches not merged into main)
- try:
- unmerged_output = getpipeoutput([f'git branch --no-merged {main_branch}'])
- self.unmerged_branches = [line.strip().lstrip('* ') for line in unmerged_output.split('\n')
- if line.strip() and not line.strip().startswith('*')]
- except Exception:
- # If main branch doesn't exist or command fails, assume all branches are unmerged
- self.unmerged_branches = [b for b in all_branches if b != main_branch]
-
- if conf['verbose']:
- print(f'Found {len(self.unmerged_branches)} unmerged branches: {", ".join(self.unmerged_branches)}')
-
- # Analyze each branch
- for branch in all_branches:
- if conf['verbose']:
- print(f'Analyzing branch: {branch}')
- self._analyzeBranch(branch, main_branch)
-
- except Exception as e:
- if conf['verbose'] or conf['debug']:
- print(f'Warning: Branch analysis failed: {e}')
- # Initialize empty structures if analysis fails
- self.unmerged_branches = []
- self.branches = {}
-
- def _analyzeBranch(self, branch_name, main_branch):
- """Analyze a single branch for commits, authors, and line changes"""
- try:
- # Initialize branch data
- self.branches[branch_name] = {
- 'commits': 0,
- 'lines_added': 0,
- 'lines_removed': 0,
- 'authors': {},
- 'is_merged': branch_name not in self.unmerged_branches,
- 'merge_base': '',
- 'unique_commits': []
- }
-
- # Get merge base with main branch
- try:
- merge_base = getpipeoutput([f'git merge-base {branch_name} {main_branch}']).strip()
- self.branches[branch_name]['merge_base'] = merge_base
- except Exception:
- self.branches[branch_name]['merge_base'] = ''
-
- # Get commits unique to this branch (not in main branch)
- if branch_name != main_branch:
- try:
- # Get commits that are in branch but not in main
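- # "git rev-list A ^B" lists commits reachable from A but not from B,
- # i.e. the branch's commits that haven't been merged into main.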
- unique_commits_output = getpipeoutput([f'git rev-list {branch_name} ^{main_branch}'])
- unique_commits = [line.strip() for line in unique_commits_output.split('\n') if line.strip()]
- self.branches[branch_name]['unique_commits'] = unique_commits
-
- # Analyze each unique commit
- for commit in unique_commits:
- self._analyzeBranchCommit(branch_name, commit)
-
- except Exception:
- # If command fails, analyze all commits in the branch
- try:
- all_commits_output = getpipeoutput([f'git rev-list {branch_name}'])
- all_commits = [line.strip() for line in all_commits_output.split('\n') if line.strip()]
- self.branches[branch_name]['unique_commits'] = all_commits[:50] # Limit to avoid too much data
-
- for commit in all_commits[:50]:
- self._analyzeBranchCommit(branch_name, commit)
- except Exception:
- pass
- else:
- # For main branch, count all commits
- try:
- all_commits_output = getpipeoutput([f'git rev-list {branch_name}'])
- all_commits = [line.strip() for line in all_commits_output.split('\n') if line.strip()]
- self.branches[branch_name]['commits'] = len(all_commits)
- self.branches[branch_name]['unique_commits'] = all_commits[:100] # Limit for performance
- except Exception:
- pass
-
- except Exception as e:
- if conf['debug']:
- print(f'Warning: Failed to analyze branch {branch_name}: {e}')
-
- def _analyzeBranchCommit(self, branch_name, commit_hash):
- """Analyze a single commit for branch statistics"""
- try:
- # Get commit author and timestamp
- commit_info = getpipeoutput([f'git log -1 --pretty=format:"%aN %at" {commit_hash}'])
- if not commit_info:
- return
-
- parts = commit_info.rsplit(' ', 1)
- if len(parts) != 2:
- return
-
- author = parts[0]
- try:
- timestamp = int(parts[1])
- except ValueError:
- return
-
- # Update branch commit count
- self.branches[branch_name]['commits'] += 1
-
- # Update author statistics for this branch
- if author not in self.branches[branch_name]['authors']:
- self.branches[branch_name]['authors'][author] = {
- 'commits': 0,
- 'lines_added': 0,
- 'lines_removed': 0
- }
- self.branches[branch_name]['authors'][author]['commits'] += 1
-
- # Get line changes for this commit
- try:
- numstat_output = getpipeoutput([f'git show --numstat --format="" {commit_hash}'])
- for line in numstat_output.split('\n'):
- if line.strip() and '\t' in line:
- parts = line.split('\t')
- if len(parts) >= 2:
- try:
- additions = int(parts[0]) if parts[0] != '-' else 0
- deletions = int(parts[1]) if parts[1] != '-' else 0
-
- # Update branch statistics
- self.branches[branch_name]['lines_added'] += additions
- self.branches[branch_name]['lines_removed'] += deletions
-
- # Update author statistics for this branch
- self.branches[branch_name]['authors'][author]['lines_added'] += additions
- self.branches[branch_name]['authors'][author]['lines_removed'] += deletions
-
- except ValueError:
- pass
- except Exception:
- pass
-
- except Exception as e:
- if conf['debug']:
- print(f'Warning: Failed to analyze commit {commit_hash}: {e}')
-
- def _analyzeTeamCollaboration(self):
- """Analyze how team members collaborate on files and projects"""
- if conf['verbose']:
- print('Analyzing team collaboration patterns...')
-
- try:
- # Get commit details with files changed
- commit_data = getpipeoutput(['git log --name-only --pretty=format:"COMMIT:%H:%aN:%at" %s' % getlogrange('HEAD')]).split('\n')
-
- current_commit = None
- current_author = None
- current_timestamp = None
-
- for line in commit_data:
- line = line.strip()
- if line.startswith('COMMIT:'):
- # Parse commit header: COMMIT:hash:author:timestamp
- parts = line.split(':', 3)
- if len(parts) >= 4:
- current_commit = parts[1]
- current_author = parts[2]
- try:
- current_timestamp = int(parts[3])
- except ValueError:
- current_timestamp = None
- elif line and current_author and not line.startswith('COMMIT:'):
- # This is a filename
- filename = line
-
- # Initialize author collaboration data
- if current_author not in self.author_collaboration:
- self.author_collaboration[current_author] = {
- 'worked_with': defaultdict(lambda: defaultdict(int)),
- 'file_ownership': defaultdict(int)
- }
-
- # Track file ownership
- self.author_collaboration[current_author]['file_ownership'][filename] += 1
-
- # Track who else worked on this file
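- # Note: this shells out to "git log -- <file>" once per (commit, file)
- # pair, which can be very slow on large repositories.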
- file_history = getpipeoutput([f'git log --pretty=format:"%aN" -- "{filename}"']).split('\n')
- unique_authors = set(file_history) - {current_author}
-
- for other_author in unique_authors:
- if other_author.strip():
- self.author_collaboration[current_author]['worked_with'][other_author][filename] += 1
-
- except Exception as e:
- if conf['debug']:
- print(f'Warning: Team collaboration analysis failed: {e}')
-
- def _analyzeCommitPatterns(self):
- """Analyze commit patterns to identify commit behavior (small vs large commits, frequency, etc.)"""
- if conf['verbose']:
- print('Analyzing commit patterns...')
-
- try:
- # Get detailed commit information
- commit_lines = getpipeoutput(['git log --shortstat --pretty=format:"COMMIT:%H:%aN:%at:%s" %s' % getlogrange('HEAD')]).split('\n')
-
- current_author = None
- current_timestamp = None
- current_message = None
- author_commits = defaultdict(list)
-
- for line in commit_lines:
- line = line.strip()
- if line.startswith('COMMIT:'):
- # Parse: COMMIT:hash:author:timestamp:subject
- parts = line.split(':', 4)
- if len(parts) >= 5:
- current_author = parts[2]
- try:
- current_timestamp = int(parts[3])
- current_message = parts[4]
- except ValueError:
- current_timestamp = None
- current_message = ""
- elif line and current_author and re.search(r'files? changed', line):
- # Parse shortstat line
- numbers = re.findall(r'\d+', line)
- if len(numbers) >= 1:
- files_changed = int(numbers[0])
- insertions = int(numbers[1]) if len(numbers) > 1 else 0
- deletions = int(numbers[2]) if len(numbers) > 2 else 0
- total_changes = insertions + deletions
-
- commit_info = {
- 'timestamp': current_timestamp,
- 'files_changed': files_changed,
- 'lines_changed': total_changes,
- 'insertions': insertions,
- 'deletions': deletions,
- 'message': current_message
- }
- author_commits[current_author].append(commit_info)
-
- # Analyze patterns for each author
- for author, commits in author_commits.items():
- if not commits:
- continue
-
- total_commits = len(commits)
- total_lines = sum(c['lines_changed'] for c in commits)
- avg_commit_size = total_lines / total_commits if total_commits else 0
-
- # Categorize commits by size
- small_commits = sum(1 for c in commits if c['lines_changed'] < 10)
- medium_commits = sum(1 for c in commits if 10 <= c['lines_changed'] < 100)
- large_commits = sum(1 for c in commits if c['lines_changed'] >= 100)
-
- # Calculate commit frequency (commits per day across the author's active span);
- # the empty-commits case was already filtered out by the continue above
- timestamps = [c['timestamp'] for c in commits if c['timestamp']]
- if len(timestamps) > 1:
- time_span = max(timestamps) - min(timestamps)
- days_active = time_span / (24 * 3600) if time_span > 0 else 1
- commit_frequency = total_commits / days_active
- else:
- commit_frequency = total_commits
-
- # Analyze commit messages for patterns
- bug_related = sum(1 for c in commits if any(keyword in c['message'].lower()
- for keyword in ['fix', 'bug', 'error', 'issue', 'patch', 'repair']))
- feature_related = sum(1 for c in commits if any(keyword in c['message'].lower()
- for keyword in ['add', 'new', 'feature', 'implement', 'create']))
- refactor_related = sum(1 for c in commits if any(keyword in c['message'].lower()
- for keyword in ['refactor', 'cleanup', 'reorganize', 'restructure', 'optimize']))
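- # (plain substring matching: one commit may be counted in several categories)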
-
- self.commit_patterns[author] = {
- 'total_commits': total_commits,
- 'avg_commit_size': avg_commit_size,
- 'small_commits': small_commits,
- 'medium_commits': medium_commits,
- 'large_commits': large_commits,
- 'commit_frequency': commit_frequency,
- 'bug_related_commits': bug_related,
- 'feature_related_commits': feature_related,
- 'refactor_related_commits': refactor_related,
- 'avg_files_per_commit': sum(c['files_changed'] for c in commits) / total_commits if total_commits else 0
- }
-
- except Exception as e:
- if conf['debug']:
- print(f'Warning: Commit pattern analysis failed: {e}')
-
- def _analyzeWorkingPatterns(self):
- """Analyze when authors typically work (time of day, day of week, timezone patterns)"""
- if conf['verbose']:
- print('Analyzing working time patterns...')
-
- try:
- # Get commit timestamps with timezone info
- commit_lines = getpipeoutput(['git log --pretty=format:"%aN|%at|%ai|%s" %s' % getlogrange('HEAD')]).split('\n')
-
- for line in commit_lines:
- if not line.strip():
- continue
-
- parts = line.split('|', 3)
- if len(parts) < 3:
- continue
-
- author = parts[0]
- try:
- timestamp = int(parts[1])
- date_str = parts[2] # ISO format with timezone
- message = parts[3] if len(parts) > 3 else ""
- except (ValueError, IndexError):
- continue
-
- # Parse date and time information
- date = datetime.datetime.fromtimestamp(timestamp)
- hour = date.hour
- day_of_week = date.weekday() # Monday = 0, Sunday = 6
-
- # Initialize author working patterns
- if author not in self.working_patterns:
- self.working_patterns[author] = {
- 'night_commits': 0, # 22:00 - 06:00
- 'weekend_commits': 0, # Saturday, Sunday
- 'peak_hours': defaultdict(int),
- 'peak_days': defaultdict(int),
- 'timezone_pattern': defaultdict(int),
- 'early_bird': 0, # 06:00 - 09:00
- 'workday': 0, # 09:00 - 17:00
- 'evening': 0, # 17:00 - 22:00
- 'total_commits': 0
- }
-
- self.working_patterns[author]['total_commits'] += 1
- self.working_patterns[author]['peak_hours'][hour] += 1
- self.working_patterns[author]['peak_days'][day_of_week] += 1
-
- # Extract timezone offset from the "%ai" date string (last token, e.g. "+0200");
- # checking the whole string for '+'/'-' would always match the date's dashes
- tz_part = date_str.split()[-1]
- if tz_part.startswith(('+', '-')):
- self.working_patterns[author]['timezone_pattern'][tz_part] += 1
-
- # Categorize by time of day (buckets are disjoint; the night bucket
- # claims 22:00-06:00, so early bird starts at 06:00)
- if 22 <= hour or hour < 6:
- self.working_patterns[author]['night_commits'] += 1
- elif 6 <= hour < 9:
- self.working_patterns[author]['early_bird'] += 1
- elif 9 <= hour < 17:
- self.working_patterns[author]['workday'] += 1
- elif 17 <= hour < 22:
- self.working_patterns[author]['evening'] += 1
-
- # Weekend commits (Saturday = 5, Sunday = 6)
- if day_of_week >= 5:
- self.working_patterns[author]['weekend_commits'] += 1
-
- # Classify commit types from the message keywords; the old membership
- # check compared the author string against a list of dicts and was
- # always true, so a plain append is equivalent and clearer
- if any(keyword in message.lower() for keyword in ['fix', 'bug', 'error', 'patch']):
- self.potential_bug_commits.append({'author': author, 'timestamp': timestamp, 'message': message})
- elif any(keyword in message.lower() for keyword in ['refactor', 'cleanup', 'optimize']):
- self.refactoring_commits.append({'author': author, 'timestamp': timestamp, 'message': message})
- elif any(keyword in message.lower() for keyword in ['add', 'new', 'feature', 'implement']):
- self.feature_commits.append({'author': author, 'timestamp': timestamp, 'message': message})
-
- # Calculate active periods for each author
- for author in self.authors:
- if 'active_days' in self.authors[author]:
- active_days = self.authors[author]['active_days']
- sorted_days = sorted(active_days)
-
- if len(sorted_days) > 1:
- # Calculate gaps between active days
- gaps = []
- for i in range(1, len(sorted_days)):
- prev_date = datetime.datetime.strptime(sorted_days[i-1], '%Y-%m-%d')
- curr_date = datetime.datetime.strptime(sorted_days[i], '%Y-%m-%d')
- gap = (curr_date - prev_date).days
- gaps.append(gap)
-
- avg_gap = sum(gaps) / len(gaps) if gaps else 0
-
- # Find longest streak
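- # (a gap of exactly 1 day means two consecutive active days)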
- longest_streak = 1
- current_streak = 1
- for gap in gaps:
- if gap == 1:
- current_streak += 1
- longest_streak = max(longest_streak, current_streak)
- else:
- current_streak = 1
- else:
- avg_gap = 0
- longest_streak = 1
-
- self.author_active_periods[author] = {
- 'active_days_count': len(active_days),
- 'longest_streak': longest_streak,
- 'avg_gap': avg_gap
- }
-
- except Exception as e:
- if conf['debug']:
- print(f'Warning: Working pattern analysis failed: {e}')
-
- def _analyzeImpactAndQuality(self):
- """Analyze the impact of changes and identify critical files and potential quality issues"""
- if conf['verbose']:
- print('Analyzing impact and quality indicators...')
-
- try:
- # Identify critical files based on common patterns
- all_files = getpipeoutput(['git ls-tree -r --name-only %s' % getcommitrange('HEAD', end_only=True)]).split('\n')
-
- for filepath in all_files:
- if not filepath.strip():
- continue
-
- filename = os.path.basename(filepath).lower()
-
- # Mark files as critical based on common patterns (filename was
- # lowercased above, so all patterns must be lowercase too)
- critical_patterns = [
- 'main.', 'app.', 'index.', 'config.', 'settings.',
- 'setup.', 'package.json', 'requirements.txt', 'dockerfile',
- 'makefile', 'readme', 'license', '.env'
- ]
-
- if any(pattern in filename for pattern in critical_patterns):
- self.critical_files.add(filepath)
-
- # Files in root directory are often critical
- if '/' not in filepath:
- self.critical_files.add(filepath)
-
- # Analyze file impact scores based on change frequency and author diversity
- file_authors = defaultdict(set)
- file_change_count = defaultdict(int)
-
- # Get file change history
- log_lines = getpipeoutput(['git log --name-only --pretty=format:"AUTHOR:%aN" %s' % getlogrange('HEAD')]).split('\n')
- current_author = None
-
- for line in log_lines:
- line = line.strip()
- if line.startswith('AUTHOR:'):
- current_author = line.replace('AUTHOR:', '')
- elif line and current_author and not line.startswith('AUTHOR:'):
- filename = line
- file_authors[filename].add(current_author)
- file_change_count[filename] += 1
-
- # Calculate impact scores
- for filename in file_change_count:
- change_count = file_change_count[filename]
- author_count = len(file_authors[filename])
-
- # Impact score based on change frequency and author diversity
- base_score = min(change_count * 10, 100) # Cap at 100
- diversity_bonus = min(author_count * 5, 25) # Bonus for multiple authors
- critical_bonus = 50 if filename in self.critical_files else 0
-
- impact_score = base_score + diversity_bonus + critical_bonus
- self.file_impact_scores[filename] = impact_score
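- # e.g. a critical file changed 8 times by 3 authors scores
- # min(80, 100) + min(15, 25) + 50 = 145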
-
- # Analyze author impact
- for author in self.authors:
- critical_files_touched = []
- total_impact_score = 0
-
- # Check which critical files this author touched
- for filename in self.critical_files:
- if author in file_authors.get(filename, set()):
- critical_files_touched.append(filename)
- total_impact_score += self.file_impact_scores.get(filename, 0)
-
- # Calculate bug potential based on commit messages and patterns
- author_commits = self.commit_patterns.get(author, {})
- bug_commits = author_commits.get('bug_related_commits', 0)
- total_commits = author_commits.get('total_commits', 1)
- bug_ratio = bug_commits / total_commits if total_commits > 0 else 0
-
- # Higher bug potential if author has many bug-fix commits
- bug_potential = min(bug_ratio * 100, 100)
-
- self.impact_analysis[author] = {
- 'critical_files': critical_files_touched,
- 'impact_score': total_impact_score,
- 'bug_potential': bug_potential,
- 'high_impact_files': [f for f in file_authors if author in file_authors[f] and self.file_impact_scores.get(f, 0) > 50]
- }
-
- except Exception as e:
- if conf['debug']:
- print(f'Warning: Impact analysis failed: {e}')
-
- def _calculateTeamPerformanceMetrics(self):
- """Calculate comprehensive team performance metrics"""
- if conf['verbose']:
- print('Calculating team performance metrics...')
-
- try:
- total_commits = self.getTotalCommits()
- total_lines_changed = self.total_lines_added + self.total_lines_removed
-
- for author in self.authors:
- author_info = self.authors[author]
- commit_patterns = self.commit_patterns.get(author, {})
- working_patterns = self.working_patterns.get(author, {})
- impact_info = self.impact_analysis.get(author, {})
-
- # Efficiency Score (based on lines changed per commit and commit quality)
- avg_commit_size = commit_patterns.get('avg_commit_size', 0)
- total_author_commits = author_info.get('commits', 0)
-
- # Normalize efficiency (sweet spot is around 20-50 lines per commit)
- if 20 <= avg_commit_size <= 50:
- size_efficiency = 100
- elif avg_commit_size < 20:
- size_efficiency = max(0, avg_commit_size * 5) # Penalty for too small commits
- else:
- size_efficiency = max(0, 100 - (avg_commit_size - 50) * 2) # Penalty for too large commits
-
- # Quality indicators
- bug_commits = commit_patterns.get('bug_related_commits', 0)
- feature_commits = commit_patterns.get('feature_related_commits', 0)
- refactor_commits = commit_patterns.get('refactor_related_commits', 0)
-
- quality_score = 0
- if total_author_commits > 0:
- feature_ratio = feature_commits / total_author_commits
- refactor_ratio = refactor_commits / total_author_commits
- bug_ratio = bug_commits / total_author_commits
-
- # the ratios are fractions of all commits, so these weights already
- # yield percentage points; no further scaling is needed
- quality_score = feature_ratio * 40 + refactor_ratio * 30 - bug_ratio * 20
- quality_score = max(0, min(100, quality_score))
-
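- # Efficiency blends commit sizing (60%) and message-derived quality (40%)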
- efficiency_score = (size_efficiency * 0.6 + quality_score * 0.4)
-
- # Consistency Score (based on commit frequency and working patterns)
- commit_frequency = commit_patterns.get('commit_frequency', 0)
- active_periods = self.author_active_periods.get(author, {})
- longest_streak = active_periods.get('longest_streak', 1)
- avg_gap = active_periods.get('avg_gap', 30)
-
- # Consistency based on regular commits and sustained activity
- frequency_score = min(commit_frequency * 20, 100) # Up to 5 commits per day = max score
- streak_score = min(longest_streak * 5, 100) # Longer streaks = better consistency
- gap_score = max(0, 100 - avg_gap * 3) # Smaller gaps = better consistency
-
- consistency_score = (frequency_score * 0.4 + streak_score * 0.3 + gap_score * 0.3)
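- # e.g. 2 commits/day, a 10-day streak and a 5-day average gap give
- # 40*0.4 + 50*0.3 + 85*0.3 = 56.5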
-
- # Leadership Score (based on impact on critical files, collaboration, and mentoring indicators)
- impact_score = impact_info.get('impact_score', 0)
- critical_files_count = len(impact_info.get('critical_files', []))
-
- # Collaboration score based on working with others
- collaboration_data = self.author_collaboration.get(author, {})
- worked_with_count = len(collaboration_data.get('worked_with', {}))
-
- # Normalize impact and collaboration
- impact_leadership = min(impact_score / 10, 100) # Scale impact score
- collaboration_leadership = min(worked_with_count * 10, 100) # Max score at 10 collaborators
- critical_file_leadership = min(critical_files_count * 20, 100) # Max score at 5 critical files
-
- leadership_score = (impact_leadership * 0.4 + collaboration_leadership * 0.3 + critical_file_leadership * 0.3)
-
- # Overall contribution percentage
- author_commits = author_info.get('commits', 0)
- contribution_percentage = (author_commits / total_commits * 100) if total_commits > 0 else 0
-
- # Store performance metrics
- self.team_performance[author] = {
- 'efficiency_score': efficiency_score,
- 'consistency': consistency_score,
- 'leadership_score': leadership_score,
- 'contribution_percentage': contribution_percentage,
- 'overall_score': (efficiency_score * 0.4 + consistency_score * 0.3 + leadership_score * 0.3),
- 'commit_quality_analysis': {
- 'avg_commit_size': avg_commit_size,
- 'small_commits_ratio': commit_patterns.get('small_commits', 0) / total_author_commits if total_author_commits > 0 else 0,
- 'large_commits_ratio': commit_patterns.get('large_commits', 0) / total_author_commits if total_author_commits > 0 else 0,
- 'bug_fix_ratio': bug_commits / total_author_commits if total_author_commits > 0 else 0,
- 'feature_ratio': feature_commits / total_author_commits if total_author_commits > 0 else 0
- }
- }
-
- except Exception as e:
- if conf['debug']:
- print(f'Warning: Team performance calculation failed: {e}')
-
- def refine(self):
- # authors
- # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
- self.authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
- self.authors_by_commits.reverse() # most first
- for i, name in enumerate(self.authors_by_commits):
- self.authors[name]['place_by_commits'] = i + 1
-
- for name in list(self.authors.keys()):
- a = self.authors[name]
- a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits() if self.getTotalCommits() else 0.0
- date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
- date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
- delta = date_last - date_first
- a['date_first'] = date_first.strftime('%Y-%m-%d')
- a['date_last'] = date_last.strftime('%Y-%m-%d')
- a['timedelta'] = delta
- if 'lines_added' not in a: a['lines_added'] = 0
- if 'lines_removed' not in a: a['lines_removed'] = 0
-
- def getActiveDays(self):
- return self.active_days
-
- def getActivityByDayOfWeek(self):
- return self.activity_by_day_of_week
-
- def getActivityByHourOfDay(self):
- return self.activity_by_hour_of_day
-
- def getAuthorInfo(self, author):
- return self.authors[author]
-
- def getAuthors(self, limit = None):
- res = getkeyssortedbyvaluekey(self.authors, 'commits')
- res.reverse()
- return res[:limit]
-
- def getCommitDeltaDays(self):
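- # integer-divide both stamps into UTC day buckets (86400 s) so partial
- # days at either end still count; +1 makes the range inclusive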
- return (self.last_commit_stamp // 86400 - self.first_commit_stamp // 86400) + 1
-
- def getDomainInfo(self, domain):
- return self.domains[domain]
-
- def getDomains(self):
- return list(self.domains.keys())
-
- def getFirstCommitDate(self):
- return datetime.datetime.fromtimestamp(self.first_commit_stamp)
-
- def getLastCommitDate(self):
- return datetime.datetime.fromtimestamp(self.last_commit_stamp)
-
- def getTags(self):
- lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
- return lines.split('\n')
-
- def getTagDate(self, tag):
- return self.revToDate('tags/' + tag)
-
- def getTotalAuthors(self):
- return self.total_authors
-
- def getTotalCommits(self):
- return self.total_commits
-
- def getTotalFiles(self):
- return self.total_files
-
- def getTotalLOC(self):
- return self.total_lines
-
- def getTotalSourceLines(self):
- return self.total_source_lines
-
- def getTotalCommentLines(self):
- return self.total_comment_lines
-
- def getTotalBlankLines(self):
- return self.total_blank_lines
-
- def getSLOCByExtension(self):
- return self.sloc_by_extension
-
- def getLargestFiles(self, limit=10):
- """Get the largest files by size."""
- sorted_files = sorted(self.file_sizes.items(), key=lambda x: x[1], reverse=True)
- return sorted_files[:limit]
-
- def getFilesWithMostRevisions(self, limit=10):
- """Get files with most revisions (hotspots)."""
- sorted_files = sorted(self.file_revisions.items(), key=lambda x: x[1], reverse=True)
- return sorted_files[:limit]
-
- def getAverageFileSize(self):
- """Get average file size in bytes."""
- if not self.file_sizes:
- return 0.0
- return sum(self.file_sizes.values()) / len(self.file_sizes)
-
- def getDirectoriesByActivity(self, limit=10):
- """Get directories with most lines changed (added + removed)."""
- if not hasattr(self, 'directories'):
- return []
- directory_activity = []
- for directory, stats in self.directories.items():
- total_lines = stats['lines_added'] + stats['lines_removed']
- file_count = len(stats['files'])
- directory_activity.append((directory, total_lines, stats['lines_added'], stats['lines_removed'], file_count))
- return sorted(directory_activity, key=lambda x: x[1], reverse=True)[:limit]
-
- def getDirectoriesByRevisions(self, limit=10):
- """Get directories with most file revisions."""
- if not hasattr(self, 'directory_revisions'):
- return []
- sorted_dirs = sorted(self.directory_revisions.items(), key=lambda x: x[1], reverse=True)
- return sorted_dirs[:limit]
-
- def getAverageRevisionsPerFile(self):
- """Get average number of revisions per file."""
- if not self.file_revisions:
- return 0.0
- return sum(self.file_revisions.values()) / len(self.file_revisions)
-
- def getTotalSize(self):
- return self.total_size
-
- def getLast30DaysActivity(self):
- """Get activity stats for last 30 days."""
- return {
- 'commits': self.last_30_days_commits,
- 'lines_added': self.last_30_days_lines_added,
- 'lines_removed': self.last_30_days_lines_removed
- }
-
- def getLast12MonthsActivity(self):
- """Get activity stats for last 12 months."""
- return {
- 'commits': dict(self.last_12_months_commits),
- 'lines_added': dict(self.last_12_months_lines_added),
- 'lines_removed': dict(self.last_12_months_lines_removed)
- }
-
- def getPaceOfChanges(self):
- """Get pace of changes (line changes over time)."""
- return self.pace_of_changes
-
- def getRepositorySize(self):
- """Get repository size in MB."""
- return getattr(self, 'repository_size_mb', 0.0)
-
- def getBranches(self):
- """Get all branches with their statistics."""
- return self.branches
-
- def getUnmergedBranches(self):
- """Get list of unmerged branch names."""
- return self.unmerged_branches
-
- def getMainBranch(self):
- """Get the detected main branch name."""
- return getattr(self, 'main_branch', 'master')
-
- def getBranchInfo(self, branch_name):
- """Get detailed information about a specific branch."""
- return self.branches.get(branch_name, {})
-
- def getBranchAuthors(self, branch_name):
- """Get authors who contributed to a specific branch."""
- branch_info = self.branches.get(branch_name, {})
- return branch_info.get('authors', {})
-
- def getBranchesByCommits(self, limit=None):
- """Get branches sorted by number of commits."""
- sorted_branches = sorted(self.branches.items(),
- key=lambda x: x[1].get('commits', 0),
- reverse=True)
- if limit:
- return sorted_branches[:limit]
- return sorted_branches
-
- def getBranchesByLinesChanged(self, limit=None):
- """Get branches sorted by total lines changed."""
- sorted_branches = sorted(self.branches.items(),
- key=lambda x: x[1].get('lines_added', 0) + x[1].get('lines_removed', 0),
- reverse=True)
- if limit:
- return sorted_branches[:limit]
- return sorted_branches
-
- def getUnmergedBranchStats(self):
- """Get statistics for unmerged branches only."""
- unmerged_stats = {}
- for branch_name in self.unmerged_branches:
- if branch_name in self.branches:
- unmerged_stats[branch_name] = self.branches[branch_name]
- return unmerged_stats
-
- # New methods for advanced team analysis
- def getCommitPatterns(self):
- """Get commit patterns analysis for all authors."""
- return self.commit_patterns
-
- def getCommitPatternsForAuthor(self, author):
- """Get commit patterns for a specific author."""
- return self.commit_patterns.get(author, {})
-
- def getWorkingPatterns(self):
- """Get working time patterns for all authors."""
- return self.working_patterns
-
- def getWorkingPatternsForAuthor(self, author):
- """Get working patterns for a specific author."""
- return self.working_patterns.get(author, {})
-
- def getTeamCollaboration(self):
- """Get team collaboration analysis."""
- return self.author_collaboration
-
- def getCollaborationForAuthor(self, author):
- """Get collaboration data for a specific author."""
- return self.author_collaboration.get(author, {})
-
- def getImpactAnalysis(self):
- """Get impact analysis for all authors."""
- return self.impact_analysis
-
- def getImpactAnalysisForAuthor(self, author):
- """Get impact analysis for a specific author."""
- return self.impact_analysis.get(author, {})
-
- def getTeamPerformance(self):
- """Get team performance metrics for all authors."""
- return self.team_performance
-
- def getTeamPerformanceForAuthor(self, author):
- """Get team performance metrics for a specific author."""
- return self.team_performance.get(author, {})
-
- def getCriticalFiles(self):
- """Get list of files identified as critical to the project."""
- return list(self.critical_files)
-
- def getFileImpactScores(self):
- """Get impact scores for all files."""
- return dict(self.file_impact_scores)
-
- def getTopImpactFiles(self, limit=10):
- """Get files with highest impact scores."""
- sorted_files = sorted(self.file_impact_scores.items(), key=lambda x: x[1], reverse=True)
- return sorted_files[:limit]
-
- def getBugRelatedCommits(self):
- """Get commits that appear to be bug-related."""
- return self.potential_bug_commits
-
- def getRefactoringCommits(self):
- """Get commits that appear to be refactoring."""
- return self.refactoring_commits
-
- def getFeatureCommits(self):
- """Get commits that appear to add features."""
- return self.feature_commits
-
- def getAuthorActivePeriods(self):
- """Get active periods analysis for all authors."""
- return self.author_active_periods
-
- def getAuthorsByContribution(self):
- """Get authors sorted by contribution percentage."""
- performance_data = [(author, perf.get('contribution_percentage', 0))
- for author, perf in self.team_performance.items()]
- return sorted(performance_data, key=lambda x: x[1], reverse=True)
-
- def getAuthorsByEfficiency(self):
- """Get authors sorted by efficiency score."""
- performance_data = [(author, perf.get('efficiency_score', 0))
- for author, perf in self.team_performance.items()]
- return sorted(performance_data, key=lambda x: x[1], reverse=True)
-
- def getAuthorsByConsistency(self):
- """Get authors sorted by consistency score."""
- performance_data = [(author, perf.get('consistency', 0))
- for author, perf in self.team_performance.items()]
- return sorted(performance_data, key=lambda x: x[1], reverse=True)
-
- def getAuthorsByLeadership(self):
- """Get authors sorted by leadership score."""
- performance_data = [(author, perf.get('leadership_score', 0))
- for author, perf in self.team_performance.items()]
- return sorted(performance_data, key=lambda x: x[1], reverse=True)
-
- def getTeamWorkDistribution(self):
- """Analyze work distribution across team members."""
- total_commits = self.getTotalCommits()
- total_lines = self.total_lines_added + self.total_lines_removed
-
- distribution = {}
- for author in self.authors:
- author_info = self.authors[author]
- author_commits = author_info.get('commits', 0)
- author_lines = author_info.get('lines_added', 0) + author_info.get('lines_removed', 0)
-
- distribution[author] = {
- 'commit_percentage': (author_commits / total_commits * 100) if total_commits > 0 else 0,
- 'lines_percentage': (author_lines / total_lines * 100) if total_lines > 0 else 0,
- 'commits': author_commits,
- 'lines_changed': author_lines
- }
-
- return distribution
-
- def getCommitSizeAnalysis(self):
- """Get analysis of commit sizes across the team."""
- analysis = {
- 'small_commits_authors': [], # Authors with >50% small commits
- 'large_commits_authors': [], # Authors with >20% large commits
- 'balanced_authors': [], # Authors with balanced commit sizes
- 'overall_stats': {
- 'total_small': 0,
- 'total_medium': 0,
- 'total_large': 0
- }
- }
-
- for author, patterns in self.commit_patterns.items():
- total_commits = patterns.get('total_commits', 0)
- if total_commits == 0:
- continue
-
- small_ratio = patterns.get('small_commits', 0) / total_commits
- large_ratio = patterns.get('large_commits', 0) / total_commits
-
- analysis['overall_stats']['total_small'] += patterns.get('small_commits', 0)
- analysis['overall_stats']['total_medium'] += patterns.get('medium_commits', 0)
- analysis['overall_stats']['total_large'] += patterns.get('large_commits', 0)
-
- if small_ratio > 0.5:
- analysis['small_commits_authors'].append((author, small_ratio))
- elif large_ratio > 0.2:
- analysis['large_commits_authors'].append((author, large_ratio))
- else:
- analysis['balanced_authors'].append((author, small_ratio, large_ratio))
-
- return analysis
-
- def revToDate(self, rev):
- stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
- return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
-
- class ReportCreator:
- """Creates the actual report based on given data."""
- def __init__(self):
- pass
-
- def create(self, data, path):
- self.data = data
- self.path = path
-
- def html_linkify(text):
- return text.lower().replace(' ', '_')
-
- def html_header(level, text):
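- # e.g. html_header(2, 'Commits by Year') returns
- # '\n<h2 id="commits_by_year"><a href="#commits_by_year">Commits by Year</a></h2>\n\n'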
- name = html_linkify(text)
- return '\n<h%d id="%s"><a href="#%s">%s</a></h%d>\n\n' % (level, name, name, text, level)
-
- class HTMLReportCreator(ReportCreator):
- def create(self, data, path):
- ReportCreator.create(self, data, path)
- self.title = data.projectname
-
- # Prepare safe local values to avoid division-by-zero and empty-collection errors
- total_commits = data.getTotalCommits()
- total_active_days = len(data.getActiveDays()) if hasattr(data, 'getActiveDays') else 0
- delta_days = data.getCommitDeltaDays() if hasattr(data, 'getCommitDeltaDays') else 0
- total_authors = data.getTotalAuthors()
- # busiest counters: use 1 as denominator if no activity recorded to avoid ZeroDivisionError
- hour_of_day_busiest = data.activity_by_hour_of_day_busiest if getattr(data, 'activity_by_hour_of_day_busiest', 0) > 0 else 1
- hour_of_week_busiest = data.activity_by_hour_of_week_busiest if getattr(data, 'activity_by_hour_of_week_busiest', 0) > 0 else 1
- # timezone max for coloring; default to 1 if empty
- max_commits_on_tz = max(data.commits_by_timezone.values()) if data.commits_by_timezone else 1
-
- # copy static files. Looks in the binary directory, ../share/gitstats and /usr/share/gitstats
- binarypath = os.path.dirname(os.path.abspath(__file__))
- secondarypath = os.path.join(binarypath, '..', 'share', 'gitstats')
- basedirs = [binarypath, secondarypath, '/usr/share/gitstats']
- for file in (conf['style'], 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
- for base in basedirs:
- src = base + '/' + file
- if os.path.exists(src):
- shutil.copyfile(src, path + '/' + file)
- break
- else:
- print('Warning: "%s" not found, so not copied (searched: %s)' % (file, basedirs))
-
- f = open(path + "/index.html", 'w')
- format = '%Y-%m-%d %H:%M:%S'
- self.printHeader(f)
-
- f.write('<h1>GitStats - %s</h1>' % data.projectname)
-
- self.printNav(f)
-
- f.write('<dl>')
- f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
- f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
- f.write('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s), %s, %s</dd>' % (getversion(), getgitversion(), getgnuplotversion()))
- f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format)))
- f.write('<dt>Age</dt><dd>%d days, %d active days (%3.2f%%)</dd>' % (data.getCommitDeltaDays(), total_active_days, (100.0 * total_active_days / data.getCommitDeltaDays()) if data.getCommitDeltaDays() else 0.0))
- f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
- # Add file statistics
- try:
- avg_size = data.getAverageFileSize()
- f.write('<dt>Average File Size</dt><dd>%.2f bytes (%.1f KB)</dd>' % (avg_size, avg_size / 1024))
- except (AttributeError, ZeroDivisionError):
- pass
- try:
- avg_revisions = data.getAverageRevisionsPerFile()
- f.write('<dt>Average Revisions per File</dt><dd>%.2f</dd>' % avg_revisions)
- except (AttributeError, ZeroDivisionError):
- pass
- try:
- repo_size = data.getRepositorySize()
- if repo_size > 0:
- f.write('<dt>Repository Size</dt><dd>%.1f MB</dd>' % repo_size)
- except (AttributeError, ZeroDivisionError):
- pass
- f.write('<dt>Total Lines of Code</dt><dd>%s (%d added, %d removed)</dd>' % (data.getTotalLOC(), data.total_lines_added, data.total_lines_removed))
- f.write('<dt>Source Lines of Code</dt><dd>%s (%.1f%%)</dd>' % (data.getTotalSourceLines(), (100.0 * data.getTotalSourceLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0))
- f.write('<dt>Comment Lines</dt><dd>%s (%.1f%%)</dd>' % (data.getTotalCommentLines(), (100.0 * data.getTotalCommentLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0))
- f.write('<dt>Blank Lines</dt><dd>%s (%.1f%%)</dd>' % (data.getTotalBlankLines(), (100.0 * data.getTotalBlankLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0))
- avg_active = float(total_commits) / total_active_days if total_active_days else 0.0
- avg_all = float(total_commits) / delta_days if delta_days else 0.0
- f.write('<dt>Total Commits</dt><dd>%s (average %.1f commits per active day, %.1f per all days)</dd>' % (total_commits, avg_active, avg_all))
- avg_per_author = float(total_commits) / total_authors if total_authors else 0.0
- f.write('<dt>Authors</dt><dd>%s (average %.1f commits per author)</dd>' % (total_authors, avg_per_author))
-
- # Branch statistics
- branches = data.getBranches() if hasattr(data, 'getBranches') else {}
- unmerged_branches = data.getUnmergedBranches() if hasattr(data, 'getUnmergedBranches') else []
- main_branch = data.getMainBranch() if hasattr(data, 'getMainBranch') else 'master'
-
- if branches:
- f.write('<dt>Total Branches</dt><dd>%d</dd>' % len(branches))
- if unmerged_branches:
- f.write('<dt>Unmerged Branches</dt><dd>%d (%s)</dd>' % (len(unmerged_branches), ', '.join(unmerged_branches[:5]) + ('...' if len(unmerged_branches) > 5 else '')))
- f.write('<dt>Main Branch</dt><dd>%s</dd>' % main_branch)
-
- f.write('</dl>')
-
- f.write('</body>\n</html>')
- f.close()
-
- ###
- # Team Analysis - New comprehensive team analysis page
- f = open(path + '/team_analysis.html', 'w')
- self.printHeader(f)
- f.write('<h1>Team Analysis</h1>')
- self.printNav(f)
-
- # Team Overview
- f.write(html_header(2, 'Team Overview'))
- total_authors = data.getTotalAuthors()
- work_distribution = data.getTeamWorkDistribution()
-
- f.write('<dl>')
- f.write('<dt>Total Team Members</dt><dd>%d</dd>' % total_authors)
-
- # Calculate work distribution metrics
- commit_contributions = [dist['commit_percentage'] for dist in work_distribution.values()]
- lines_contributions = [dist['lines_percentage'] for dist in work_distribution.values()]
-
- if commit_contributions:
- max_commit_contrib = max(commit_contributions)
- min_commit_contrib = min(commit_contributions)
- avg_commit_contrib = sum(commit_contributions) / len(commit_contributions)
-
- f.write('<dt>Work Distribution (Commits)</dt><dd>Max: %.1f%%, Min: %.1f%%, Avg: %.1f%%</dd>' %
- (max_commit_contrib, min_commit_contrib, avg_commit_contrib))
-
- if lines_contributions:
- max_lines_contrib = max(lines_contributions)
- min_lines_contrib = min(lines_contributions)
- avg_lines_contrib = sum(lines_contributions) / len(lines_contributions)
-
- f.write('<dt>Work Distribution (Lines)</dt><dd>Max: %.1f%%, Min: %.1f%%, Avg: %.1f%%</dd>' %
- (max_lines_contrib, min_lines_contrib, avg_lines_contrib))
-
- f.write('</dl>')
-
- # Team Performance Rankings
- f.write(html_header(2, 'Team Performance Rankings'))
-
- # Top contributors by different metrics
- contrib_ranking = data.getAuthorsByContribution()
- efficiency_ranking = data.getAuthorsByEfficiency()
- consistency_ranking = data.getAuthorsByConsistency()
- leadership_ranking = data.getAuthorsByLeadership()
-
- f.write('<div class="rankings">')
- f.write('<div class="ranking-section">')
- f.write('<h3>Top Contributors (by Commit %)</h3>')
- f.write('<ol>')
- for author, percentage in contrib_ranking[:10]:
- f.write('<li>%s (%.1f%%)</li>' % (author, percentage))
- f.write('</ol>')
- f.write('</div>')
-
- f.write('<div class="ranking-section">')
- f.write('<h3>Most Efficient (by Quality Score)</h3>')
- f.write('<ol>')
- for author, score in efficiency_ranking[:10]:
- f.write('<li>%s (%.1f)</li>' % (author, score))
- f.write('</ol>')
- f.write('</div>')
-
- f.write('<div class="ranking-section">')
- f.write('<h3>Most Consistent</h3>')
- f.write('<ol>')
- for author, score in consistency_ranking[:10]:
- f.write('<li>%s (%.1f)</li>' % (author, score))
- f.write('</ol>')
- f.write('</div>')
-
- f.write('<div class="ranking-section">')
- f.write('<h3>Leadership Score</h3>')
- f.write('<ol>')
- for author, score in leadership_ranking[:10]:
- f.write('<li>%s (%.1f)</li>' % (author, score))
- f.write('</ol>')
- f.write('</div>')
- f.write('</div>')
-
- # Detailed Team Performance Table
- f.write(html_header(2, 'Detailed Team Performance Analysis'))
- f.write('<table class="team-performance sortable" id="team-performance">')
- f.write('<tr>')
- f.write('<th>Author</th>')
- f.write('<th>Commits</th>')
- f.write('<th>Contrib %</th>')
- f.write('<th>Lines Changed</th>')
- f.write('<th>Avg Commit Size</th>')
- f.write('<th>Efficiency</th>')
- f.write('<th>Consistency</th>')
- f.write('<th>Leadership</th>')
- f.write('<th>Overall Score</th>')
- f.write('<th>Assessment</th>')
- f.write('</tr>')
-
- team_performance = data.getTeamPerformance()
- commit_patterns = data.getCommitPatterns()
-
- # Sort by overall score
- sorted_authors = sorted(team_performance.items(), key=lambda x: x[1].get('overall_score', 0), reverse=True)
-
- for author, perf in sorted_authors:
- author_info = data.getAuthorInfo(author)
- patterns = commit_patterns.get(author, {})
-
- commits = author_info.get('commits', 0)
- lines_changed = author_info.get('lines_added', 0) + author_info.get('lines_removed', 0)
- contrib_pct = perf.get('contribution_percentage', 0)
- avg_commit_size = patterns.get('avg_commit_size', 0)
- efficiency = perf.get('efficiency_score', 0)
- consistency = perf.get('consistency', 0)
- leadership = perf.get('leadership_score', 0)
- overall = perf.get('overall_score', 0)
-
- # Generate assessment
- assessment = self._generateAssessment(perf, patterns)
-
- f.write('<tr>')
- f.write('<td>%s</td>' % author)
- f.write('<td>%d</td>' % commits)
- f.write('<td>%.1f%%</td>' % contrib_pct)
- f.write('<td>%d</td>' % lines_changed)
- f.write('<td>%.1f</td>' % avg_commit_size)
- f.write('<td>%.1f</td>' % efficiency)
- f.write('<td>%.1f</td>' % consistency)
- f.write('<td>%.1f</td>' % leadership)
- f.write('<td>%.1f</td>' % overall)
- f.write('<td>%s</td>' % assessment)
- f.write('</tr>')
-
- f.write('</table>')
-
- # Commit Patterns Analysis
- f.write(html_header(2, 'Commit Patterns Analysis'))
-
- commit_size_analysis = data.getCommitSizeAnalysis()
-
- f.write('<h3>Commit Size Distribution</h3>')
- f.write('<p><strong>Small commits (<10 lines):</strong> %d commits</p>' % commit_size_analysis['overall_stats']['total_small'])
- f.write('<p><strong>Medium commits (10-100 lines):</strong> %d commits</p>' % commit_size_analysis['overall_stats']['total_medium'])
- f.write('<p><strong>Large commits (>100 lines):</strong> %d commits</p>' % commit_size_analysis['overall_stats']['total_large'])
-
- if commit_size_analysis['small_commits_authors']:
- f.write('<h4>Authors with predominantly small commits (possible commit splitting):</h4>')
- f.write('<ul>')
- for author, ratio in commit_size_analysis['small_commits_authors']:
- f.write('<li>%s (%.1f%% small commits)</li>' % (author, ratio * 100))
- f.write('</ul>')
-
- if commit_size_analysis['large_commits_authors']:
- f.write('<h4>Authors with frequent large commits:</h4>')
- f.write('<ul>')
- for author, ratio in commit_size_analysis['large_commits_authors']:
- f.write('<li>%s (%.1f%% large commits)</li>' % (author, ratio * 100))
- f.write('</ul>')
-
- # Working Patterns Analysis
- f.write(html_header(2, 'Working Time Patterns'))
-
- working_patterns = data.getWorkingPatterns()
-
- f.write('<table class="working-patterns sortable" id="working-patterns">')
- f.write('<tr>')
- f.write('<th>Author</th>')
- f.write('<th>Night Worker<br>(22:00-06:00)</th>')
- f.write('<th>Weekend Worker</th>')
- f.write('<th>Early Bird<br>(05:00-09:00)</th>')
- f.write('<th>Regular Hours<br>(09:00-17:00)</th>')
- f.write('<th>Evening<br>(17:00-22:00)</th>')
- f.write('<th>Peak Hour</th>')
- f.write('<th>Peak Day</th>')
- f.write('</tr>')
-
- for author, patterns in working_patterns.items():
- # use a distinct name here: total_commits must keep the repository-wide
- # value for the percentage columns further down this page
- author_total = patterns.get('total_commits', 1)
-
- night_pct = (patterns.get('night_commits', 0) / author_total) * 100
- weekend_pct = (patterns.get('weekend_commits', 0) / author_total) * 100
- early_pct = (patterns.get('early_bird', 0) / author_total) * 100
- workday_pct = (patterns.get('workday', 0) / author_total) * 100
- evening_pct = (patterns.get('evening', 0) / author_total) * 100
-
- # Find peak hour and day
- peak_hours = patterns.get('peak_hours', {})
- peak_days = patterns.get('peak_days', {})
-
- peak_hour = max(peak_hours.keys(), key=lambda k: peak_hours[k]) if peak_hours else 'N/A'
- peak_day = max(peak_days.keys(), key=lambda k: peak_days[k]) if peak_days else 'N/A'
- peak_day_name = WEEKDAYS[peak_day] if isinstance(peak_day, int) and 0 <= peak_day < 7 else peak_day
-
- f.write('<tr>')
- f.write('<td>%s</td>' % author)
- f.write('<td>%.1f%%</td>' % night_pct)
- f.write('<td>%.1f%%</td>' % weekend_pct)
- f.write('<td>%.1f%%</td>' % early_pct)
- f.write('<td>%.1f%%</td>' % workday_pct)
- f.write('<td>%.1f%%</td>' % evening_pct)
- f.write('<td>%s</td>' % (('%d:00' % peak_hour) if peak_hours else 'N/A'))
- f.write('<td>%s</td>' % peak_day_name)
- f.write('</tr>')
-
- f.write('</table>')
-
- # Impact Analysis
- f.write(html_header(2, 'Impact and Quality Analysis'))
-
- impact_analysis = data.getImpactAnalysis()
- critical_files = data.getCriticalFiles()
-
- f.write('<h3>Critical Files in Project (%d files identified)</h3>' % len(critical_files))
- if critical_files:
- f.write('<ul>')
- for critical_file in critical_files[:20]: # Show first 20
- f.write('<li>%s</li>' % critical_file)
- f.write('</ul>')
- if len(critical_files) > 20:
- f.write('<p>... and %d more files</p>' % (len(critical_files) - 20))
-
- f.write('<h3>Author Impact Analysis</h3>')
- f.write('<table class="impact-analysis sortable" id="impact-analysis">')
- f.write('<tr>')
- f.write('<th>Author</th>')
- f.write('<th>Impact Score</th>')
- f.write('<th>Critical Files Touched</th>')
- f.write('<th>Bug Potential</th>')
- f.write('<th>High Impact Files</th>')
- f.write('<th>Assessment</th>')
- f.write('</tr>')
-
- # Sort by impact score
- sorted_impact = sorted(impact_analysis.items(), key=lambda x: x[1].get('impact_score', 0), reverse=True)
-
- for author, impact in sorted_impact:
- impact_score = impact.get('impact_score', 0)
- critical_files_touched = len(impact.get('critical_files', []))
- bug_potential = impact.get('bug_potential', 0)
- high_impact_files = len(impact.get('high_impact_files', []))
-
- # Generate impact assessment
- if impact_score > 200:
- impact_assessment = "Very High Impact"
- elif impact_score > 100:
- impact_assessment = "High Impact"
- elif impact_score > 50:
- impact_assessment = "Medium Impact"
- else:
- impact_assessment = "Low Impact"
-
- if bug_potential > 30:
- impact_assessment += " (High Bug Risk)"
- elif bug_potential > 15:
- impact_assessment += " (Medium Bug Risk)"
-
- f.write('<tr>')
- f.write('<td>%s</td>' % author)
- f.write('<td>%.1f</td>' % impact_score)
- f.write('<td>%d</td>' % critical_files_touched)
- f.write('<td>%.1f%%</td>' % bug_potential)
- f.write('<td>%d</td>' % high_impact_files)
- f.write('<td>%s</td>' % impact_assessment)
- f.write('</tr>')
-
- f.write('</table>')
-
- # Team Collaboration Analysis
- f.write(html_header(2, 'Team Collaboration Analysis'))
-
- collaboration_data = data.getTeamCollaboration()
-
- f.write('<table class="collaboration sortable" id="collaboration">')
- f.write('<tr>')
- f.write('<th>Author</th>')
- f.write('<th>Files Owned</th>')
- f.write('<th>Collaborators</th>')
- f.write('<th>Shared Files</th>')
- f.write('<th>Top Collaborations</th>')
- f.write('</tr>')
-
- for author, collab in collaboration_data.items():
- files_owned = len(collab.get('file_ownership', {}))
- worked_with = collab.get('worked_with', {})
- collaborators_count = len(worked_with)
-
- # Count total shared files
- shared_files = 0
- top_collabs = []
-
- for other_author, shared_files_dict in worked_with.items():
- shared_count = len(shared_files_dict)
- shared_files += shared_count
- top_collabs.append((other_author, shared_count))
-
- # Sort and take top 3 collaborations
- top_collabs.sort(key=lambda x: x[1], reverse=True)
- top_collabs_str = ', '.join([f"{other}({count})" for other, count in top_collabs[:3]])  # 'other' avoids shadowing the row's author
-
- f.write('<tr>')
- f.write('<td>%s</td>' % author)
- f.write('<td>%d</td>' % files_owned)
- f.write('<td>%d</td>' % collaborators_count)
- f.write('<td>%d</td>' % shared_files)
- f.write('<td>%s</td>' % top_collabs_str)
- f.write('</tr>')
-
- f.write('</table>')
-
- f.write('</body></html>')
- f.close()
-
- ###
- # Activity
- f = open(path + '/activity.html', 'w')
- self.printHeader(f)
- f.write('<h1>Activity</h1>')
- self.printNav(f)
-
- # Last 30 days
- f.write(html_header(2, 'Last 30 Days'))
- last_30_days = data.getLast30DaysActivity()
- f.write('<dl>')
- f.write('<dt>Commits</dt><dd>%d</dd>' % last_30_days['commits'])
- f.write('<dt>Lines added</dt><dd>%d</dd>' % last_30_days['lines_added'])
- f.write('<dt>Lines removed</dt><dd>%d</dd>' % last_30_days['lines_removed'])
- f.write('<dt>Net lines</dt><dd>%d</dd>' % (last_30_days['lines_added'] - last_30_days['lines_removed']))
- f.write('</dl>')
-
- # Last 12 months
- f.write(html_header(2, 'Last 12 Months'))
- last_12_months = data.getLast12MonthsActivity()
- if last_12_months['commits']:
- f.write('<table class="sortable" id="last12months">')
- f.write('<tr><th>Month</th><th>Commits</th><th>Lines Added</th><th>Lines Removed</th><th>Net Lines</th></tr>')
-
- # Sort months in reverse chronological order
- sorted_months = sorted(last_12_months['commits'].keys(), reverse=True)
- for month in sorted_months:
- commits = last_12_months['commits'][month]
- lines_added = last_12_months['lines_added'].get(month, 0)
- lines_removed = last_12_months['lines_removed'].get(month, 0)
- net_lines = lines_added - lines_removed
-
- f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td><td>%d</td></tr>' %
- (month, commits, lines_added, lines_removed, net_lines))
-
- f.write('</table>')
- else:
- f.write('<p>No activity in the last 12 months.</p>')
-
- # Pace of Changes
- f.write(html_header(2, 'Pace of Changes'))
- f.write('<p>Number of line changes (additions + deletions) over time</p>')
- pace_data = data.getPaceOfChanges()
- if pace_data:
- f.write('<img src="pace_of_changes.png" alt="Pace of Changes">')
-
- # Generate pace of changes data file
- fg = open(path + '/pace_of_changes.dat', 'w')
- for stamp in sorted(pace_data.keys()):
- fg.write('%d %d\n' % (stamp, pace_data[stamp]))
- fg.close()
- else:
- f.write('<p>No pace data available.</p>')
-
- # Weekly activity
- WEEKS = 32
- f.write(html_header(2, 'Weekly activity'))
- f.write('<p>Last %d weeks</p>' % WEEKS)
-
- # generate weeks to show (previous N weeks from now)
- now = datetime.datetime.now()
- deltaweek = datetime.timedelta(7)
- weeks = []
- stampcur = now
- for i in range(0, WEEKS):
- weeks.insert(0, stampcur.strftime('%Y-%W'))
- stampcur -= deltaweek
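- # '%Y-%W' numbers weeks with Monday as the first day; it must match the
- # format used when building activity_by_year_week for the lookups below to hit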
-
- # top row: commits & bar
- f.write('<table class="noborders"><tr>')
- for i in range(0, WEEKS):
- commits = 0
- if weeks[i] in data.activity_by_year_week:
- commits = data.activity_by_year_week[weeks[i]]
-
- percentage = 0.0
- if weeks[i] in data.activity_by_year_week and data.activity_by_year_week_peak:
- percentage = float(data.activity_by_year_week[weeks[i]]) / data.activity_by_year_week_peak
- height = max(1, int(200 * percentage))
- f.write('<td style="text-align: center; vertical-align: bottom">%d<div style="display: block; background-color: red; width: 20px; height: %dpx"></div></td>' % (commits, height))
-
- # bottom row: weeks ago, counting back from the current week
- f.write('</tr><tr>')
- for i in range(0, WEEKS):
- f.write('<td>%s</td>' % (WEEKS - i))
- f.write('</tr></table>')
-
- # Hour of Day
- f.write(html_header(2, 'Hour of Day'))
- hour_of_day = data.getActivityByHourOfDay()
- f.write('<table><tr><th>Hour</th>')
- for i in range(0, 24):
- f.write('<th>%d</th>' % i)
- f.write('</tr>\n<tr><th>Commits</th>')
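- # color scale: map commits/busiest into the 127..255 red range so the busiest cell is brightest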
- # hour_of_day.dat is written once, after the table, by the fg block below;
- # writing it here as well would just be overwritten
- for i in range(0, 24):
- if i in hour_of_day:
- r = 127 + int((float(hour_of_day[i]) / hour_of_day_busiest) * 128)
- f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
- else:
- f.write('<td>0</td>')
- f.write('</tr>\n<tr><th>%</th>')
- totalcommits = total_commits
- for i in range(0, 24):
- if i in hour_of_day:
- r = 127 + int((float(hour_of_day[i]) / hour_of_day_busiest) * 128)
- percent = (100.0 * hour_of_day[i]) / totalcommits if totalcommits else 0.0
- f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, percent))
- else:
- f.write('<td>0.00</td>')
- f.write('</tr></table>')
- f.write('<img src="hour_of_day.png" alt="Hour of Day">')
- fg = open(path + '/hour_of_day.dat', 'w')
- for i in range(0, 24):
- if i in hour_of_day:
- fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
- else:
- fg.write('%d 0\n' % (i + 1))
- fg.close()
-
- # Day of Week
- f.write(html_header(2, 'Day of Week'))
- day_of_week = data.getActivityByDayOfWeek()
- f.write('<div class="vtable"><table>')
- f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
- fp = open(path + '/day_of_week.dat', 'w')
- for d in range(0, 7):
- commits = 0
- if d in day_of_week:
- commits = day_of_week[d]
- fp.write('%d %s %d\n' % (d + 1, WEEKDAYS[d], commits))
- f.write('<tr>')
- f.write('<th>%s</th>' % (WEEKDAYS[d]))
- if d in day_of_week:
- percent = (100.0 * day_of_week[d]) / totalcommits if totalcommits else 0.0
- f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], percent))
- else:
- f.write('<td>0</td>')
- f.write('</tr>')
- f.write('</table></div>')
- f.write('<img src="day_of_week.png" alt="Day of Week">')
- fp.close()
-
- # Hour of Week
- f.write(html_header(2, 'Hour of Week'))
- f.write('<table>')
-
- f.write('<tr><th>Weekday</th>')
- for hour in range(0, 24):
- f.write('<th>%d</th>' % (hour))
- f.write('</tr>')
-
- for weekday in range(0, 7):
- f.write('<tr><th>%s</th>' % (WEEKDAYS[weekday]))
- for hour in range(0, 24):
- try:
- commits = data.activity_by_hour_of_week[weekday][hour]
- except KeyError:
- commits = 0
- if commits != 0:
- f.write('<td')
- r = 127 + int((float(commits) / hour_of_week_busiest) * 128)  # guarded local avoids div-by-zero
- f.write(' style="background-color: rgb(%d, 0, 0)"' % r)
- f.write('>%d</td>' % commits)
- else:
- f.write('<td></td>')
- f.write('</tr>')
-
- f.write('</table>')
-
- # Month of Year
- f.write(html_header(2, 'Month of Year'))
- f.write('<div class="vtable"><table>')
- f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
- fp = open(path + '/month_of_year.dat', 'w')
- for mm in range(1, 13):
- commits = 0
- if mm in data.activity_by_month_of_year:
- commits = data.activity_by_month_of_year[mm]
- percent = (100.0 * commits) / total_commits if total_commits else 0.0
- f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, percent))
- fp.write('%d %d\n' % (mm, commits))
- fp.close()
- f.write('</table></div>')
- f.write('<img src="month_of_year.png" alt="Month of Year">')
-
- # Commits by year/month
- f.write(html_header(2, 'Commits by year/month'))
- f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th><th>Lines added</th><th>Lines removed</th></tr>')
- for yymm in reversed(sorted(data.commits_by_month.keys())):
- f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td></tr>' % (yymm, data.commits_by_month.get(yymm,0), data.lines_added_by_month.get(yymm,0), data.lines_removed_by_month.get(yymm,0)))
- f.write('</table></div>')
- f.write('<img src="commits_by_year_month.png" alt="Commits by year/month">')
- fg = open(path + '/commits_by_year_month.dat', 'w')
- for yymm in sorted(data.commits_by_month.keys()):
- fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
- fg.close()
-
- # Commits by year
- f.write(html_header(2, 'Commits by Year'))
- f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th><th>Lines added</th><th>Lines removed</th></tr>')
- for yy in reversed(sorted(data.commits_by_year.keys())):
- commits = data.commits_by_year.get(yy, 0)
- percent = (100.0 * commits) / total_commits if total_commits else 0.0
- f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td></tr>' % (yy, commits, percent, data.lines_added_by_year.get(yy,0), data.lines_removed_by_year.get(yy,0)))
- f.write('</table></div>')
- f.write('<img src="commits_by_year.png" alt="Commits by Year">')
- fg = open(path + '/commits_by_year.dat', 'w')
- for yy in sorted(data.commits_by_year.keys()):
- fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
- fg.close()
-
- # Commits by timezone
- f.write(html_header(2, 'Commits by Timezone'))
- f.write('<table><tr>')
- f.write('<th>Timezone</th><th>Commits</th>')
- f.write('</tr>')
- for i in sorted(data.commits_by_timezone.keys(), key = lambda n : int(n)):
- commits = data.commits_by_timezone[i]
- r = 127 + int((float(commits) / max_commits_on_tz) * 128)
- f.write('<tr><th>%s</th><td style="background-color: rgb(%d, 0, 0)">%d</td></tr>' % (i, r, commits))
- f.write('</table>')
-
- f.write('</body></html>')
- f.close()
-
- ###
- # Authors
- f = open(path + '/authors.html', 'w')
- self.printHeader(f)
-
- f.write('<h1>Authors</h1>')
- self.printNav(f)
-
- # Authors :: List of authors
- f.write(html_header(2, 'List of Authors'))
-
- f.write('<table class="authors sortable" id="authors">')
- f.write('<tr><th>Author</th><th>Commits (%)</th><th>+ lines</th><th>- lines</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th>Active days</th><th># by commits</th></tr>')
- for author in data.getAuthors(conf['max_authors']):
- info = data.getAuthorInfo(author)
- f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td><td>%d</td></tr>' % (author, info['commits'], info['commits_frac'], info['lines_added'], info['lines_removed'], info['date_first'], info['date_last'], info['timedelta'], len(info['active_days']), info['place_by_commits']))
- f.write('</table>')
-
- allauthors = data.getAuthors()
- if len(allauthors) > conf['max_authors']:
- rest = allauthors[conf['max_authors']:]
- f.write('<p class="moreauthors">These didn\'t make it to the top: %s</p>' % ', '.join(rest))
-
- f.write(html_header(2, 'Cumulated Added Lines of Code per Author'))
- f.write('<img src="lines_of_code_by_author.png" alt="Lines of code per Author">')
- if len(allauthors) > conf['max_authors']:
- f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])
-
- f.write(html_header(2, 'Commits per Author'))
- f.write('<img src="commits_by_author.png" alt="Commits per Author">')
- if len(allauthors) > conf['max_authors']:
- f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])
-
- fgl = open(path + '/lines_of_code_by_author.dat', 'w')
- fgc = open(path + '/commits_by_author.dat', 'w')
-
- lines_by_authors = {} # cumulated added lines by
- # author. to save memory,
- # changes_by_date_by_author[stamp][author] is defined
- # only at points where author commits.
- # lines_by_authors allows us to generate all the
- # points in the .dat file.
-
- # Don't rely on getAuthors to give the same order each
- # time. Be robust and keep the list in a variable.
- commits_by_authors = {} # cumulated commits by author, same layout as lines_by_authors
-
- self.authors_to_plot = data.getAuthors(conf['max_authors'])
- for author in self.authors_to_plot:
- lines_by_authors[author] = 0
- commits_by_authors[author] = 0
- for stamp in sorted(data.changes_by_date_by_author.keys()):
- fgl.write('%d' % stamp)
- fgc.write('%d' % stamp)
- for author in self.authors_to_plot:
- if author in data.changes_by_date_by_author[stamp]:
- lines_by_authors[author] = data.changes_by_date_by_author[stamp][author]['lines_added']
- commits_by_authors[author] = data.changes_by_date_by_author[stamp][author]['commits']
- fgl.write(' %d' % lines_by_authors[author])
- fgc.write(' %d' % commits_by_authors[author])
- fgl.write('\n')
- fgc.write('\n')
- fgl.close()
- fgc.close()
-
- # Authors :: Author of Month
- f.write(html_header(2, 'Author of Month'))
- f.write('<table class="sortable" id="aom">')
- f.write('<tr><th>Month</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
- for yymm in reversed(sorted(data.author_of_month.keys())):
- authordict = data.author_of_month[yymm]
- authors = getkeyssortedbyvalues(authordict)
- authors.reverse()
- commits = data.author_of_month[yymm][authors[0]]
- next = ', '.join(authors[1:conf['authors_top']+1])
- f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yymm, authors[0], commits, (100.0 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], next, len(authors)))
-
- f.write('</table>')
-
- f.write(html_header(2, 'Author of Year'))
- f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
- for yy in reversed(sorted(data.author_of_year.keys())):
- authordict = data.author_of_year[yy]
- authors = getkeyssortedbyvalues(authordict)
- authors.reverse()
- commits = data.author_of_year[yy][authors[0]]
- next = ', '.join(authors[1:conf['authors_top']+1])
- f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yy, authors[0], commits, (100.0 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next, len(authors)))
- f.write('</table>')
-
- # Domains
- f.write(html_header(2, 'Commits by Domains'))
- domains_by_commits = getkeyssortedbyvaluekey(data.domains, 'commits')
- domains_by_commits.reverse() # most first
- f.write('<div class="vtable"><table>')
- f.write('<tr><th>Domains</th><th>Total (%)</th></tr>')
- fp = open(path + '/domains.dat', 'w')
- n = 0
- for domain in domains_by_commits:
- if n == conf['max_domains']:
- break
- n += 1
- info = data.getDomainInfo(domain)
- fp.write('%s %d %d\n' % (domain, n, info['commits']))
- percent = (100.0 * info['commits'] / total_commits) if total_commits else 0.0
- f.write('<tr><th>%s</th><td>%d (%.2f%%)</td></tr>' % (domain, info['commits'], percent))
- f.write('</table></div>')
- f.write('<img src="domains.png" alt="Commits by Domains">')
- fp.close()
-
- f.write('</body></html>')
- f.close()
-
- ###
- # Branches
- f = open(path + '/branches.html', 'w')
- self.printHeader(f)
- f.write('<h1>Branches</h1>')
- self.printNav(f)
-
- # Branch summary
- branches = data.getBranches() if hasattr(data, 'getBranches') else {}
- unmerged_branches = data.getUnmergedBranches() if hasattr(data, 'getUnmergedBranches') else []
- main_branch = data.getMainBranch() if hasattr(data, 'getMainBranch') else 'master'
-
- f.write('<dl>')
- f.write('<dt>Total Branches</dt><dd>%d</dd>' % len(branches))
- if unmerged_branches:
- f.write('<dt>Unmerged Branches</dt><dd>%d</dd>' % len(unmerged_branches))
- f.write('<dt>Main Branch</dt><dd>%s</dd>' % main_branch)
- f.write('</dl>')
-
- if branches:
- # Branches :: All Branches
- f.write(html_header(2, 'All Branches'))
- f.write('<table class="branches sortable" id="branches">')
- f.write('<tr><th>Branch</th><th>Status</th><th>Commits</th><th>Lines Added</th><th>Lines Removed</th><th>Total Changes</th><th>Authors</th></tr>')
-
- # Sort branches by total changes (lines added + removed)
- sorted_branches = sorted(branches.items(),
- key=lambda x: x[1].get('lines_added', 0) + x[1].get('lines_removed', 0),
- reverse=True)
-
- for branch_name, branch_info in sorted_branches:
- status = 'Merged' if branch_info.get('is_merged', True) else 'Unmerged'
- commits = branch_info.get('commits', 0)
- lines_added = branch_info.get('lines_added', 0)
- lines_removed = branch_info.get('lines_removed', 0)
- total_changes = lines_added + lines_removed
- authors_count = len(branch_info.get('authors', {}))
-
- # Highlight unmerged branches
- row_class = ' class="unmerged"' if not branch_info.get('is_merged', True) else ''
- f.write('<tr%s><td>%s</td><td>%s</td><td>%d</td><td>%d</td><td>%d</td><td>%d</td><td>%d</td></tr>' %
- (row_class, branch_name, status, commits, lines_added, lines_removed, total_changes, authors_count))
- f.write('</table>')
-
- # Unmerged Branches Detail
- if unmerged_branches:
- f.write(html_header(2, 'Unmerged Branches Detail'))
- f.write('<p>These branches have not been merged into the main branch (%s) and may represent ongoing work or abandoned features.</p>' % main_branch)
-
- f.write('<table class="unmerged-branches sortable" id="unmerged">')
- f.write('<tr><th>Branch</th><th>Commits</th><th>Authors</th><th>Top Contributors</th><th>Lines Added</th><th>Lines Removed</th></tr>')
-
- unmerged_stats = data.getUnmergedBranchStats() if hasattr(data, 'getUnmergedBranchStats') else {}
-
- for branch_name in unmerged_branches:
- if branch_name in unmerged_stats:
- branch_info = unmerged_stats[branch_name]
- commits = branch_info.get('commits', 0)
- authors = branch_info.get('authors', {})
- lines_added = branch_info.get('lines_added', 0)
- lines_removed = branch_info.get('lines_removed', 0)
-
- # Get top contributors
- top_contributors = sorted(authors.items(), key=lambda x: x[1].get('commits', 0), reverse=True)[:3]
- contributors_str = ', '.join([f"{author} ({info.get('commits', 0)})" for author, info in top_contributors])
-
- f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%s</td><td>%d</td><td>%d</td></tr>' %
- (branch_name, commits, len(authors), contributors_str, lines_added, lines_removed))
- f.write('</table>')
-
- # Branch Activity by Author
- f.write(html_header(2, 'Branch Activity by Author'))
- f.write('<p>This table shows which authors have contributed to which branches.</p>')
-
- # Collect all unique authors across all branches
- all_authors = set()
- for branch_info in branches.values():
- all_authors.update(branch_info.get('authors', {}).keys())
-
- if all_authors and len(branches) > 1:
- f.write('<table class="branch-authors sortable" id="branch-authors">')
- header = '<tr><th>Author</th>'
- for branch_name in sorted(branches.keys()):
- header += '<th>%s</th>' % branch_name
- header += '<th>Total Branches</th></tr>'
- f.write(header)
-
- for author in sorted(all_authors):
- row = '<tr><td>%s</td>' % author
- branch_count = 0
- for branch_name in sorted(branches.keys()):
- branch_authors = branches[branch_name].get('authors', {})
- if author in branch_authors:
- commits = branch_authors[author].get('commits', 0)
- row += '<td>%d</td>' % commits
- branch_count += 1
- else:
- row += '<td>-</td>'
- row += '<td>%d</td></tr>' % branch_count
- f.write(row)
- f.write('</table>')
-
- f.write('</body></html>')
- f.close()
-
- ###
- # Files
- f = open(path + '/files.html', 'w')
- self.printHeader(f)
- f.write('<h1>Files</h1>')
- self.printNav(f)
-
- f.write('<dl>\n')
- f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
- f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
- try:
- avg_size = data.getAverageFileSize()
- except (AttributeError, ZeroDivisionError):
- # Fall back to the old calculation if the newer method is unavailable
- avg_size = float(data.getTotalSize()) / data.getTotalFiles() if data.getTotalFiles() else 0.0
- f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % avg_size)
- try:
- avg_revisions = data.getAverageRevisionsPerFile()
- f.write('<dt>Average revisions per file</dt><dd>%.2f</dd>' % avg_revisions)
- except AttributeError:
- pass
- f.write('</dl>\n')
-
- # Files :: File count by date
- f.write(html_header(2, 'File count by date'))
-
- # use set to get rid of duplicate/unnecessary entries
- files_by_date = set()
- for stamp in sorted(data.files_by_stamp.keys()):
- files_by_date.add('%s %d' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
-
- fg = open(path + '/files_by_date.dat', 'w')
- for line in sorted(list(files_by_date)):
- fg.write('%s\n' % line)
- fg.close()
-
- f.write('<img src="files_by_date.png" alt="Files by Date">')
-
- #f.write('<h2>Average file size by date</h2>')
-
- # Files :: Extensions
- f.write(html_header(2, 'Extensions'))
- f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
- for ext in sorted(data.extensions.keys()):
- files = data.extensions[ext]['files']
- lines = data.extensions[ext]['lines']
- loc_percentage = (100.0 * lines) / data.getTotalLOC() if data.getTotalLOC() else 0.0
- files_percentage = (100.0 * files) / data.getTotalFiles() if data.getTotalFiles() else 0.0
- lines_per_file = (lines // files) if files else 0
- f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (ext, files, files_percentage, lines, loc_percentage, lines_per_file))
- f.write('</table>')
-
- # SLOC Breakdown by Extension
- f.write(html_header(2, 'Source Lines of Code (SLOC) Breakdown'))
- f.write('<table class="sortable" id="sloc"><tr><th>Extension</th><th>Source Lines (%)</th><th>Comment Lines (%)</th><th>Blank Lines (%)</th><th>Total Lines</th></tr>')
- sloc_data = data.getSLOCByExtension()
- for ext in sorted(sloc_data.keys()):
- if sloc_data[ext]['total'] == 0:
- continue
- source = sloc_data[ext]['source']
- comments = sloc_data[ext]['comments']
- blank = sloc_data[ext]['blank']
- total = sloc_data[ext]['total']
- source_pct = (100.0 * source / total) if total else 0.0
- comment_pct = (100.0 * comments / total) if total else 0.0
- blank_pct = (100.0 * blank / total) if total else 0.0
- f.write('<tr><td>%s</td><td>%d (%.1f%%)</td><td>%d (%.1f%%)</td><td>%d (%.1f%%)</td><td>%d</td></tr>' %
- (ext, source, source_pct, comments, comment_pct, blank, blank_pct, total))
- f.write('</table>')
-
- # Largest Files
- try:
- largest_files = data.getLargestFiles(15)
- if largest_files:
- f.write(html_header(2, 'Largest Files'))
- f.write('<table class="sortable" id="largest_files"><tr><th>File</th><th>Size (bytes)</th><th>Size (KB)</th></tr>')
- for filepath, size in largest_files:
- size_kb = size / 1024.0
- f.write('<tr><td>%s</td><td>%d</td><td>%.1f</td></tr>' % (filepath, size, size_kb))
- f.write('</table>')
- except (AttributeError, TypeError):
- pass
-
- # Files with Most Revisions (Hotspots)
- try:
- hotspot_files = data.getFilesWithMostRevisions(15)
- if hotspot_files:
- f.write(html_header(2, 'Files with Most Revisions (Hotspots)'))
- f.write('<table class="sortable" id="hotspot_files"><tr><th>File</th><th>Revisions</th><th>% of Total Commits</th></tr>')
- total_commits = data.getTotalCommits()
- for filepath, revisions in hotspot_files:
- revision_pct = (100.0 * revisions / total_commits) if total_commits else 0.0
- f.write('<tr><td>%s</td><td>%d</td><td>%.2f%%</td></tr>' % (filepath, revisions, revision_pct))
- f.write('</table>')
- except (AttributeError, TypeError):
- pass
-
- # Directory Activity
- try:
- active_directories = data.getDirectoriesByActivity(15)
- if active_directories:
- f.write(html_header(2, 'Most Active Directories'))
- f.write('<table class="sortable" id="active_directories"><tr><th>Directory</th><th>Total Lines Changed</th><th>Lines Added</th><th>Lines Removed</th><th>Files</th></tr>')
- for directory, total_lines, lines_added, lines_removed, file_count in active_directories:
- directory_display = directory if directory != '.' else '(root)'
- f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td><td>%d</td></tr>' % (directory_display, total_lines, lines_added, lines_removed, file_count))
- f.write('</table>')
- except (AttributeError, TypeError):
- pass
-
- f.write('</body></html>')
- f.close()
-
- ###
- # Lines
- f = open(path + '/lines.html', 'w')
- self.printHeader(f)
- f.write('<h1>Lines</h1>')
- self.printNav(f)
-
- f.write('<dl>\n')
- f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
- f.write('<dt>Source lines</dt><dd>%d (%.1f%%)</dd>' % (
- data.getTotalSourceLines(),
- (100.0 * data.getTotalSourceLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0
- ))
- f.write('<dt>Comment lines</dt><dd>%d (%.1f%%)</dd>' % (
- data.getTotalCommentLines(),
- (100.0 * data.getTotalCommentLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0
- ))
- f.write('<dt>Blank lines</dt><dd>%d (%.1f%%)</dd>' % (
- data.getTotalBlankLines(),
- (100.0 * data.getTotalBlankLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0
- ))
- f.write('</dl>\n')
-
- f.write(html_header(2, 'Lines of Code'))
- f.write('<p>This chart shows the total lines of code over time, including source code, comments, and blank lines.</p>')
- f.write('<img src="lines_of_code.png" alt="Lines of Code">')
-
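- # lines_of_code.dat columns: unix timestamp, total lines (lines_of_code.plot reads column 1 with timefmt "%s")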
- fg = open(path + '/lines_of_code.dat', 'w')
- for stamp in sorted(data.changes_by_date.keys()):
- fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
- fg.close()
-
- # Add SLOC composition chart data
- f.write(html_header(2, 'Source Lines of Code (SLOC) Composition'))
- f.write('<p>Breakdown of code composition by file type and content type:</p>')
- sloc_data = data.getSLOCByExtension()
- if sloc_data:
- f.write('<table class="sortable" id="sloc_breakdown">')
- f.write('<tr><th>Extension</th><th>Source Lines</th><th>Comment Lines</th><th>Blank Lines</th><th>Total</th><th>Source %</th><th>Comment %</th></tr>')
-
- sorted_sloc = sorted(sloc_data.items(), key=lambda x: x[1]['total'], reverse=True)
- for ext, sloc_info in sorted_sloc[:15]: # Top 15 extensions
- if sloc_info['total'] == 0:
- continue
-
- ext_display = ext if ext else '(no extension)'
- source_pct = (100.0 * sloc_info['source'] / sloc_info['total']) if sloc_info['total'] else 0.0
- comment_pct = (100.0 * sloc_info['comments'] / sloc_info['total']) if sloc_info['total'] else 0.0
-
- f.write('<tr>')
- f.write('<td>%s</td>' % ext_display)
- f.write('<td>%d</td>' % sloc_info['source'])
- f.write('<td>%d</td>' % sloc_info['comments'])
- f.write('<td>%d</td>' % sloc_info['blank'])
- f.write('<td>%d</td>' % sloc_info['total'])
- f.write('<td>%.1f%%</td>' % source_pct)
- f.write('<td>%.1f%%</td>' % comment_pct)
- f.write('</tr>')
-
- f.write('</table>')
- else:
- f.write('<p>No SLOC data available.</p>')
-
- f.write('</body></html>')
- f.close()
-
- ###
- # tags.html
- f = open(path + '/tags.html', 'w')
- self.printHeader(f)
- f.write('<h1>Tags</h1>')
- self.printNav(f)
-
- f.write('<dl>')
- f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
- if len(data.tags) > 0:
- f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (1.0 * data.getTotalCommits() / len(data.tags)))
- f.write('</dl>')
-
- f.write('<table class="tags">')
- f.write('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
- # sort the tags by date desc
- tags_sorted_by_date_desc = [tag for tag, _ in sorted(data.tags.items(), key=lambda item: item[1]['date'], reverse=True)]
- for tag in tags_sorted_by_date_desc:
- authorinfo = []
- authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
- for i in reversed(authors_by_commits):
- authorinfo.append('%s (%d)' % (i, data.tags[tag]['authors'][i]))
- f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
- f.write('</table>')
-
- f.write('</body></html>')
- f.close()
-
- self.createGraphs(path)
-
- def _generateAssessment(self, performance, patterns):
- """Generate a text assessment for an author based on their performance metrics."""
- efficiency = performance.get('efficiency_score', 0)
- consistency = performance.get('consistency', 0)
- leadership = performance.get('leadership_score', 0)
- contribution = performance.get('contribution_percentage', 0)
-
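- # max(..., 1) guards the ratios against a zero commit count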
- small_commits_ratio = patterns.get('small_commits', 0) / max(patterns.get('total_commits', 1), 1)
- large_commits_ratio = patterns.get('large_commits', 0) / max(patterns.get('total_commits', 1), 1)
-
- assessments = []
-
- # Contribution level
- if contribution > 25:
- assessments.append("Major Contributor")
- elif contribution > 10:
- assessments.append("Regular Contributor")
- elif contribution > 2:
- assessments.append("Minor Contributor")
- else:
- assessments.append("Occasional Contributor")
-
- # Quality assessment
- if efficiency > 80:
- assessments.append("High Quality")
- elif efficiency > 60:
- assessments.append("Good Quality")
- elif efficiency > 40:
- assessments.append("Average Quality")
- else:
- assessments.append("Needs Improvement")
-
- # Work pattern assessment
- if small_commits_ratio > 0.7:
- assessments.append("Frequent Small Commits")
- elif large_commits_ratio > 0.3:
- assessments.append("Prefers Large Commits")
-
- if consistency > 80:
- assessments.append("Very Consistent")
- elif consistency > 60:
- assessments.append("Consistent")
-
- if leadership > 70:
- assessments.append("Leadership Role")
- elif leadership > 50:
- assessments.append("Collaborative")
-
- return ", ".join(assessments) if assessments else "Standard Contributor"
-
- def createGraphs(self, path):
- print('Generating graphs...')
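- # Each block below writes a gnuplot script; every *.plot file is executed in one pass at the end of this method.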
-
- # hour of day
- f = open(path + '/hour_of_day.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'hour_of_day.png'
- unset key
- set xrange [0.5:24.5]
- set yrange [0:]
- set xtics 4
- set grid y
- set ylabel "Commits"
- plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
- """)
- f.close()
-
- # day of week
- f = open(path + '/day_of_week.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'day_of_week.png'
- unset key
- set xrange [0.5:7.5]
- set yrange [0:]
- set xtics 1
- set grid y
- set ylabel "Commits"
- plot 'day_of_week.dat' using 1:3:(0.5):xtic(2) w boxes fs solid
- """)
- f.close()
-
- # Domains
- f = open(path + '/domains.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'domains.png'
- unset key
- unset xtics
- set yrange [0:]
- set grid y
- set ylabel "Commits"
- plot 'domains.dat' using 2:3:(0.5) with boxes fs solid, '' using 2:3:1 with labels rotate by 45 offset 0,1
- """)
- f.close()
-
- # Month of Year
- f = open(path + '/month_of_year.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'month_of_year.png'
- unset key
- set xrange [0.5:12.5]
- set yrange [0:]
- set xtics 1
- set grid y
- set ylabel "Commits"
- plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
- """)
- f.close()
-
- # commits_by_year_month
- f = open(path + '/commits_by_year_month.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'commits_by_year_month.png'
- unset key
- set yrange [0:]
- set xdata time
- set timefmt "%Y-%m"
- set format x "%Y-%m"
- set xtics rotate
- set bmargin 5
- set grid y
- set ylabel "Commits"
- plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
- """)
- f.close()
-
- # commits_by_year
- f = open(path + '/commits_by_year.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'commits_by_year.png'
- unset key
- set yrange [0:]
- set xtics 1 rotate
- set grid y
- set ylabel "Commits"
- plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
- """)
- f.close()
-
- # Files by date
- f = open(path + '/files_by_date.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'files_by_date.png'
- unset key
- set yrange [0:]
- set xdata time
- set timefmt "%Y-%m-%d"
- set format x "%Y-%m-%d"
- set grid y
- set ylabel "Files"
- set xtics rotate
- set ytics autofreq
- set bmargin 6
- plot 'files_by_date.dat' using 1:2 w steps
- """)
- f.close()
-
- # Lines of Code
- f = open(path + '/lines_of_code.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'lines_of_code.png'
- unset key
- set yrange [0:]
- set xdata time
- set timefmt "%s"
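- # column 1 of lines_of_code.dat is a unix epoch timestamp, matching timefmt "%s"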
- set format x "%Y-%m-%d"
- set grid y
- set ylabel "Lines"
- set xtics rotate
- set bmargin 6
- plot 'lines_of_code.dat' using 1:2 w lines
- """)
- f.close()
-
- # Lines of Code Added per author
- f = open(path + '/lines_of_code_by_author.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set terminal png transparent size 640,480
- set output 'lines_of_code_by_author.png'
- set key left top
- set yrange [0:]
- set xdata time
- set timefmt "%s"
- set format x "%Y-%m-%d"
- set grid y
- set ylabel "Lines"
- set xtics rotate
- set bmargin 6
- plot """
- )
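- # column 1 of lines_of_code_by_author.dat is the timestamp; author columns start at 2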
- plots = []
- for i, a in enumerate(self.authors_to_plot, start=2):
- author = a.replace("\"", "\\\"").replace("`", "")
- plots.append("""'lines_of_code_by_author.dat' using 1:%d title "%s" w lines""" % (i, author))
- f.write(", ".join(plots))
- f.write('\n')
-
- f.close()
-
- # Commits per author
- f = open(path + '/commits_by_author.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set terminal png transparent size 640,480
- set output 'commits_by_author.png'
- set key left top
- set yrange [0:]
- set xdata time
- set timefmt "%s"
- set format x "%Y-%m-%d"
- set grid y
- set ylabel "Commits"
- set xtics rotate
- set bmargin 6
- plot """
- )
- plots = []
- # same column mapping as lines_of_code_by_author.dat: author data starts at column 2
- for i, a in enumerate(self.authors_to_plot, start=2):
- author = a.replace("\"", "\\\"").replace("`", "")
- plots.append("""'commits_by_author.dat' using 1:%d title "%s" w lines""" % (i, author))
- f.write(", ".join(plots))
- f.write('\n')
-
- f.close()
-
- # Pace of Changes plot
- f = open(path + '/pace_of_changes.plot', 'w')
- f.write(GNUPLOT_COMMON)
- f.write(
- """
- set output 'pace_of_changes.png'
- unset key
- set yrange [0:]
- set xdata time
- set timefmt "%s"
- set format x "%Y-%m-%d"
- set grid y
- set ylabel "Line Changes (Additions + Deletions)"
- set xtics rotate
- set bmargin 6
- plot 'pace_of_changes.dat' using 1:2 w lines lw 2
- """)
- f.close()
-
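- # Run gnuplot from the output directory so each script's relative 'set output' lands next to the report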
- os.chdir(path)
- files = glob.glob(path + '/*.plot')
- for plot_file in files:
- out = getpipeoutput([gnuplot_cmd + ' "%s"' % plot_file])
- if len(out) > 0:
- print(out)
-
- def printHeader(self, f, title = ''):
- f.write(
- """<!DOCTYPE html>
- <html>
- <head>
- <meta charset="UTF-8">
- <title>GitStats - %s</title>
- <link rel="stylesheet" href="%s" type="text/css">
- <meta name="generator" content="GitStats %s">
- <script type="text/javascript" src="sortable.js"></script>
- </head>
- <body>
- """ % (self.title, conf['style'], getversion()))
-
- def printNav(self, f):
- f.write("""
- <div class="nav">
- <ul>
- <li><a href="index.html">General</a></li>
- <li><a href="activity.html">Activity</a></li>
- <li><a href="authors.html">Authors</a></li>
- <li><a href="team_analysis.html">Team Analysis</a></li>
- <li><a href="branches.html">Branches</a></li>
- <li><a href="files.html">Files</a></li>
- <li><a href="lines.html">Lines</a></li>
- <li><a href="tags.html">Tags</a></li>
- </ul>
- </div>
- """)
-
- class PDFReportCreator(ReportCreator):
- """Creates PDF reports using fpdf2 library with embedded charts and tab-based structure."""
-
- def __init__(self):
- ReportCreator.__init__(self)
- self.pdf = None
- self.output_path = None
- # Define color schemes for better visual appeal
- self.colors = {
- 'header': (41, 128, 185), # Blue
- 'text': (0, 0, 0), # Black
- 'table_header': (52, 152, 219), # Light blue
- 'table_alt': (245, 245, 245) # Light gray
- }
-
- def _set_color(self, color_type='text', fill=False):
- """Set text or fill color using predefined color scheme."""
- if color_type in self.colors:
- r, g, b = self.colors[color_type]
- if fill:
- self.pdf.set_fill_color(r, g, b)
- else:
- self.pdf.set_text_color(r, g, b)
-
- def _add_section_header(self, title, level=1):
- """Add a standardized section header with consistent formatting."""
- # Add some space before header
- self.pdf.ln(h=10)
-
- # Set header color and font
- self._set_color('header')
- if level == 1:
- self.pdf.set_font('helvetica', 'B', 20)
- height = 15
- elif level == 2:
- self.pdf.set_font('helvetica', 'B', 16)
- height = 12
- else:
- self.pdf.set_font('helvetica', 'B', 14)
- height = 10
-
- # Add the header
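- # (fpdf2's new_x/new_y keywords replace the deprecated ln= argument of cell())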
- self.pdf.cell(0, height, title, 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Reset color to text
- self._set_color('text')
- self.pdf.ln(h=5) # Small gap after header
-
- def _create_table_header(self, headers, widths=None, font_size=9):
- """Create a standardized table header with consistent formatting."""
- if widths is None:
- # Auto-calculate widths if not provided
- total_width = 180 # Reasonable default
- widths = [total_width // len(headers)] * len(headers)
-
- # Set header styling: white text on the blue fill (setting the text to the
- # same color as the fill would render the header invisible)
- self.pdf.set_text_color(255, 255, 255)
- self._set_color('table_header', fill=True)
- self.pdf.set_font('helvetica', 'B', font_size)
-
- # Create header cells
- for i, (header, width) in enumerate(zip(headers, widths)):
- is_last = (i == len(headers) - 1)
- new_x = XPos.LMARGIN if is_last else XPos.RIGHT
- new_y = YPos.NEXT if is_last else YPos.TOP
-
- self.pdf.cell(width, 8, str(header), 1,
- new_x=new_x, new_y=new_y, align='C', fill=True)
-
- # Reset styling for table content
- self._set_color('text')
- self.pdf.set_font('helvetica', '', font_size - 1)
-
- def _create_table_row(self, values, widths, alternate_row=False, font_size=8):
- """Create a table row with optional alternating background."""
- if alternate_row:
- self._set_color('table_alt', fill=True)
-
- for i, (value, width) in enumerate(zip(values, widths)):
- is_last = (i == len(values) - 1)
- new_x = XPos.LMARGIN if is_last else XPos.RIGHT
- new_y = YPos.NEXT if is_last else YPos.TOP
-
- # Truncate long values to fit
- str_value = str(value)
- if len(str_value) > width // 3: # Rough character width estimation
- str_value = str_value[:width//3-2] + '...'
-
- self.pdf.cell(width, 6, str_value, 1,
- new_x=new_x, new_y=new_y, align='C', fill=alternate_row)
-
- def create(self, data, path):
- ReportCreator.create(self, data, path)
- self.title = data.projectname
- self.output_path = path
-
- # Initialize PDF document with fpdf2 features
- self.pdf = FPDF()
- self.pdf.set_auto_page_break(auto=True, margin=15)
-
- # Set metadata for better PDF properties
- self.pdf.set_title(f"GitStats Report - {data.projectname}")
- self.pdf.set_author("GitStats")
- self.pdf.set_subject(f"Git repository analysis for {data.projectname}")
- self.pdf.set_creator("GitStats with fpdf2")
- self.pdf.set_keywords("git,statistics,analysis,repository")
-
- # Create all pages (tabs)
- self._create_title_page(data)
- self._create_general_page(data)
- self._create_activity_page(data)
- self._create_authors_page(data)
- self._create_team_analysis_page(data)
- self._create_files_page(data)
- self._create_lines_page(data)
- self._create_tags_page(data)
- self._create_branches_page(data)
-
- # Save PDF with fpdf2's enhanced output method
- pdf_path = os.path.join(path, f"gitstats_{data.projectname.replace(' ', '_')}.pdf")
-
- # Use fpdf2's output method with proper file handling
- try:
- self.pdf.output(pdf_path)
- print(f"PDF report saved to: {pdf_path}")
- # Verify file was created and has content
- if os.path.exists(pdf_path) and os.path.getsize(pdf_path) > 0:
- print(f"PDF file size: {os.path.getsize(pdf_path)} bytes")
- else:
- print("Warning: PDF file was not created properly")
- except Exception as e:
- print(f"Error saving PDF: {e}")
- raise
-
- def _add_chart_if_exists(self, chart_filename, width=None, height=None):
- """Add a chart image to the PDF if it exists, with improved fpdf2 handling."""
- chart_path = os.path.join(self.output_path, chart_filename)
- if os.path.exists(chart_path):
- try:
- # Get current position
- x = self.pdf.get_x()
- y = self.pdf.get_y()
-
- # Calculate dimensions with better defaults
- if width is None:
- width = 150 # Default width
- if height is None:
- height = 80 # Default height
-
- # Get page height for the remaining-space calculation
- page_height = self.pdf.h
- margin = 15 # Same as the auto_page_break margin
-
- # Check if there's enough space on current page
- if y + height > (page_height - margin):
- self.pdf.add_page()
- x = self.pdf.get_x()
- y = self.pdf.get_y()
-
- # Add image with fpdf2's enhanced image handling
- # fpdf2 automatically handles different image formats
- self.pdf.image(chart_path, x=x, y=y, w=width, h=height)
-
- # Move cursor below image with better spacing
- self.pdf.set_y(y + height + 8) # Increased spacing for better layout
-
- return True
- except Exception as e:
- print(f"Warning: Could not add chart {chart_filename}: {e}")
- return False
- return False
-
- def _create_title_page(self, data):
- """Create the title page of the PDF report."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 24)
- self.pdf.cell(0, 20, f'GitStats Report - {data.projectname}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- self.pdf.ln(h=10)
- self.pdf.set_font('helvetica', '', 12)
- date_format = '%Y-%m-%d %H:%M:%S'
-
- # Report generation info
- self.pdf.cell(0, 10, f'Generated: {datetime.datetime.now().strftime(date_format)}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
- self.pdf.cell(0, 10, f'Generator: GitStats (version {getversion()})', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
- self.pdf.cell(0, 10, f'Git Version: {getgitversion()}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
- if getgnuplotversion():
- self.pdf.cell(0, 10, f'Gnuplot Version: {getgnuplotversion()}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
-
- self.pdf.ln(h=10)
- self.pdf.cell(0, 10, f'Report Period: {data.getFirstCommitDate().strftime(date_format)} to {data.getLastCommitDate().strftime(date_format)}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT)
-
- # Table of contents
- self.pdf.ln(h=15)
- self.pdf.set_font('helvetica', 'B', 16)
- self.pdf.cell(0, 10, 'Table of Contents', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 12)
- sections = [
- '1. General Statistics',
- '2. Activity Statistics',
- '3. Authors Statistics',
- '4. Team Analysis',
- '5. Files Statistics',
- '6. Lines of Code Statistics',
- '7. Tags Statistics',
- '8. Branches Statistics'
- ]
-
- for section in sections:
- self.pdf.cell(0, 8, section, 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- def _create_general_page(self, data):
- """Create the general statistics page (mirrors index.html)."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 20)
- self.pdf.cell(0, 15, '1. General Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 12)
-
- # Calculate basic stats
- total_commits = data.getTotalCommits()
- total_active_days = len(data.getActiveDays()) if hasattr(data, 'getActiveDays') else 0
- delta_days = data.getCommitDeltaDays() if hasattr(data, 'getCommitDeltaDays') else 0
- total_authors = data.getTotalAuthors()
-
- # General statistics (matching index.html exactly)
- stats = [
- ('Project name', data.projectname),
- ('Generated', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
- ('Report Period', f"{data.getFirstCommitDate().strftime('%Y-%m-%d %H:%M:%S')} to {data.getLastCommitDate().strftime('%Y-%m-%d %H:%M:%S')}"),
- ('Age', f"{delta_days} days, {total_active_days} active days ({(100.0 * total_active_days / delta_days) if delta_days else 0.0:.2f}%)"),
- ('Total Files', str(data.getTotalFiles())),
- ('Total Lines of Code', f"{data.getTotalLOC()} ({data.total_lines_added} added, {data.total_lines_removed} removed)"),
- ('Source Lines of Code', f"{data.getTotalSourceLines()} ({(100.0 * data.getTotalSourceLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0:.1f}%)"),
- ('Comment Lines', f"{data.getTotalCommentLines()} ({(100.0 * data.getTotalCommentLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0:.1f}%)"),
- ('Blank Lines', f"{data.getTotalBlankLines()} ({(100.0 * data.getTotalBlankLines() / data.getTotalLOC()) if data.getTotalLOC() else 0.0:.1f}%)"),
- ('Total Commits', f"{total_commits} (average {(float(total_commits) / total_active_days) if total_active_days else 0.0:.1f} commits per active day, {(float(total_commits) / delta_days) if delta_days else 0.0:.1f} per all days)"),
- ('Authors', f"{total_authors} (average {(float(total_commits) / total_authors) if total_authors else 0.0:.1f} commits per author)"),
- ('Total Branches', str(len(data.getBranches()))),
- ('Unmerged Branches', str(len(data.getUnmergedBranches()))),
- ('Main Branch', data.main_branch if hasattr(data, 'main_branch') else 'N/A')
- ]
-
- # Display stats
- for label, value in stats:
- self.pdf.cell(50, 8, f"{label}:", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(0, 8, str(value), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=10)
-
- def _create_activity_page(self, data):
- """Create the activity statistics page with charts (mirrors activity.html)."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 20)
- self.pdf.cell(0, 15, '2. Activity Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Weekly activity section
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Weekly Activity', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self.pdf.set_font('helvetica', '', 10)
- self.pdf.cell(0, 6, 'Last 32 weeks activity (see chart below)', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self.pdf.ln(h=5)
-
- # Hour of Day section
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Hour of Day', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 10)
- hour_of_day = data.getActivityByHourOfDay()
- total_commits = data.getTotalCommits()
-
- # Create hour of day table
- self.pdf.set_font('helvetica', 'B', 8)
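- # row width: 20 + 24*7 = 188 mm, which should just fit the printable width of an A4 page with default margins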
- self.pdf.cell(20, 6, 'Hour', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- for h in range(0, 24):
- self.pdf.cell(7, 6, str(h), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.ln()
-
- self.pdf.cell(20, 6, 'Commits', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- for h in range(0, 24):
- commits = hour_of_day.get(h, 0)
- self.pdf.cell(7, 6, str(commits), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.ln()
-
- self.pdf.cell(20, 6, '%', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- for h in range(0, 24):
- commits = hour_of_day.get(h, 0)
- percent = (100.0 * commits / total_commits) if total_commits else 0.0
- self.pdf.cell(7, 6, f"{percent:.1f}", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.ln(h=10)
-
- # Add hour of day chart
- self._add_chart_if_exists('hour_of_day.png', 180, 90)
-
- # Day of Week section
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Day of Week', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 10)
- day_of_week = data.getActivityByDayOfWeek()
-
- # Create day of week table
- self.pdf.set_font('helvetica', 'B', 10)
- self.pdf.cell(30, 8, 'Day', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 8, 'Total (%)', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- self.pdf.set_font('helvetica', '', 10)
- for d in range(0, 7):
- day_name = WEEKDAYS[d]
- commits = day_of_week.get(d, 0)
- percent = (100.0 * commits / total_commits) if total_commits else 0.0
- self.pdf.cell(30, 6, day_name, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(30, 6, f"{commits} ({percent:.2f}%)", 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=5)
- self._add_chart_if_exists('day_of_week.png', 180, 90)
-
- # Month of Year section
- if hasattr(data, 'activity_by_month_of_year'):
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Month of Year', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', 'B', 10)
- self.pdf.cell(30, 8, 'Month', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(40, 8, 'Commits (%)', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- self.pdf.set_font('helvetica', '', 10)
- for mm in range(1, 13):
- commits = data.activity_by_month_of_year.get(mm, 0)
- percent = (100.0 * commits / total_commits) if total_commits else 0.0
- self.pdf.cell(30, 6, str(mm), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(40, 6, f"{commits} ({percent:.2f} %)", 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=5)
- self._add_chart_if_exists('month_of_year.png', 180, 90)
-
- # Add page break for next major chart
- if self.pdf.get_y() > 200:
- self.pdf.add_page()
-
- # Commits by year/month chart
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Commits by Year/Month', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self._add_chart_if_exists('commits_by_year_month.png', 180, 100)
-
- # Commits by year chart
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Commits by Year', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self._add_chart_if_exists('commits_by_year.png', 180, 100)
-
- def _create_authors_page(self, data):
- """Create the authors statistics page with charts (mirrors authors.html)."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 20)
- self.pdf.cell(0, 15, '3. Authors Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # List of Authors table
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'List of Authors', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- authors = data.getAuthors(conf['max_authors'])
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 8)
- self.pdf.cell(35, 6, 'Author', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 6, 'Commits (%)', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(15, 6, '+ lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(15, 6, '- lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, 'First commit', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, 'Last commit', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 6, 'Age', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(15, 6, 'Active days', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Table data
- self.pdf.set_font('helvetica', '', 7)
- for author in authors[:20]: # Top 20 authors
- info = data.getAuthorInfo(author)
-
- # Truncate long author names
- display_author = author[:18] + "..." if len(author) > 21 else author
-
- self.pdf.cell(35, 5, display_author, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(20, 5, f"{info['commits']} ({info['commits_frac']:.1f}%)", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(15, 5, str(info['lines_added']), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(15, 5, str(info['lines_removed']), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 5, info['date_first'][:10], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 5, info['date_last'][:10], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
-
- # Calculate age
- try:
- age_days = (datetime.datetime.strptime(info['date_last'][:10], '%Y-%m-%d') -
- datetime.datetime.strptime(info['date_first'][:10], '%Y-%m-%d')).days
- age_text = f"{age_days} days" if age_days > 0 else "1 day"
- except (ValueError, TypeError):
- age_text = "N/A"
-
- active_days = len(info.get('active_days', [0])) # defaults to 1 when the key is absent
-
- self.pdf.cell(20, 5, age_text[:12], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(15, 5, str(active_days), 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- self.pdf.ln(h=10)
-
- # Lines of code by author chart
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Cumulated Added Lines of Code per Author', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self._add_chart_if_exists('lines_of_code_by_author.png', 180, 110)
-
- # Commits per author chart
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Commits per Author', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self._add_chart_if_exists('commits_by_author.png', 180, 110)
-
- # Commits by domains chart
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Commits by Domains', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self._add_chart_if_exists('domains.png', 180, 100)
-
- def _create_team_analysis_page(self, data):
- """Create the team analysis page for comprehensive team evaluation (new feature)."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 20)
- self.pdf.cell(0, 15, '4. Team Analysis', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Team Overview
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Team Overview', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 12)
- total_authors = data.getTotalAuthors()
- work_distribution = data.getTeamWorkDistribution()
-
- self.pdf.cell(50, 8, 'Total Team Members:', 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(0, 8, str(total_authors), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Calculate work distribution metrics
- commit_contributions = [dist['commit_percentage'] for dist in work_distribution.values()]
- if commit_contributions:
- max_contrib = max(commit_contributions)
- min_contrib = min(commit_contributions)
- avg_contrib = sum(commit_contributions) / len(commit_contributions)
-
- self.pdf.cell(50, 8, 'Work Distribution:', 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(0, 8, f'Max: {max_contrib:.1f}%, Min: {min_contrib:.1f}%, Avg: {avg_contrib:.1f}%', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=10)
-
- # Team Performance Rankings
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Team Performance Rankings', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Top Contributors
- contrib_ranking = data.getAuthorsByContribution()
- efficiency_ranking = data.getAuthorsByEfficiency()
-
- self.pdf.set_font('helvetica', 'B', 12)
- self.pdf.cell(0, 8, 'Top 10 Contributors (by commit percentage):', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self.pdf.set_font('helvetica', '', 10)
-
- for i, (author, percentage) in enumerate(contrib_ranking[:10], 1):
- display_author = author[:30] + "..." if len(author) > 33 else author
- self.pdf.cell(0, 6, f'{i}. {display_author} ({percentage:.1f}%)', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=5)
-
- # Team Performance Table
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Detailed Performance Analysis', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- team_performance = data.getTeamPerformance()
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 8)
- self.pdf.cell(35, 6, 'Author', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 6, 'Commits', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 6, 'Contrib %', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, 'Efficiency', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, 'Consistency', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, 'Leadership', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, 'Overall', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Table data - show top 15 performers
- self.pdf.set_font('helvetica', '', 7)
- sorted_authors = sorted(team_performance.items(), key=lambda x: x[1].get('overall_score', 0), reverse=True)
-
- for author, perf in sorted_authors[:15]:
- author_info = data.getAuthorInfo(author)
-
- commits = author_info.get('commits', 0)
- contrib_pct = perf.get('contribution_percentage', 0)
- efficiency = perf.get('efficiency_score', 0)
- consistency = perf.get('consistency', 0)
- leadership = perf.get('leadership_score', 0)
- overall = perf.get('overall_score', 0)
-
- # Truncate long author names
- display_author = author[:18] + "..." if len(author) > 21 else author
-
- self.pdf.cell(35, 5, display_author, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(20, 5, str(commits), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 5, f'{contrib_pct:.1f}%', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 5, f'{efficiency:.1f}', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 5, f'{consistency:.1f}', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 5, f'{leadership:.1f}', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 5, f'{overall:.1f}', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- self.pdf.ln(h=10)
-
- # Team Assessment Conclusion
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Team Assessment Conclusion', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 10)
-
- # Generate team insights
- top_contributor = contrib_ranking[0] if contrib_ranking else ("N/A", 0)
- most_efficient = efficiency_ranking[0] if efficiency_ranking else ("N/A", 0)
-
- self.pdf.cell(0, 6, f'- Top contributor: {top_contributor[0]} ({top_contributor[1]:.1f}% of commits)', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self.pdf.cell(0, 6, f'- Most efficient developer: {most_efficient[0]} (score: {most_efficient[1]:.1f})', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self.pdf.cell(0, 6, f'- Team size: {total_authors} active contributors', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Work distribution assessment
- if commit_contributions:
- gini_coefficient = self._calculate_gini_coefficient(commit_contributions)
- if gini_coefficient < 0.3:
- distribution_assessment = "Well-distributed (very balanced team)"
- elif gini_coefficient < 0.5:
- distribution_assessment = "Moderately distributed (some imbalance)"
- else:
- distribution_assessment = "Highly concentrated (few dominant contributors)"
-
- self.pdf.cell(0, 6, f'- Work distribution: {distribution_assessment}', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- def _calculate_gini_coefficient(self, values):
- """Calculate Gini coefficient for work distribution analysis."""
- if not values:
- return 0
-
- sorted_values = sorted(values)
- n = len(sorted_values)
- cumsum = sum(sorted_values)
-
- if cumsum == 0:
- return 0
-
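- # Mean absolute difference formulation:
- # Gini = sum_i sum_j |x_i - x_j| / (2 * n * sum(x))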
- sum_of_differences = 0
- for i in range(n):
- for j in range(n):
- sum_of_differences += abs(sorted_values[i] - sorted_values[j])
-
- gini = sum_of_differences / (2 * n * cumsum)
- return gini
-
- def _create_files_page(self, data):
- """Create the files statistics page with charts (mirrors files.html)."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 20)
- self.pdf.cell(0, 15, '5. Files Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Basic file stats
- total_files = data.getTotalFiles()
- total_loc = data.getTotalLOC()
-
- self.pdf.set_font('helvetica', '', 12)
- stats = [
- ('Total files', str(total_files)),
- ('Total lines', str(total_loc)),
- ]
-
- try:
- avg_size = data.getAverageFileSize()
- except (AttributeError, ZeroDivisionError):
- # Fall back to the old calculation if the newer method is unavailable
- avg_size = float(data.getTotalSize()) / total_files if total_files else 0.0
- stats.append(('Average file size', f"{avg_size:.2f} bytes"))
-
- try:
- avg_revisions = data.getAverageRevisionsPerFile()
- stats.append(('Average revisions per file', f"{avg_revisions:.2f}"))
- except AttributeError:
- pass
-
- for label, value in stats:
- self.pdf.cell(50, 8, f"{label}:", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(0, 8, str(value), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=10)
-
- # File extensions
- if hasattr(data, 'extensions') and data.extensions:
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'File Extensions', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 9)
- self.pdf.cell(25, 8, 'Extension', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 8, 'Files', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 8, '% Files', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 8, 'Lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 8, '% Lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 8, 'Lines/File', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Table data - show top extensions
- self.pdf.set_font('helvetica', '', 8)
- sorted_extensions = sorted(data.extensions.items(),
- key=lambda x: x[1]['files'], reverse=True)[:15]
-
- for ext, ext_data in sorted_extensions:
- files = ext_data['files']
- lines = ext_data['lines']
- loc_percentage = (100.0 * lines / total_loc) if total_loc else 0.0
- files_percentage = (100.0 * files / total_files) if total_files else 0.0
- lines_per_file = (lines // files) if files else 0
-
- display_ext = ext if ext else '(no ext)'
-
- self.pdf.cell(25, 6, display_ext[:12], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(20, 6, str(files), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 6, f"{files_percentage:.1f}%", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, str(lines), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 6, f"{loc_percentage:.1f}%", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, str(lines_per_file), 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- self.pdf.ln(h=10)
-
- # SLOC Breakdown by Extension
- sloc_data = data.getSLOCByExtension()
- if sloc_data:
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Source Lines of Code (SLOC) Breakdown', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 8)
- self.pdf.cell(20, 8, 'Extension', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 8, 'Source Lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 8, 'Comment Lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 8, 'Blank Lines', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 8, 'Total', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Table data
- self.pdf.set_font('helvetica', '', 7)
- sorted_sloc = sorted(sloc_data.items(),
- key=lambda x: x[1]['total'], reverse=True)[:15]
-
- for ext, sloc_info in sorted_sloc:
- if sloc_info['total'] == 0:
- continue
-
- display_ext = ext if ext else '(no ext)'
- source_pct = (100.0 * sloc_info['source'] / sloc_info['total']) if sloc_info['total'] else 0.0
- comment_pct = (100.0 * sloc_info['comments'] / sloc_info['total']) if sloc_info['total'] else 0.0
- blank_pct = (100.0 * sloc_info['blank'] / sloc_info['total']) if sloc_info['total'] else 0.0
-
- self.pdf.cell(20, 5, display_ext[:8], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(25, 5, f"{sloc_info['source']} ({source_pct:.1f}%)", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 5, f"{sloc_info['comments']} ({comment_pct:.1f}%)", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 5, f"{sloc_info['blank']} ({blank_pct:.1f}%)", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 5, str(sloc_info['total']), 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- self.pdf.ln(h=10)
-
- # Add new file statistics tables
- try:
- # Largest Files
- largest_files = data.getLargestFiles(10)
- if largest_files:
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Largest Files', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 9)
- self.pdf.cell(80, 8, 'File', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 8, 'Size (bytes)', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 8, 'Size (KB)', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Table data
- self.pdf.set_font('helvetica', '', 8)
- for filepath, size in largest_files:
- size_kb = size / 1024.0
- display_path = filepath[:40] + '...' if len(filepath) > 40 else filepath
- self.pdf.cell(80, 6, display_path, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(30, 6, str(size), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 6, f"{size_kb:.1f}", 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
- except (AttributeError, TypeError):
- pass
-
- try:
- # Files with Most Revisions (Hotspots)
- hotspot_files = data.getFilesWithMostRevisions(10)
- if hotspot_files:
- self.pdf.ln(h=10)
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Files with Most Revisions (Hotspots)', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 9)
- self.pdf.cell(80, 8, 'File', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 8, 'Revisions', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 8, '% of Commits', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Table data
- self.pdf.set_font('helvetica', '', 8)
- total_commits = data.getTotalCommits()
- for filepath, revisions in hotspot_files:
- revision_pct = (100.0 * revisions / total_commits) if total_commits else 0.0
- display_path = filepath[:40] + '...' if len(filepath) > 40 else filepath
- self.pdf.cell(80, 6, display_path, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(30, 6, str(revisions), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 6, f"{revision_pct:.2f}%", 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
- except (AttributeError, TypeError):
- pass
-
- self.pdf.ln(h=10)
-
- # Files by date chart
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Files by Date', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self._add_chart_if_exists('files_by_date.png', 180, 100)
-
- def _create_lines_page(self, data):
- """Create the lines of code statistics page with charts (mirrors lines.html)."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 20)
- self.pdf.cell(0, 15, '6. Lines of Code Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Basic line stats
- self.pdf.set_font('helvetica', '', 12)
- stats = [
- ('Total lines', str(data.getTotalLOC())),
- ('Lines added', str(data.total_lines_added)),
- ('Lines removed', str(data.total_lines_removed)),
- ('Net lines', str(data.total_lines_added - data.total_lines_removed)),
- ]
-
- for label, value in stats:
- self.pdf.cell(50, 8, f"{label}:", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(0, 8, str(value), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=10)
-
- # Lines by year
- if hasattr(data, 'commits_by_year') and data.commits_by_year:
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Activity by Year', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 10)
- self.pdf.cell(25, 8, 'Year', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 8, 'Commits', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 8, '% of Total', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(35, 8, 'Lines Added', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(35, 8, 'Lines Removed', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Table data
- self.pdf.set_font('helvetica', '', 9)
- total_commits = data.getTotalCommits()
-
- for yy in sorted(data.commits_by_year.keys(), reverse=True):
- commits = data.commits_by_year.get(yy, 0)
- percent = (100.0 * commits / total_commits) if total_commits else 0.0
- lines_added = data.lines_added_by_year.get(yy, 0) if hasattr(data, 'lines_added_by_year') else 0
- lines_removed = data.lines_removed_by_year.get(yy, 0) if hasattr(data, 'lines_removed_by_year') else 0
-
- self.pdf.cell(25, 6, str(yy), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 6, str(commits), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 6, f"{percent:.1f}%", 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(35, 6, str(lines_added), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(35, 6, str(lines_removed), 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- self.pdf.ln(h=10)
-
- # Lines of code chart
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Lines of Code Over Time', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self._add_chart_if_exists('lines_of_code.png', 180, 100)
-
- def _create_tags_page(self, data):
- """Create the tags statistics page (mirrors tags.html)."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 20)
- self.pdf.cell(0, 15, '7. Tags Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 12)
-
- if not hasattr(data, 'tags') or not data.tags:
- self.pdf.cell(0, 10, 'No tags found in repository.', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- return
-
- # Basic tag stats
- total_tags = len(data.tags)
- avg_commits_per_tag = (1.0 * data.getTotalCommits() / total_tags) if total_tags else 0.0
-
- stats = [
- ('Total tags', str(total_tags)),
- ('Average commits per tag', f"{avg_commits_per_tag:.2f}"),
- ]
-
- for label, value in stats:
- self.pdf.cell(50, 8, f"{label}:", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(0, 8, str(value), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=10)
-
- # Tags table (most recent first)
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, 'Recent Tags', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 10)
- self.pdf.cell(40, 8, 'Tag Name', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(30, 8, 'Date', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 8, 'Commits', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(80, 8, 'Top Authors', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Sort tags by date (most recent first)
- tags_sorted_by_date_desc = [tag for tag, _ in sorted(data.tags.items(), key=lambda item: item[1]['date'], reverse=True)]
-
- # Show up to 20 most recent tags
- self.pdf.set_font('helvetica', '', 8)
- for tag in tags_sorted_by_date_desc[:20]:
- tag_info = data.tags[tag]
-
- # Get top authors for this tag
- if 'authors' in tag_info:
- authors = sorted(tag_info['authors'].items(),
- key=lambda x: x[1], reverse=True)[:3]
- author_list = ', '.join([f"{author}({commits})" for author, commits in authors])
- else:
- author_list = ''
-
- # Truncate long names
- display_tag = tag[:18] + "..." if len(tag) > 21 else tag
- display_authors = author_list[:35] + "..." if len(author_list) > 38 else author_list
-
- self.pdf.cell(40, 6, display_tag, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(30, 6, tag_info['date'][:10], 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, str(tag_info['commits']), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(80, 6, display_authors, 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- def _create_branches_page(self, data):
- """Create the branches statistics page (mirrors branches.html)."""
- self.pdf.add_page()
- self.pdf.set_font('helvetica', 'B', 20)
- self.pdf.cell(0, 15, '8. Branches Statistics', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 12)
-
- if not hasattr(data, 'branches') or not data.branches:
- self.pdf.cell(0, 10, 'No branches found in repository.', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- return
-
- # Basic branch stats
- total_branches = len(data.getBranches())
- unmerged_branches = data.getUnmergedBranches()
- total_unmerged = len(unmerged_branches)
- main_branch = data.main_branch if hasattr(data, 'main_branch') else 'N/A'
-
- stats = [
- ('Total branches', str(total_branches)),
- ('Unmerged branches', str(total_unmerged)),
- ('Main branch', main_branch),
- ]
-
- for label, value in stats:
- self.pdf.cell(50, 8, f"{label}:", 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(0, 8, str(value), 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=10)
-
- # Branches summary table
- self.pdf.set_font('helvetica', 'B', 12)
- self.pdf.cell(0, 10, 'All Branches', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Table header
- self.pdf.set_font('helvetica', 'B', 9)
- self.pdf.cell(35, 8, 'Branch Name', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 8, 'Status', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 8, 'Commits', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 8, 'Lines Added', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 8, 'Lines Removed', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 8, 'Authors', 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(45, 8, 'Top Author', 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
-
- # Table data - sort by commits descending
- self.pdf.set_font('helvetica', '', 8)
- branches_sorted = sorted(data.branches.items(),
- key=lambda x: x[1].get('commits', 0), reverse=True)
-
- for branch_name, branch_data in branches_sorted:
- # Determine status
- status = 'Unmerged' if branch_name in unmerged_branches else 'Merged'
-
- # Get branch statistics
- commits = branch_data.get('commits', 0)
- lines_added = branch_data.get('lines_added', 0)
- lines_removed = branch_data.get('lines_removed', 0)
- authors_count = len(branch_data.get('authors', {}))
-
- # Get the top author (the author with the most commits on this branch)
- authors = branch_data.get('authors', {})
- if authors:
- top_author = max(authors.items(), key=lambda x: x[1])[0]
- top_author = top_author[:20] + "..." if len(top_author) > 23 else top_author
- else:
- top_author = 'N/A'
-
- # Truncate branch name if too long
- display_branch = branch_name[:18] + "..." if len(branch_name) > 21 else branch_name
-
- self.pdf.cell(35, 6, display_branch, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='L')
- self.pdf.cell(20, 6, status, 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 6, str(commits), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, str(lines_added), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(25, 6, str(lines_removed), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(20, 6, str(authors_count), 1, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
- self.pdf.cell(45, 6, top_author, 1, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Unmerged branches detail section
- if total_unmerged > 0:
- self.pdf.ln(h=10)
- self.pdf.set_font('helvetica', 'B', 14)
- self.pdf.cell(0, 10, f'Unmerged Branches Details ({total_unmerged})', 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 10)
- for branch_name in unmerged_branches:
- if branch_name in data.branches:
- branch_data = data.branches[branch_name]
-
- self.pdf.set_font('helvetica', 'B', 10)
- self.pdf.cell(0, 8, f"Branch: {branch_name}", 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.set_font('helvetica', '', 9)
- self.pdf.cell(20, 6, f" Commits: {branch_data.get('commits', 0)}", 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
- self.pdf.cell(20, 6, f" Lines: +{branch_data.get('lines_added', 0)} -{branch_data.get('lines_removed', 0)}", 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- # Show authors
- authors = branch_data.get('authors', {})
- if authors:
- author_list = sorted(authors.items(), key=lambda x: x[1], reverse=True)
- author_str = ', '.join([f"{author}({commits})" for author, commits in author_list[:3]])
- if len(author_list) > 3:
- author_str += f" and {len(author_list) - 3} more"
- self.pdf.cell(20, 6, f" Authors: {author_str}", 0, new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='L')
-
- self.pdf.ln(h=2)
-
-
-
- def is_git_repository(path):
- """Check if a directory is a valid git repository."""
- if not os.path.isdir(path):
- return False
- git_dir = os.path.join(path, '.git')
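- # In worktrees and submodules, .git is a file rather than a directory; os.path.exists matches either.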
- return os.path.exists(git_dir)
-
- def discover_repositories(scan_path):
- """Discover all git repositories in a directory.
-
- Returns a list of tuples: (repo_name, repo_path)
- where repo_name is the directory name and repo_path is the full path.
- """
- repositories = []
- if not os.path.isdir(scan_path):
- return repositories
-
- try:
- for item in os.listdir(scan_path):
- item_path = os.path.join(scan_path, item)
- if os.path.isdir(item_path) and is_git_repository(item_path):
- # Use directory name as repository name
- repo_name = item
- repositories.append((repo_name, item_path))
- if conf['verbose']:
- print(f' Found repository: {repo_name} at {item_path}')
- except (PermissionError, OSError) as e:
- print(f'Warning: Could not scan directory {scan_path}: {e}')
-
- return repositories
-
- def usage():
- print("""
- Usage: gitstats [options] <gitpath..> <outputpath>
- gitstats [options] --multi-repo <scan-folder> <outputpath>
-
- Options:
- -c key=value Override configuration value
- --debug Enable debug output
- --verbose Enable verbose output
- --multi-repo Scan folder for multiple repositories and generate reports for each
- -h, --help Show this help message
-
- Note: GitStats always generates both HTML and PDF reports.
-
- Examples:
- gitstats repo output # Generates both HTML and PDF reports
- gitstats --verbose repo output # With verbose output
- gitstats --multi-repo /path/to/repos output # Generate reports for all repos in folder
- gitstats --debug -c max_authors=50 repo output
-
- With --multi-repo mode:
- - Scans the specified folder for git repositories
- - Creates a report for each repository in a subfolder named <reponame>_report
- - Only processes directories that are valid git repositories
-
- Default config values:
- %s
-
- Please see the manual page for more details.
- """ % conf)
-
-
- class GitStats:
- def run(self, args_orig):
- multi_repo_mode = False
- optlist, args = getopt.getopt(args_orig, 'hc:', ["help", "debug", "verbose", "multi-repo"])
- for o,v in optlist:
- if o == '-c':
- if '=' not in v:
- print(f'FATAL: Invalid configuration format. Use key=value: {v}')
- sys.exit(1)
- key, value = v.split('=', 1)
- if key not in conf:
- raise KeyError('no such key "%s" in config' % key)
-
- # Validate configuration values
- try:
- # bool must be checked before int: bool is a subclass of int in Python,
- # so isinstance(True, int) is also True and int('true') would raise ValueError.
- if isinstance(conf[key], bool):
- conf[key] = value.lower() in ('true', '1', 'yes', 'on')
- elif isinstance(conf[key], int):
- new_value = int(value)
- if key in ['max_authors', 'max_domains'] and new_value < 1:
- print(f'FATAL: {key} must be a positive integer, got: {new_value}')
- sys.exit(1)
- conf[key] = new_value
- else:
- conf[key] = value
- except ValueError as e:
- print(f'FATAL: Invalid value for {key}: {value} ({e})')
- sys.exit(1)
- elif o == '--debug':
- conf['debug'] = True
- conf['verbose'] = True # Debug implies verbose
- elif o == '--verbose':
- conf['verbose'] = True
- elif o == '--multi-repo':
- multi_repo_mode = True
- elif o in ('-h', '--help'):
- usage()
- sys.exit()
-
- if multi_repo_mode:
- if len(args) != 2:
- print('FATAL: --multi-repo requires exactly two arguments: <scan-folder> <outputpath>')
- usage()
- sys.exit(1)
-
- scan_folder = os.path.abspath(args[0])
- outputpath = os.path.abspath(args[1])
-
- # Validate scan folder
- if not os.path.exists(scan_folder):
- print(f'FATAL: Scan folder does not exist: {scan_folder}')
- sys.exit(1)
- if not os.path.isdir(scan_folder):
- print(f'FATAL: Scan folder is not a directory: {scan_folder}')
- sys.exit(1)
-
- # Discover repositories
- print(f'Scanning folder for git repositories: {scan_folder}')
- repositories = discover_repositories(scan_folder)
-
- if not repositories:
- print(f'No git repositories found in: {scan_folder}')
- sys.exit(0)
-
- print(f'Found {len(repositories)} git repositories:')
- for repo_name, repo_path in repositories:
- print(f' - {repo_name}')
-
- # Generate reports for each repository
- self.run_multi_repo(repositories, outputpath)
- else:
- # Original single/multiple repository mode
- if len(args) < 2:
- usage()
- sys.exit(1)
-
- self.run_single_mode(args)
-
- def run_multi_repo(self, repositories, base_outputpath):
- """Generate reports for multiple repositories."""
- rundir = os.getcwd()
-
- # Validate and create base output directory
- try:
- os.makedirs(base_outputpath, exist_ok=True)
- except PermissionError:
- print(f'FATAL: Permission denied creating output directory: {base_outputpath}')
- sys.exit(1)
- except OSError as e:
- print(f'FATAL: Error creating output directory {base_outputpath}: {e}')
- sys.exit(1)
-
- if not os.path.isdir(base_outputpath):
- print('FATAL: Output path is not a directory or does not exist')
- sys.exit(1)
-
- # Check write permissions
- if not os.access(base_outputpath, os.W_OK):
- print(f'FATAL: No write permission for output directory: {base_outputpath}')
- sys.exit(1)
-
- if not getgnuplotversion():
- print('gnuplot not found')
- sys.exit(1)
-
- if conf['verbose']:
- print('Configuration:')
- for key, value in conf.items():
- print(f' {key}: {value}')
- print()
-
- print(f'Base output path: {base_outputpath}')
-
- successful_reports = 0
- failed_reports = []
-
- for repo_name, repo_path in repositories:
- print(f'\n{"="*60}')
- print(f'Processing repository: {repo_name}')
- print(f'Repository path: {repo_path}')
-
- # Create repository-specific output directory with pattern: repositoryname_report
- repo_output_path = os.path.join(base_outputpath, f'{repo_name}_report')
-
- try:
- os.makedirs(repo_output_path, exist_ok=True)
- print(f'Report output path: {repo_output_path}')
-
- # Process this repository
- self.process_single_repository(repo_path, repo_output_path, rundir)
- successful_reports += 1
- print(f'✓ Successfully generated report for {repo_name}')
-
- except Exception as e:
- failed_reports.append((repo_name, str(e)))
- print(f'✗ Failed to generate report for {repo_name}: {e}')
- if conf['debug']:
- import traceback
- traceback.print_exc()
-
- # Summary
- print(f'\n{"="*60}')
- print('Multi-repository report generation complete!')
- print(f'Successfully processed: {successful_reports}/{len(repositories)} repositories')
-
- if failed_reports:
- print('\nFailed repositories:')
- for repo_name, error in failed_reports:
- print(f' - {repo_name}: {error}')
-
- if successful_reports > 0:
- # Only list repositories that did not fail
- failed_names = {name for name, _ in failed_reports}
- print(f'\nReports generated in: {base_outputpath}')
- print('Repository reports:')
- for repo_name, repo_path in repositories:
- if repo_name not in failed_names:
- report_path = os.path.join(base_outputpath, f'{repo_name}_report')
- print(f' - {repo_name}: {report_path}/index.html')
-
- def run_single_mode(self, args):
- """Original single/multiple repository mode."""
- outputpath = os.path.abspath(args[-1])
- rundir = os.getcwd()
-
- # Validate git paths
- git_paths = args[0:-1]
- for gitpath in git_paths:
- if not os.path.exists(gitpath):
- print(f'FATAL: Git repository path does not exist: {gitpath}')
- sys.exit(1)
- if not os.path.isdir(gitpath):
- print(f'FATAL: Git repository path is not a directory: {gitpath}')
- sys.exit(1)
- git_dir = os.path.join(gitpath, '.git')
- if not os.path.exists(git_dir):
- print(f'FATAL: Path is not a git repository (no .git directory found): {gitpath}')
- sys.exit(1)
-
- # Validate and create output directory
- try:
- os.makedirs(outputpath, exist_ok=True)
- except PermissionError:
- print(f'FATAL: Permission denied creating output directory: {outputpath}')
- sys.exit(1)
- except OSError as e:
- print(f'FATAL: Error creating output directory {outputpath}: {e}')
- sys.exit(1)
-
- if not os.path.isdir(outputpath):
- print('FATAL: Output path is not a directory or does not exist')
- sys.exit(1)
-
- # Check write permissions
- if not os.access(outputpath, os.W_OK):
- print(f'FATAL: No write permission for output directory: {outputpath}')
- sys.exit(1)
-
- if not getgnuplotversion():
- print('gnuplot not found')
- sys.exit(1)
-
- if conf['verbose']:
- print('Configuration:')
- for key, value in conf.items():
- print(f' {key}: {value}')
- print()
-
- print('Output path: %s' % outputpath)
- cachefile = os.path.join(outputpath, 'gitstats.cache')
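- # gitstats.cache stores previously collected data so repeated runs can skip re-collecting unchanged history.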
-
- data = GitDataCollector()
- data.loadCache(cachefile)
-
- for gitpath in git_paths:
- print('Git path: %s' % gitpath)
-
- prevdir = os.getcwd()
- os.chdir(gitpath)
-
- print('Collecting data...')
- data.collect(gitpath)
-
- os.chdir(prevdir)
-
- print('Refining data...')
- data.saveCache(cachefile)
- data.refine()
-
- os.chdir(rundir)
-
- print('Generating report...')
-
- # Always generate both HTML and PDF reports
- print('Creating HTML report...')
- html_report = HTMLReportCreator()
- html_report.create(data, outputpath)
-
- print('Creating PDF report...')
- pdf_report = PDFReportCreator()
- pdf_report.create(data, outputpath)
-
- time_end = time.time()
- exectime_internal = time_end - time_start
- external_percentage = (100.0 * exectime_external) / exectime_internal if exectime_internal > 0 else 0.0
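- # exectime_external is assumed to be accumulated globally while shelling out to git (e.g. in the pipe-output helper).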
- print('Execution time %.5f secs, %.5f secs (%.2f %%) in external commands' % (exectime_internal, exectime_external, external_percentage))
-
- if sys.stdin.isatty():
- print('You may now run:')
- print()
- print(' sensible-browser \'%s\'' % os.path.join(outputpath, 'index.html').replace("'", "'\\''"))
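- # Assumed to match the filename written by PDFReportCreator.create(); keep the two in sync.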
- pdf_filename = f"gitstats_{data.projectname.replace(' ', '_')}.pdf"
- print(' PDF report: \'%s\'' % os.path.join(outputpath, pdf_filename).replace("'", "'\\''"))
- print()
-
- def process_single_repository(self, repo_path, output_path, rundir):
- """Process a single repository and generate its report."""
- cachefile = os.path.join(output_path, 'gitstats.cache')
-
- data = GitDataCollector()
- data.loadCache(cachefile)
-
- print(f' Collecting data from: {repo_path}')
-
- prevdir = os.getcwd()
- os.chdir(repo_path)
-
- data.collect(repo_path)
- os.chdir(prevdir)
-
- print(' Refining data...')
- data.saveCache(cachefile)
- data.refine()
-
- os.chdir(rundir)
-
- print(' Generating report...')
-
- # Always generate both HTML and PDF reports
- print(' Creating HTML report...')
- html_report = HTMLReportCreator()
- html_report.create(data, output_path)
-
- print(' Creating PDF report...')
- pdf_report = PDFReportCreator()
- pdf_report.create(data, output_path)
-
- print(f' Report generated in: {output_path}')
-
- if __name__ == '__main__':
- try:
- g = GitStats()
- g.run(sys.argv[1:])
- except KeyboardInterrupt:
- print('\nInterrupted by user')
- sys.exit(1)
- except KeyError as e:
- print(f'FATAL: Configuration error: {e}')
- sys.exit(1)
- except Exception as e:
- print(f'FATAL: Unexpected error: {e}')
- if conf.get('debug', False):
- import traceback
- traceback.print_exc()
- sys.exit(1)