Преглед на файлове

(Final Stage)

Upgrade to python 3
Make this a module (and installable)
Refactor single file into discrete files
Dan Rapp преди 7 години
родител
ревизия
b414bd5e23

+ 11
- 0
gitstats/__init__.py Целия файл

@@ -0,0 +1,11 @@
1
+from gitstats.gitstats import GitStats
2
+from gitstats._version import get_versions
3
+__version__ = get_versions()['version']
4
+del get_versions
5
+
6
def main():
    """Create a GitStats instance and run it."""
    GitStats().run()


if __name__ == "__main__":
    main()

+ 520
- 0
gitstats/_version.py Целия файл

@@ -0,0 +1,520 @@
1
+
2
+# This file helps to compute a version number in source trees obtained from
3
+# git-archive tarball (such as those provided by GitHub's download-from-tag
4
+# feature). Distribution tarballs (built by setup.py sdist) and build
5
+# directories (produced by setup.py build) will contain a much shorter file
6
+# that just contains the computed version number.
7
+
8
+# This file is released into the public domain. Generated by
9
+# versioneer-0.18 (https://github.com/warner/python-versioneer)
10
+
11
+"""Git implementation of _version.py."""
12
+
13
+import errno
14
+import os
15
+import re
16
+import subprocess
17
+import sys
18
+
19
+
20
def get_keywords():
    """Return the keyword strings used to look up version information.

    The result is a dict with the keys "refnames", "full" and "date".
    """
    # git replaces these placeholder strings during git-archive.
    # setup.py/versioneer.py greps for the variable names, so every
    # assignment has to stay on a line of its own.
    git_refnames = "$Format:%d$"
    git_full = "$Format:%H$"
    git_date = "$Format:%ci$"
    return dict(refnames=git_refnames, full=git_full, date=git_date)
31
+
32
+
33
class VersioneerConfig:
    """Attribute bag for Versioneer configuration values."""
35
+
36
+
37
def get_config():
    """Build and return the VersioneerConfig for this project."""
    # These values were filled in when 'setup.py versioneer' created
    # _version.py.
    settings = (
        ("VCS", "git"),
        ("style", "pep440"),
        ("tag_prefix", ""),
        ("parentdir_prefix", "gitstats-"),
        ("versionfile_source", "gitstats/_version.py"),
        ("verbose", False),
    )
    config = VersioneerConfig()
    for attribute, value in settings:
        setattr(config, attribute, value)
    return config
49
+
50
+
51
class NotThisMethod(Exception):
    """Signal that a version-lookup method does not apply here."""
53
+
54
+
55
LONG_VERSION_PY = {}
HANDLERS = {}  # vcs name -> {method name -> handler function}


def register_vcs_handler(vcs, method):  # decorator
    """Return a decorator that records a function as HANDLERS[vcs][method]."""
    def decorate(f):
        """Register f under (vcs, method) and hand it back unchanged."""
        HANDLERS.setdefault(vcs, {})[method] = f
        return f
    return decorate
68
+
69
+
70
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
                env=None):
    """Run the first launchable executable from ``commands`` with ``args``.

    Each candidate in ``commands`` is tried in order; one that cannot be
    found (ENOENT) is skipped in favour of the next.

    Returns a ``(stdout, returncode)`` pair:
      * ``(None, None)`` if no candidate could be started,
      * ``(None, returncode)`` if the process exited with a non-zero status,
      * ``(stripped_output, 0)`` on success.
    """
    assert isinstance(commands, list)
    # Capture stderr only when the caller wants it hidden from the terminal.
    stderr_target = subprocess.PIPE if hide_stderr else None
    process = None
    for executable in commands:
        argv = [executable] + args
        dispcmd = str(argv)
        try:
            # remember shell=False, so use git.cmd on windows, not just git
            process = subprocess.Popen(argv, cwd=cwd, env=env,
                                       stdout=subprocess.PIPE,
                                       stderr=stderr_target)
        except EnvironmentError as err:
            if err.errno == errno.ENOENT:
                # This executable does not exist; try the next candidate.
                continue
            if verbose:
                print("unable to run %s" % dispcmd)
                print(err)
            return None, None
        break
    else:
        # Every candidate was missing.
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None

    output = process.communicate()[0].strip()
    if sys.version_info[0] >= 3:
        output = output.decode()
    status = process.returncode
    if status != 0:
        if verbose:
            print("unable to run %s (error)" % dispcmd)
            print("stdout was %s" % output)
        return None, status
    return output, status
105
+
106
+
107
def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Derive the version from a ``<prefix><version>`` directory name.

    Source tarballs conventionally unpack into a directory whose name holds
    both the project name and a version string. ``root`` and then up to two
    of its ancestors are examined; the first whose basename starts with
    ``parentdir_prefix`` supplies the version (the rest of the name).

    Raises NotThisMethod when none of the examined directories match.
    """
    tried = []
    candidate = root
    for _ in range(3):
        name = os.path.basename(candidate)
        if name.startswith(parentdir_prefix):
            return {"version": name[len(parentdir_prefix):],
                    "full-revisionid": None,
                    "dirty": False, "error": None, "date": None}
        tried.append(candidate)
        candidate = os.path.dirname(candidate)  # up a level

    if verbose:
        print("Tried directories %s but none started with prefix %s" %
              (str(tried), parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
130
+
131
+
132
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """Extract the version keywords from the given file with a regexp.

    Args:
        versionfile_abs: path of the version file to scan.

    Returns:
        A dict holding whichever of the keys "refnames", "full" and "date"
        could be found. It is empty when the file cannot be opened, and
        holds only what was read so far if reading fails part-way.
    """
    # The code embedded in _version.py can just fetch the value of these
    # keywords. When used from setup.py, we don't want to import _version.py,
    # so we do it with a regexp instead. This function is not used from
    # _version.py.
    assignments = (
        ("git_refnames =", "refnames"),
        ("git_full =", "full"),
        ("git_date =", "date"),
    )
    keywords = {}
    try:
        # The context manager closes the file even if reading fails midway.
        with open(versionfile_abs, "r") as f:
            for line in f:
                stripped = line.strip()
                for prefix, key in assignments:
                    if stripped.startswith(prefix):
                        mo = re.search(r'=\s*"(.*)"', line)
                        if mo:
                            keywords[key] = mo.group(1)
    except EnvironmentError:
        # Best effort: an unreadable file yields whatever was collected.
        pass
    return keywords
159
+
160
+
161
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Get version information from git keywords.

    Args:
        keywords: dict with "refnames" and "full" entries and an optional
            "date" entry.
        tag_prefix: prefix a ref must start with to count as a version tag;
            it is removed from the reported version.
        verbose: when true, print progress information.

    Returns:
        A dict with the keys "version", "full-revisionid", "dirty", "error"
        and "date".

    Raises:
        NotThisMethod: if the keywords are missing, incomplete or were never
            expanded by git-archive.
    """
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    if "refnames" not in keywords or "full" not in keywords:
        # Report an incomplete keyword set through the exception callers
        # already handle, rather than leaking a KeyError.
        raise NotThisMethod("incomplete keywords, not using")
    date = keywords.get("date")
    if date is not None:
        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = {r.strip() for r in refnames.strip("()").split(",")}
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = {r for r in refs if re.search(r'\d', r)}
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix):]
            if verbose:
                print("picking %s" % r)
            return {"version": r,
                    "full-revisionid": keywords["full"].strip(),
                    "dirty": False, "error": None,
                    "date": date}
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": keywords["full"].strip(),
            "dirty": False, "error": "no suitable tags", "date": None}
214
+
215
+
216
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.

    Args:
        tag_prefix: prefix that version tags must start with.
        root: directory in which the git commands are run.
        verbose: when true, print diagnostic messages.
        run_command: callable used to invoke git; it must return a
            ``(stdout, returncode)`` pair.

    Returns:
        A "pieces" dict. It always holds "long", "short", "error" and
        "dirty"; unless "error" is set it also holds "closest-tag",
        "distance" and "date".

    Raises:
        NotThisMethod: if ``root`` is not under git control or one of the
            required git commands fails.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    _, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
                        hide_stderr=True)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
                                          "--always", "--long",
                                          "--match", "%s*" % tag_prefix],
                                   cwd=root)
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[:git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
            # unparseable. Maybe git-describe is misbehaving?
            pieces["error"] = ("unable to parse git-describe output: '%s'"
                               % describe_out)
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
                               % (full_tag, tag_prefix))
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
                                    cwd=root)
        if count_out is None:
            # Same failure handling as the other git calls above; without it
            # int(None) would raise a TypeError.
            raise NotThisMethod("'git rev-list' failed")
        pieces["distance"] = int(count_out)  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date_out, rc = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
                               cwd=root)
    if date_out is None:
        # Without this check a failed 'git show' would raise AttributeError.
        raise NotThisMethod("'git show' failed")
    pieces["date"] = date_out.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces
306
+
307
+
308
def plus_or_dot(pieces):
    """Return "." if the closest tag already contains a "+", else "+".

    A missing "closest-tag" entry, or one whose value is None, is treated
    as an empty tag, so "+" is returned.
    """
    # .get(key, "") alone still yields None when the key is present with a
    # None value, so fall back with ``or`` instead.
    closest_tag = pieces.get("closest-tag") or ""
    return "." if "+" in closest_tag else "+"
313
+
314
+
315
def render_pep440(pieces):
    """Render pieces as a version with a "local version identifier".

    The target form is TAG[+DISTANCE.gHEX[.dirty]]. A tagged build that is
    then dirtied renders as TAG+0.gHEX.dirty.

    Exceptions:
    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        rendered = "0+untagged.%d.g%s" % (pieces["distance"],
                                          pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
        return rendered

    rendered = tag
    if pieces["distance"] or pieces["dirty"]:
        rendered += plus_or_dot(pieces)
        rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered
338
+
339
+
340
def render_pep440_pre(pieces):
    """Render pieces as TAG[.post.devDISTANCE]; the dirty flag is ignored.

    Exceptions:
    1: no tags. 0.post.devDISTANCE
    """
    tag = pieces["closest-tag"]
    distance = pieces["distance"]
    if not tag:
        # exception #1
        return "0.post.dev%d" % distance
    if distance:
        return tag + ".post.dev%d" % distance
    return tag
354
+
355
+
356
def render_pep440_post(pieces):
    """Render pieces as TAG[.postDISTANCE[.dev0]+gHEX].

    The ".dev0" marks a dirty tree. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        rendered = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
        return rendered + "+g%s" % pieces["short"]

    rendered = tag
    if pieces["distance"] or pieces["dirty"]:
        rendered += ".post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
        rendered += plus_or_dot(pieces)
        rendered += "g%s" % pieces["short"]
    return rendered
381
+
382
+
383
def render_pep440_old(pieces):
    """Render pieces as TAG[.postDISTANCE[.dev0]].

    The ".dev0" marks a dirty tree.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag:
        if not (pieces["distance"] or pieces["dirty"]):
            # Clean build exactly on the tag: nothing to append.
            return tag
        base = tag
    else:
        # exception #1
        base = "0"

    rendered = base + ".post%d" % pieces["distance"]
    if pieces["dirty"]:
        rendered += ".dev0"
    return rendered
403
+
404
+
405
def render_git_describe(pieces):
    """Render pieces as TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        parts = [tag]
        if pieces["distance"]:
            parts.append("%d" % pieces["distance"])
            parts.append("g%s" % pieces["short"])
    else:
        # exception #1
        parts = [pieces["short"]]
    if pieces["dirty"]:
        parts.append("dirty")
    return "-".join(parts)
423
+
424
+
425
def render_git_describe_long(pieces):
    """Render pieces as TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always --long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        parts = [tag, "%d" % pieces["distance"], "g%s" % pieces["short"]]
    else:
        # exception #1
        parts = [pieces["short"]]
    if pieces["dirty"]:
        parts.append("dirty")
    return "-".join(parts)
443
+
444
+
445
def render(pieces, style):
    """Render the given version pieces into the requested style.

    If ``pieces["error"]`` is set, a result with version "unknown" carrying
    that error is returned and ``style`` is ignored. An empty style or
    "default" means "pep440". An unrecognised style raises ValueError.
    """
    error = pieces["error"]
    if error:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": error,
                "date": None}

    if not style or style == "default":
        style = "pep440"  # the default

    renderers = {
        "pep440": render_pep440,
        "pep440-pre": render_pep440_pre,
        "pep440-post": render_pep440_post,
        "pep440-old": render_pep440_old,
        "git-describe": render_git_describe,
        "git-describe-long": render_git_describe_long,
    }
    renderer = renderers.get(style)
    if renderer is None:
        raise ValueError("unknown style '%s'" % style)
    rendered = renderer(pieces)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None,
            "date": pieces.get("date")}
475
+
476
+
477
def get_versions():
    """Get version information or return default if unable to do so.

    The lookup methods are tried in order: expanded git-archive keywords,
    'git describe' in the source tree, then the parent directory name. The
    first that does not raise NotThisMethod supplies the result.
    """
    def unknown(reason):
        """Build the fallback result carrying the given error text."""
        return {"version": "0+unknown", "full-revisionid": None,
                "dirty": None,
                "error": reason,
                "date": None}

    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    # case we can only use expanded keywords.
    cfg = get_config()
    verbose = cfg.verbose

    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
                                          verbose)
    except NotThisMethod:
        pass

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Strip one
        # path component per segment to get from __file__ back to the root.
        for _ in cfg.versionfile_source.split('/'):
            root = os.path.dirname(root)
    except NameError:
        return unknown("unable to find root of source tree")

    try:
        return render(git_pieces_from_vcs(cfg.tag_prefix, root, verbose),
                      cfg.style)
    except NotThisMethod:
        pass

    if cfg.parentdir_prefix:
        try:
            return versions_from_parentdir(cfg.parentdir_prefix, root,
                                           verbose)
        except NotThisMethod:
            pass

    return unknown("unable to compute version")

+ 157
- 0
gitstats/datacollector.py Целия файл

@@ -0,0 +1,157 @@
1
+import datetime
2
+import os
3
+import pickle
4
+import time
5
+import zlib
6
+
7
+
8
class DataCollector:
    """Manages data collection from a revision control repository.

    This base class sets up the statistics containers and provides the
    cache load/save helpers. Its query methods return placeholder values.
    """

    def __init__(self, conf):
        """Initialise empty statistics.

        Args:
            conf: configuration mapping; ``collect`` reads its
                'project_name' entry.
        """
        self.conf = conf
        self.stamp_created = time.time()
        self.cache = {}
        self.total_authors = 0
        self.activity_by_hour_of_day = {}  # hour -> commits
        self.activity_by_day_of_week = {}  # day -> commits
        self.activity_by_month_of_year = {}  # month [1-12] -> commits
        self.activity_by_hour_of_week = {}  # weekday -> hour -> commits
        self.activity_by_hour_of_day_busiest = 0
        self.activity_by_hour_of_week_busiest = 0
        self.activity_by_year_week = {}  # yy_wNN -> commits
        self.activity_by_year_week_peak = 0

        # name -> {commits, first_commit_stamp, last_commit_stamp,
        #          last_active_day, active_days, lines_added, lines_removed}
        self.authors = {}

        self.total_commits = 0
        self.total_files = 0
        self.authors_by_commits = 0

        # domains
        self.domains = {}  # domain -> commits

        # author of the month
        self.author_of_month = {}  # month -> author -> commits
        self.author_of_year = {}  # year -> author -> commits
        self.commits_by_month = {}  # month -> commits
        self.commits_by_year = {}  # year -> commits
        self.lines_added_by_month = {}  # month -> lines added
        self.lines_added_by_year = {}  # year -> lines added
        self.lines_removed_by_month = {}  # month -> lines removed
        self.lines_removed_by_year = {}  # year -> lines removed
        self.first_commit_stamp = 0
        self.last_commit_stamp = 0
        self.last_active_day = None
        self.active_days = set()

        # lines
        self.total_lines = 0
        self.total_lines_added = 0
        self.total_lines_removed = 0

        # size
        self.total_size = 0

        # timezone
        self.commits_by_timezone = {}  # timezone -> commits

        # tags
        self.tags = {}

        self.files_by_stamp = {}  # stamp -> files

        # extensions
        self.extensions = {}  # extension -> files, lines

        # line statistics
        self.changes_by_date = {}  # stamp -> { files, ins, del }

    def collect(self, dir):
        """Record the repository directory and derive the project name.

        This should be the main function to extract data from the
        repository. The base implementation only stores ``dir`` and sets
        ``projectname`` to the configured 'project_name', or to the
        directory's base name when none is configured.
        """
        # The parameter keeps its original name 'dir' (although it shadows
        # the builtin) so existing keyword callers continue to work.
        self.dir = dir
        # Truthiness also covers a project name of None, not just ''.
        if not self.conf['project_name']:
            self.projectname = os.path.basename(os.path.abspath(dir))
        else:
            self.projectname = self.conf['project_name']

    def loadCache(self, cachefile):
        """Load cacheable data from ``cachefile`` into ``self.cache``.

        A missing file is silently ignored. Both zlib-compressed and
        legacy uncompressed pickles are accepted.
        """
        if not os.path.exists(cachefile):
            return
        print('Loading cache...')
        # Read everything up front so the handle is closed on every path.
        with open(cachefile, 'rb') as f:
            raw = f.read()
        # NOTE(review): pickle must only be used on trusted data; this
        # assumes the cache file was written by saveCache.
        try:
            self.cache = pickle.loads(zlib.decompress(raw))
        except zlib.error:
            # temporary hack to upgrade non-compressed caches
            self.cache = pickle.loads(raw)

    def refine(self):
        """Produce any additional statistics from the extracted data."""
        pass

    def getAuthorInfo(self, author):
        """Get a dictionary of author; this base version returns None."""
        return None

    def getActivityByDayOfWeek(self):
        """Return activity by day of week; empty in this base version."""
        return {}

    def getActivityByHourOfDay(self):
        """Return activity by hour of day; empty in this base version."""
        return {}

    def getDomainInfo(self, domain):
        """Get a dictionary of domains; this base version returns None."""
        return None

    def getAuthors(self):
        """Get a list of authors; empty in this base version."""
        return []

    def getFirstCommitDate(self):
        """Return the first commit date; this base version returns now."""
        return datetime.datetime.now()

    def getLastCommitDate(self):
        """Return the last commit date; this base version returns now."""
        return datetime.datetime.now()

    def getStampCreated(self):
        """Return the timestamp taken when this collector was created."""
        return self.stamp_created

    def getTags(self):
        """Return the tags; empty in this base version."""
        return []

    def getTotalAuthors(self):
        """Return the author count; -1 in this base version."""
        return -1

    def getTotalCommits(self):
        """Return the commit count; -1 in this base version."""
        return -1

    def getTotalFiles(self):
        """Return the file count; -1 in this base version."""
        return -1

    def getTotalLOC(self):
        """Return the total lines of code; -1 in this base version."""
        return -1

    def saveCache(self, cachefile):
        """Save cacheable data to ``cachefile`` as a zlib-compressed pickle.

        The data goes to ``cachefile + '.tmp'`` first and is then moved
        into place, so a failed write does not overwrite an existing cache.
        """
        print('Saving cache...')
        temp_path = cachefile + '.tmp'
        with open(temp_path, 'wb') as f:
            f.write(zlib.compress(pickle.dumps(self.cache)))
        # os.replace overwrites an existing destination, so no separate
        # remove step is needed.
        os.replace(temp_path, cachefile)

+ 452
- 0
gitstats/gitdatacollector.py Целия файл

@@ -0,0 +1,452 @@
1
+import datetime
2
+import re
3
+import os
4
+
5
+from multiprocessing import Pool
6
+
7
+from .datacollector import DataCollector
8
+from .miscfuncs import getcommitrange, getkeyssortedbyvaluekey, getlogrange, getnumoffilesfromrev, getnumoflinesinblob, \
9
+    getpipeoutput, getstatsummarycounts
10
+
11
+
12
class GitDataCollector(DataCollector):
    """Collect tag, commit, author, file and line statistics from git.

    Every git invocation goes through the ``getpipeoutput`` helper; the
    commands carry no repository path, so they presumably act on the current
    working directory (the caller is expected to ``chdir`` first - confirm
    against ``getpipeoutput``).

    NOTE(review): commands are assembled as shell strings and interpolate tag
    names and revision ranges; a hostile ref name could inject shell syntax.
    Fixing that requires changing ``getpipeoutput`` and is out of scope here.
    """

    # Matches the "N file(s) changed, ..." summary line of ``git log --shortstat``.
    _SHORTSTAT_RE = re.compile(r'files? changed')

    def __init__(self, conf):
        """Store the configuration mapping via the base class."""
        super().__init__(conf)

    def collect(self, directory):
        """Gather all statistics and accumulate them on ``self``.

        May be called once per repository; counters are added to, while
        ``changes_by_date`` and ``changes_by_date_by_author`` are reset on
        every call.

        :param directory: passed through to ``DataCollector.collect``.
        """
        super().collect(directory)

        self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getlogrange(self.conf), 'wc -l']))

        self._collect_tags()
        self._collect_revision_stats()
        self._collect_file_counts()
        self._collect_extensions()
        self._collect_line_stats()
        self._collect_author_line_stats()

    def _map_in_pool(self, func, items):
        """Run ``func`` over ``items`` in a worker pool and return the results.

        No pool is started for an empty work list, and the workers are always
        terminated and joined, even when ``map`` raises.
        """
        if not items:
            return []
        pool = Pool(processes=self.conf['processes'])
        try:
            return pool.map(func, items)
        finally:
            pool.terminate()
            pool.join()

    @staticmethod
    def _parse_shortstat(line):
        """Return ``(files, inserted, deleted)`` for a shortstat summary line.

        Prints a warning and returns zeros when the line cannot be parsed.
        """
        numbers = getstatsummarycounts(line)
        if len(numbers) == 3:
            files, inserted, deleted = (int(el) for el in numbers)
            return files, inserted, deleted
        print(f'Warning: failed to handle line "{line}"')
        return 0, 0, 0

    def _collect_tags(self):
        """Record every tag and the per-author commit counts between tags."""
        for line in getpipeoutput(['git show-ref --tags']).split('\n'):
            if not line:
                continue
            (line_hash, tag) = line.split(' ', 1)

            tag = tag.replace('refs/tags/', '')
            output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % line_hash])
            if not output:
                continue
            try:
                stamp = int(output.split(' ')[0])
            except ValueError:
                stamp = 0
            self.tags[tag] = {'stamp': stamp, 'hash': line_hash,
                              'date': datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0,
                              'authors': {}}

        # Walk tags oldest first (by date string, ties broken by name) so each
        # tag only counts the commits added since the previous one.
        tags_sorted_by_date_asc = sorted(self.tags, key=lambda name: (self.tags[name]['date'], name))
        prev = None
        for tag in tags_sorted_by_date_asc:
            cmd = 'git shortlog -s "%s"' % tag
            if prev is not None:
                cmd += ' "^%s"' % prev
            output = getpipeoutput([cmd])
            if not output:
                continue
            prev = tag
            for line in output.split('\n'):
                # Lines look like "<spaces><count><tab><author>", hence the
                # empty first field after splitting on whitespace.
                parts = re.split(r'\s+', line, 2)
                if len(parts) < 3:
                    continue
                commits = int(parts[1])
                author = parts[2]
                self.tags[tag]['commits'] += commits
                self.tags[tag]['authors'][author] = commits

    def _collect_revision_stats(self):
        """Accumulate per-commit activity, author, domain and timezone stats."""
        # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
        lines = getpipeoutput(
            ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange(self.conf, 'HEAD'),
             'grep -v ^commit']).split('\n')
        for line in lines:
            parts = line.split(' ', 4)
            if len(parts) < 5 or '<' not in parts[4]:
                # Blank output (e.g. an empty revision range) or a malformed line.
                continue
            try:
                stamp = int(parts[0])
            except ValueError:
                stamp = 0
            timezone = parts[3]
            author, mail = parts[4].split('<', 1)
            author = author.rstrip()
            mail = mail.rstrip('>')
            domain = mail.rsplit('@', 1)[1] if '@' in mail else '?'
            date = datetime.datetime.fromtimestamp(float(stamp))

            # First and last commit stamp (may be in any order because of cherry-picking and patches)
            if stamp > self.last_commit_stamp:
                self.last_commit_stamp = stamp
            if self.first_commit_stamp == 0 or stamp < self.first_commit_stamp:
                self.first_commit_stamp = stamp

            # activity by hour of day, tracking the busiest hour
            hour = date.hour
            self.activity_by_hour_of_day[hour] = self.activity_by_hour_of_day.get(hour, 0) + 1
            if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
                self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]

            # activity by day of week
            day = date.weekday()
            self.activity_by_day_of_week[day] = self.activity_by_day_of_week.get(day, 0) + 1

            # commits per mail domain
            domain_info = self.domains.setdefault(domain, {})
            domain_info['commits'] = domain_info.get('commits', 0) + 1

            # activity by hour of week, tracking the busiest slot
            hours_of_day = self.activity_by_hour_of_week.setdefault(day, {})
            hours_of_day[hour] = hours_of_day.get(hour, 0) + 1
            if hours_of_day[hour] > self.activity_by_hour_of_week_busiest:
                self.activity_by_hour_of_week_busiest = hours_of_day[hour]

            # activity by month of year
            month = date.month
            self.activity_by_month_of_year[month] = self.activity_by_month_of_year.get(month, 0) + 1

            # yearly/weekly activity and its peak
            yyw = date.strftime('%Y-%W')
            self.activity_by_year_week[yyw] = self.activity_by_year_week.get(yyw, 0) + 1
            if self.activity_by_year_week_peak < self.activity_by_year_week[yyw]:
                self.activity_by_year_week_peak = self.activity_by_year_week[yyw]

            # author first/last commit; commits may be in any date order
            # because of cherry-picking and patches
            author_info = self.authors.setdefault(author, {})
            author_info['last_commit_stamp'] = max(stamp, author_info.get('last_commit_stamp', stamp))
            author_info['first_commit_stamp'] = min(stamp, author_info.get('first_commit_stamp', stamp))

            # author of the month/year
            yymm = date.strftime('%Y-%m')
            month_authors = self.author_of_month.setdefault(yymm, {})
            month_authors[author] = month_authors.get(author, 0) + 1
            self.commits_by_month[yymm] = self.commits_by_month.get(yymm, 0) + 1

            yy = date.year
            year_authors = self.author_of_year.setdefault(yy, {})
            year_authors[author] = year_authors.get(author, 0) + 1
            self.commits_by_year[yy] = self.commits_by_year.get(yy, 0) + 1

            # authors: active days
            yymmdd = date.strftime('%Y-%m-%d')
            if 'last_active_day' not in author_info:
                author_info['last_active_day'] = yymmdd
                author_info['active_days'] = {yymmdd}
            elif yymmdd != author_info['last_active_day']:
                author_info['last_active_day'] = yymmdd
                author_info['active_days'].add(yymmdd)

            # project: active days
            if yymmdd != self.last_active_day:
                self.last_active_day = yymmdd
                self.active_days.add(yymmdd)

            # timezone
            self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1

    def _collect_file_counts(self):
        """Record the number of files in the tree at every revision.

        Counts already present in ``self.cache['files_in_tree']`` are reused;
        the remaining trees are counted in a worker pool and cached.
        """
        # outputs "<stamp> <tree hash>" for each revision
        revlines = getpipeoutput(
            ['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange(self.conf, 'HEAD'),
             'grep -v ^commit']).strip().split('\n')
        cached_counts = self.cache.get('files_in_tree', {})
        counts = []  # (stamp, file count) pairs, cached entries first
        revs_to_read = []
        for revline in revlines:
            fields = revline.split(' ')
            if len(fields) != 2:
                if revline:
                    print(f'Warning: failed to parse line "{revline}"')
                continue
            stamp, rev = fields
            if rev in cached_counts:
                counts.append((stamp, cached_counts[rev]))
            else:
                revs_to_read.append((stamp, rev))

        # Read the revisions that were not found in the cache
        fresh = self._map_in_pool(getnumoffilesfromrev, revs_to_read)
        if fresh:
            tree_cache = self.cache.setdefault('files_in_tree', {})
            for (stamp, rev, count) in fresh:
                tree_cache[rev] = count
                counts.append((stamp, count))

        self.total_commits += len(counts)
        for stamp, count in counts:
            try:
                self.files_by_stamp[int(stamp)] = int(count)
            except ValueError:
                print(f'Warning: failed to parse line "{stamp} {count}"')

    def _collect_extensions(self):
        """Accumulate file count, total size and line counts per extension.

        Line counts already present in ``self.cache['lines_in_blob']`` are
        reused; the remaining blobs are counted in a worker pool and cached.
        """
        lines = getpipeoutput(
            ['git ls-tree -r -l -z %s' % getcommitrange(self.conf, 'HEAD', end_only=True)]).split('\000')
        cached_lines = self.cache.get('lines_in_blob', {})
        blobs_to_read = []
        for line in lines:
            if not line:
                continue
            # "<mode> <type> <object> <size>\t<path>"
            parts = re.split(r'\s+', line, 4)
            if len(parts) < 5:
                continue
            if parts[0] == '160000' and parts[3] == '-':
                # skip submodules
                continue
            blob_id = parts[2]
            size = int(parts[3])
            fullpath = parts[4]

            self.total_size += size
            self.total_files += 1

            _, ext = os.path.splitext(fullpath)
            if len(ext) > self.conf['max_ext_length']:
                # overly long "extensions" are counted as having no extension
                ext = ''
            if ext not in self.extensions:
                self.extensions[ext] = {'files': 0, 'lines': 0}
            self.extensions[ext]['files'] += 1
            if blob_id in cached_lines:
                self.extensions[ext]['lines'] += cached_lines[blob_id]
            else:
                blobs_to_read.append((ext, blob_id))

        # Get line counts for the blobs that were not found in the cache
        fresh = self._map_in_pool(getnumoflinesinblob, blobs_to_read)
        if fresh:
            blob_cache = self.cache.setdefault('lines_in_blob', {})
            for (ext, blob_id, linecount) in fresh:
                blob_cache[blob_id] = linecount
                self.extensions[ext]['lines'] += linecount

    def _collect_line_stats(self):
        """Build ``changes_by_date`` and the per-month/year line totals."""
        # git log outputs, per commit:
        #   <stamp> <author>
        #    N files changed, N insertions (+), N deletions(-)
        self.changes_by_date = {}  # stamp -> { files, ins, del, lines }
        # computation of lines of code by date is better done
        # on a linear history.
        extra = ''
        if self.conf['linear_linestats']:
            extra = '--first-parent -m'
        lines = getpipeoutput(
            ['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getlogrange(self.conf, 'HEAD'))]).split('\n')
        files = 0
        inserted = 0
        deleted = 0
        total_lines = 0
        # Reading the log backwards goes oldest commit first and yields each
        # commit's summary line before its "<stamp> <author>" header.
        for line in reversed(lines):
            if not line:
                continue

            if self._SHORTSTAT_RE.search(line) is not None:
                files, inserted, deleted = self._parse_shortstat(line)
                total_lines += inserted - deleted
                self.total_lines_added += inserted
                self.total_lines_removed += deleted
                continue

            # <stamp> <author>
            stamp_text, separator, _author = line.partition(' ')
            if not separator:
                print(f'Warning: unexpected line "{line}"')
                continue
            try:
                stamp = int(stamp_text)
            except ValueError:
                print(f'Warning: unexpected line "{line}"')
                continue
            self.changes_by_date[stamp] = {'files': files, 'ins': inserted, 'del': deleted,
                                           'lines': total_lines}

            date = datetime.datetime.fromtimestamp(stamp)
            yymm = date.strftime('%Y-%m')
            self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
            self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted

            yy = date.year
            self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy, 0) + inserted
            self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted

            # a commit without a summary line (e.g. no changes) counts as zero
            files, inserted, deleted = 0, 0, 0
        self.total_lines += total_lines

    def _collect_author_line_stats(self):
        """Accumulate commits and lines added/removed per author over time."""
        # defined for stamp, author only if author commited at this timestamp.
        self.changes_by_date_by_author = {}  # stamp -> author -> { lines_added, commits }

        # Similar to _collect_line_stats, but never use --first-parent
        # (we need to walk through every commit to know who
        # committed what, not just through mainline)
        lines = getpipeoutput(
            ['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getlogrange(self.conf, 'HEAD'))]).split('\n')
        inserted = 0
        deleted = 0
        stamp = 0
        for line in reversed(lines):
            if not line:
                continue

            if self._SHORTSTAT_RE.search(line) is not None:
                _files, inserted, deleted = self._parse_shortstat(line)
                continue

            # <stamp> <author>
            stamp_text, separator, author = line.partition(' ')
            if not separator:
                print(f'Warning: unexpected line "{line}"')
                continue
            try:
                new_stamp = int(stamp_text)
            except ValueError:
                print(f'Warning: unexpected line "{line}"')
                continue
            # clock skew: never go back in time, to avoid having an ugly graph
            stamp = max(stamp, new_stamp)

            author_info = self.authors.setdefault(author, {})
            author_info['commits'] = author_info.get('commits', 0) + 1
            author_info['lines_added'] = author_info.get('lines_added', 0) + inserted
            author_info['lines_removed'] = author_info.get('lines_removed', 0) + deleted

            by_author = self.changes_by_date_by_author.setdefault(stamp, {})
            snapshot = by_author.setdefault(author, {})
            snapshot['lines_added'] = author_info['lines_added']
            snapshot['commits'] = author_info['commits']
            inserted, deleted = 0, 0

    def refine(self):
        """Derive per-author ranking, commit share and activity span."""
        # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
        self.authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
        self.authors_by_commits.reverse()  # most first
        for place, name in enumerate(self.authors_by_commits, start=1):
            self.authors[name]['place_by_commits'] = place

        for a in self.authors.values():
            a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
            date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
            date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
            a['date_first'] = date_first.strftime('%Y-%m-%d')
            a['date_last'] = date_last.strftime('%Y-%m-%d')
            a['timedelta'] = date_last - date_first
            a.setdefault('lines_added', 0)
            a.setdefault('lines_removed', 0)

    def getActiveDays(self):
        """Return the set of 'YYYY-MM-DD' days with at least one commit."""
        return self.active_days

    def getActivityByDayOfWeek(self):
        """Return the mapping weekday (0 = Monday) -> commit count."""
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        """Return the mapping hour of day -> commit count."""
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        """Return the statistics dict for ``author``; raises KeyError if unknown."""
        return self.authors[author]

    def getAuthors(self, limit=None):
        """Return author names, most commits first, truncated to ``limit`` if given."""
        res = getkeyssortedbyvaluekey(self.authors, 'commits')
        res.reverse()
        return res[:limit]

    def getCommitDeltaDays(self):
        """Return the whole number of days spanned by the first and last commit.

        Floor division keeps the result an integer; true division (the
        Python 3 meaning of ``/``) would yield a fractional day count.
        """
        return (self.last_commit_stamp // 86400 - self.first_commit_stamp // 86400) + 1

    def getDomainInfo(self, domain):
        """Return the statistics dict for ``domain``; raises KeyError if unknown."""
        return self.domains[domain]

    def getDomains(self):
        """Return a view of all mail domains seen."""
        return self.domains.keys()

    def getFirstCommitDate(self):
        """Return the earliest commit time as a local ``datetime``."""
        return datetime.datetime.fromtimestamp(self.first_commit_stamp)

    def getLastCommitDate(self):
        """Return the latest commit time as a local ``datetime``."""
        return datetime.datetime.fromtimestamp(self.last_commit_stamp)

    def getTags(self):
        """Return the tag names reported by ``git show-ref --tags``.

        Empty entries are dropped, so a repository without tags yields ``[]``
        rather than ``['']``.
        """
        lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
        return [tag for tag in lines.split('\n') if tag]

    def getTagDate(self, tag):
        """Return the 'YYYY-MM-DD' author date of the commit ``tag`` points to."""
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        """Return the accumulated number of authors."""
        return self.total_authors

    def getTotalCommits(self):
        """Return the accumulated number of commits."""
        return self.total_commits

    def getTotalFiles(self):
        """Return the accumulated number of files."""
        return self.total_files

    def getTotalLOC(self):
        """Return the accumulated number of lines of code."""
        return self.total_lines

    def getTotalSize(self):
        """Return the accumulated size reported by ``git ls-tree -l``."""
        return self.total_size

    def revToDate(self, rev):
        """Return the 'YYYY-MM-DD' author date of revision ``rev``."""
        stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')

+ 116
- 0
gitstats/gitstats.py Целия файл

@@ -0,0 +1,116 @@
1
+#!/usr/bin/python
2
+# Copyright (c) 2007-2014 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/AUTHOR)
3
+# GPLv2 / GPLv3
4
+import argparse
5
+import os
6
+import sys
7
+import time
8
+
9
+from .gitdatacollector import GitDataCollector
10
+from .htmlreportcreator import HTMLReportCreator
11
+from .miscfuncs import getgnuplotversion
12
+
13
# Timing counters; exectime_external is reported by GitStats.run() as the
# time spent in external commands (nothing in this module updates it).
exectime_internal = 0.0
exectime_external = 0.0

# Default configuration, shared by the data collector and the report creator.
conf = dict(
    max_domains=10,
    max_ext_length=10,  # longer file extensions are counted as "no extension"
    style='gitstats.css',  # stylesheet resource copied next to the report
    max_authors=20,
    authors_top=5,
    commit_begin='',
    commit_end='HEAD',
    linear_linestats=1,  # truthy: line stats use "git log --first-parent -m"
    project_name='',
    processes=8,  # worker processes used when reading trees and blobs
    start_date='',
)
29
+
30
class GitStats:
    """Command-line driver: collect statistics from git and write an HTML report."""

    def _usage(self):
        """Print the usage text, including the default configuration."""
        print(f"""
    Usage: gitstats [options] <gitpath..> <outputpath>

    Options:
    -c key=value     Override configuration value

    Default config values:
    {conf}

    Please see the manual page for more details.
    """)

    def run(self):
        """Parse ``sys.argv``, collect data from every git path and build the report.

        The last positional argument is the output directory; all preceding
        ones are repositories to analyse. Exits with status 0 after printing
        the usage text when fewer than two arguments are given, and with
        status 1 when the output directory or gnuplot is unavailable.
        """
        parser = argparse.ArgumentParser(description='GitStats')
        # TODO: the '-c' option advertised by _usage() is not parsed yet.

        (_args, remaining_args) = parser.parse_known_args()

        # At least one repository plus the output path is required; with a
        # single argument there would be nothing to collect from.
        if len(remaining_args) < 2:
            self._usage()
            sys.exit(0)

        time_start = time.time()

        outputpath = os.path.abspath(remaining_args[-1])
        paths = remaining_args[0:-1]

        rundir = os.getcwd()

        try:
            os.makedirs(outputpath)
        except OSError:
            # Already exists or cannot be created; the isdir check decides.
            pass
        if not os.path.isdir(outputpath):
            print('FATAL: Output path is not a directory or does not exist')
            sys.exit(1)

        if not getgnuplotversion():
            print('gnuplot not found')
            sys.exit(1)

        print(f'Output path: {outputpath}')
        cachefile = os.path.join(outputpath, 'gitstats.cache')

        data = GitDataCollector(conf)
        data.loadCache(cachefile)

        for gitpath in paths:
            print(f'Git path: {gitpath}')

            prevdir = os.getcwd()
            os.chdir(gitpath)
            try:
                print('Collecting data...')
                data.collect(gitpath)
            finally:
                # Restore the working directory even if collection fails, so
                # later relative git paths still resolve.
                os.chdir(prevdir)

        print('Refining data...')
        data.saveCache(cachefile)
        data.refine()

        os.chdir(rundir)

        print('Generating report...')
        report = HTMLReportCreator(conf)
        report.create(data, outputpath)

        time_end = time.time()
        calculated_exectime_internal = time_end - time_start
        # Guard against a zero elapsed time on coarse clocks.
        if calculated_exectime_internal:
            external_percent = (100.0 * exectime_external) / calculated_exectime_internal
        else:
            external_percent = 0.0
        print(
            f'Execution time {calculated_exectime_internal} secs, {exectime_external} secs ({external_percent}%) in external commands)')
        if sys.stdin.isatty():
            print('You may now run:')
            print()
            print('   sensible-browser \'%s\'' % os.path.join(outputpath, 'index.html').replace("'", "'\\''"))
            print()
107
+        if sys.stdin.isatty():
108
+            print('You may now run:')
109
+            print()
110
+            print('   sensible-browser \'%s\'' % os.path.join(outputpath, 'index.html').replace("'", "'\\''"))
111
+            print()
112
+
113
+
114
# Allow running this module directly as a script.
if __name__ == '__main__':
    GitStats().run()

+ 733
- 0
gitstats/htmlreportcreator.py Целия файл

@@ -0,0 +1,733 @@
1
+import datetime
2
+import glob
3
+import pkg_resources
4
+import os
5
+import shutil
6
+import time
7
+
8
+from .reportcreator import ReportCreator
9
+from .miscfuncs import getgitversion, getgnuplotversion, getkeyssortedbyvaluekey, getkeyssortedbyvalues, getpipeoutput, \
10
+    getversion, gnuplot_cmd
11
+
12
# Preamble written at the top of every generated gnuplot script.
GNUPLOT_COMMON = (
    'set terminal png transparent size 640,240\n'
    'set size 1.0,1.0\n'
)

# Weekday abbreviations indexed 0 (Monday) through 6 (Sunday).
WEEKDAYS = (
    'Mon',
    'Tue',
    'Wed',
    'Thu',
    'Fri',
    'Sat',
    'Sun',
)
14
+
15
def html_linkify(text):
    """Return *text* lower-cased with every space replaced by an underscore."""
    lowered = text.lower()
    return '_'.join(lowered.split(' '))
17
+
18
+
19
def html_header(level, text):
    """Return an ``<hN>`` heading for *text* that links to its own anchor.

    The anchor id is ``html_linkify(text)``; *level* is the heading level N.
    """
    anchor = html_linkify(text)
    template = '\n<h%d id="%s"><a href="#%s">%s</a></h%d>\n\n'
    return template % (level, anchor, anchor, text, level)
22
+
23
+
24
+class HTMLReportCreator(ReportCreator):
25
+    def __init(self, conf):
26
+        super(HTMLReportCreator, self).__init__(conf)
27
+
28
+    def create(self, data, path):
29
+        super(HTMLReportCreator, self).create(data, path)
30
+        self.title = data.projectname
31
+
32
+        # copy static files. Looks in the binary directory, ../share/gitstats and /usr/share/gitstats
33
+        resources = (self.conf['style'], 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif')
34
+        for resource in (self.conf['style'], 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
35
+            resource_file = pkg_resources.resource_filename('gitstats', os.path.join('resources', resource))
36
+            if os.path.exists(resource_file):
37
+                shutil.copyfile(resource_file, os.path.join(path, resource))
38
+            else:
39
+                print(f'Warning: "{resource}" not found, so not copied')
40
+
41
+        f = open(path + "/index.html", 'w')
42
+        format = '%Y-%m-%d %H:%M:%S'
43
+        self.printHeader(f)
44
+
45
+        f.write('<h1>GitStats - %s</h1>' % data.projectname)
46
+
47
+        self.printNav(f)
48
+
49
+        f.write('<dl>')
50
+        f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
51
+        f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (
52
+            datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
53
+        f.write(
54
+            '<dt>Generator</dt><dd><a href="https://github.com/hoxu/gitstats">GitStats</a> (version %s), %s, %s</dd>' % (
55
+                getversion(), getgitversion(), getgnuplotversion()))
56
+        f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (
57
+            data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format)))
58
+        f.write('<dt>Age</dt><dd>%d days, %d active days (%3.2f%%)</dd>' % (
59
+            data.getCommitDeltaDays(), len(data.getActiveDays()),
60
+            (100.0 * len(data.getActiveDays()) / data.getCommitDeltaDays())))
61
+        f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
62
+        f.write('<dt>Total Lines of Code</dt><dd>%s (%d added, %d removed)</dd>' % (
63
+            data.getTotalLOC(), data.total_lines_added, data.total_lines_removed))
64
+        f.write('<dt>Total Commits</dt><dd>%s (average %.1f commits per active day, %.1f per all days)</dd>' % (
65
+            data.getTotalCommits(), float(data.getTotalCommits()) / len(data.getActiveDays()),
66
+            float(data.getTotalCommits()) / data.getCommitDeltaDays()))
67
+        f.write('<dt>Authors</dt><dd>%s (average %.1f commits per author)</dd>' % (
68
+            data.getTotalAuthors(), (1.0 * data.getTotalCommits()) / data.getTotalAuthors()))
69
+        f.write('</dl>')
70
+
71
+        f.write('</body>\n</html>')
72
+        f.close()
73
+
74
+        ###
75
+        # Activity
76
+        f = open(path + '/activity.html', 'w')
77
+        self.printHeader(f)
78
+        f.write('<h1>Activity</h1>')
79
+        self.printNav(f)
80
+
81
+        # f.write('<h2>Last 30 days</h2>')
82
+
83
+        # f.write('<h2>Last 12 months</h2>')
84
+
85
+        # Weekly activity
86
+        WEEKS = 32
87
+        f.write(html_header(2, 'Weekly activity'))
88
+        f.write('<p>Last %d weeks</p>' % WEEKS)
89
+
90
+        # generate weeks to show (previous N weeks from now)
91
+        now = datetime.datetime.now()
92
+        deltaweek = datetime.timedelta(7)
93
+        weeks = []
94
+        stampcur = now
95
+        for i in range(0, WEEKS):
96
+            weeks.insert(0, stampcur.strftime('%Y-%W'))
97
+            stampcur -= deltaweek
98
+
99
+        # top row: commits & bar
100
+        f.write('<table class="noborders"><tr>')
101
+        for i in range(0, WEEKS):
102
+            commits = 0
103
+            if weeks[i] in data.activity_by_year_week:
104
+                commits = data.activity_by_year_week[weeks[i]]
105
+
106
+            percentage = 0
107
+            if weeks[i] in data.activity_by_year_week:
108
+                percentage = float(data.activity_by_year_week[weeks[i]]) / data.activity_by_year_week_peak
109
+            height = max(1, int(200 * percentage))
110
+            f.write(
111
+                '<td style="text-align: center; vertical-align: bottom">%d<div style="display: block; background-color: red; width: 20px; height: %dpx"></div></td>' % (
112
+                    commits, height))
113
+
114
+        # bottom row: year/week
115
+        f.write('</tr><tr>')
116
+        for i in range(0, WEEKS):
117
+            f.write('<td>%s</td>' % (WEEKS - i))
118
+        f.write('</tr></table>')
119
+
120
+        # Hour of Day
121
+        f.write(html_header(2, 'Hour of Day'))
122
+        hour_of_day = data.getActivityByHourOfDay()
123
+        f.write('<table><tr><th>Hour</th>')
124
+        for i in range(0, 24):
125
+            f.write('<th>%d</th>' % i)
126
+        f.write('</tr>\n<tr><th>Commits</th>')
127
+        fp = open(path + '/hour_of_day.dat', 'w')
128
+        for i in range(0, 24):
129
+            if i in hour_of_day:
130
+                r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
131
+                f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
132
+                fp.write('%d %d\n' % (i, hour_of_day[i]))
133
+            else:
134
+                f.write('<td>0</td>')
135
+                fp.write('%d 0\n' % i)
136
+        fp.close()
137
+        f.write('</tr>\n<tr><th>%</th>')
138
+        totalcommits = data.getTotalCommits()
139
+        for i in range(0, 24):
140
+            if i in hour_of_day:
141
+                r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
142
+                f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (
143
+                    r, (100.0 * hour_of_day[i]) / totalcommits))
144
+            else:
145
+                f.write('<td>0.00</td>')
146
+        f.write('</tr></table>')
147
+        f.write('<img src="hour_of_day.png" alt="Hour of Day">')
148
+        fg = open(path + '/hour_of_day.dat', 'w')
149
+        for i in range(0, 24):
150
+            if i in hour_of_day:
151
+                fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
152
+            else:
153
+                fg.write('%d 0\n' % (i + 1))
154
+        fg.close()
155
+
156
+        # Day of Week
157
+        f.write(html_header(2, 'Day of Week'))
158
+        day_of_week = data.getActivityByDayOfWeek()
159
+        f.write('<div class="vtable"><table>')
160
+        f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
161
+        fp = open(path + '/day_of_week.dat', 'w')
162
+        for d in range(0, 7):
163
+            commits = 0
164
+            if d in day_of_week:
165
+                commits = day_of_week[d]
166
+            fp.write('%d %s %d\n' % (d + 1, WEEKDAYS[d], commits))
167
+            f.write('<tr>')
168
+            f.write('<th>%s</th>' % (WEEKDAYS[d]))
169
+            if d in day_of_week:
170
+                f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
171
+            else:
172
+                f.write('<td>0</td>')
173
+            f.write('</tr>')
174
+        f.write('</table></div>')
175
+        f.write('<img src="day_of_week.png" alt="Day of Week">')
176
+        fp.close()
177
+
178
+        # Hour of Week
179
+        f.write(html_header(2, 'Hour of Week'))
180
+        f.write('<table>')
181
+
182
+        f.write('<tr><th>Weekday</th>')
183
+        for hour in range(0, 24):
184
+            f.write('<th>%d</th>' % (hour))
185
+        f.write('</tr>')
186
+
187
+        for weekday in range(0, 7):
188
+            f.write('<tr><th>%s</th>' % (WEEKDAYS[weekday]))
189
+            for hour in range(0, 24):
190
+                try:
191
+                    commits = data.activity_by_hour_of_week[weekday][hour]
192
+                except KeyError:
193
+                    commits = 0
194
+                if commits != 0:
195
+                    f.write('<td')
196
+                    r = 127 + int((float(commits) / data.activity_by_hour_of_week_busiest) * 128)
197
+                    f.write(' style="background-color: rgb(%d, 0, 0)"' % r)
198
+                    f.write('>%d</td>' % commits)
199
+                else:
200
+                    f.write('<td></td>')
201
+            f.write('</tr>')
202
+
203
+        f.write('</table>')
204
+
205
+        # Month of Year
206
+        f.write(html_header(2, 'Month of Year'))
207
+        f.write('<div class="vtable"><table>')
208
+        f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
209
+        fp = open(path + '/month_of_year.dat', 'w')
210
+        for mm in range(1, 13):
211
+            commits = 0
212
+            if mm in data.activity_by_month_of_year:
213
+                commits = data.activity_by_month_of_year[mm]
214
+            f.write(
215
+                '<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, (100.0 * commits) / data.getTotalCommits()))
216
+            fp.write('%d %d\n' % (mm, commits))
217
+        fp.close()
218
+        f.write('</table></div>')
219
+        f.write('<img src="month_of_year.png" alt="Month of Year">')
220
+
221
+        # Commits by year/month
222
+        f.write(html_header(2, 'Commits by year/month'))
223
+        f.write(
224
+            '<div class="vtable"><table><tr><th>Month</th><th>Commits</th><th>Lines added</th><th>Lines removed</th></tr>')
225
+        for yymm in reversed(sorted(data.commits_by_month.keys())):
226
+            f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td></tr>' % (
227
+                yymm, data.commits_by_month.get(yymm, 0), data.lines_added_by_month.get(yymm, 0),
228
+                data.lines_removed_by_month.get(yymm, 0)))
229
+        f.write('</table></div>')
230
+        f.write('<img src="commits_by_year_month.png" alt="Commits by year/month">')
231
+        fg = open(path + '/commits_by_year_month.dat', 'w')
232
+        for yymm in sorted(data.commits_by_month.keys()):
233
+            fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
234
+        fg.close()
235
+
236
+        # Commits by year
237
+        f.write(html_header(2, 'Commits by Year'))
238
+        f.write(
239
+            '<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th><th>Lines added</th><th>Lines removed</th></tr>')
240
+        for yy in reversed(sorted(data.commits_by_year.keys())):
241
+            f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td></tr>' % (
242
+                yy, data.commits_by_year.get(yy, 0), (100.0 * data.commits_by_year.get(yy, 0)) / data.getTotalCommits(),
243
+                data.lines_added_by_year.get(yy, 0), data.lines_removed_by_year.get(yy, 0)))
244
+        f.write('</table></div>')
245
+        f.write('<img src="commits_by_year.png" alt="Commits by Year">')
246
+        fg = open(path + '/commits_by_year.dat', 'w')
247
+        for yy in sorted(data.commits_by_year.keys()):
248
+            fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
249
+        fg.close()
250
+
251
+        # Commits by timezone
252
+        f.write(html_header(2, 'Commits by Timezone'))
253
+        f.write('<table><tr>')
254
+        f.write('<th>Timezone</th><th>Commits</th>')
255
+        f.write('</tr>')
256
+        max_commits_on_tz = max(data.commits_by_timezone.values())
257
+        for i in sorted(data.commits_by_timezone.keys(), key=lambda n: int(n)):
258
+            commits = data.commits_by_timezone[i]
259
+            r = 127 + int((float(commits) / max_commits_on_tz) * 128)
260
+            f.write('<tr><th>%s</th><td style="background-color: rgb(%d, 0, 0)">%d</td></tr>' % (i, r, commits))
261
+        f.write('</table>')
262
+
263
+        f.write('</body></html>')
264
+        f.close()
265
+
266
+        ###
267
+        # Authors
268
+        f = open(path + '/authors.html', 'w')
269
+        self.printHeader(f)
270
+
271
+        f.write('<h1>Authors</h1>')
272
+        self.printNav(f)
273
+
274
+        # Authors :: List of authors
275
+        f.write(html_header(2, 'List of Authors'))
276
+
277
+        f.write('<table class="authors sortable" id="authors">')
278
+        f.write(
279
+            '<tr><th>Author</th><th>Commits (%)</th><th>+ lines</th><th>- lines</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th>Active days</th><th># by commits</th></tr>')
280
+        for author in data.getAuthors(self.conf['max_authors']):
281
+            info = data.getAuthorInfo(author)
282
+            f.write(
283
+                '<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td><td>%d</td></tr>' % (
284
+                    author, info['commits'], info['commits_frac'], info['lines_added'], info['lines_removed'],
285
+                    info['date_first'], info['date_last'], info['timedelta'], len(info['active_days']),
286
+                    info['place_by_commits']))
287
+        f.write('</table>')
288
+
289
+        allauthors = data.getAuthors()
290
+        if len(allauthors) > self.conf['max_authors']:
291
+            rest = allauthors[self.conf['max_authors']:]
292
+            f.write('<p class="moreauthors">These didn\'t make it to the top: %s</p>' % ', '.join(rest))
293
+
294
+        f.write(html_header(2, 'Cumulated Added Lines of Code per Author'))
295
+        f.write('<img src="lines_of_code_by_author.png" alt="Lines of code per Author">')
296
+        if len(allauthors) > self.conf['max_authors']:
297
+            f.write('<p class="moreauthors">Only top %d authors shown</p>' % self.conf['max_authors'])
298
+
299
+        f.write(html_header(2, 'Commits per Author'))
300
+        f.write('<img src="commits_by_author.png" alt="Commits per Author">')
301
+        if len(allauthors) > self.conf['max_authors']:
302
+            f.write('<p class="moreauthors">Only top %d authors shown</p>' % self.conf['max_authors'])
303
+
304
+        fgl = open(path + '/lines_of_code_by_author.dat', 'w')
305
+        fgc = open(path + '/commits_by_author.dat', 'w')
306
+
307
+        lines_by_authors = {}  # cumulated added lines by
308
+        # author. to save memory,
309
+        # changes_by_date_by_author[stamp][author] is defined
310
+        # only at points where author commits.
311
+        # lines_by_authors allows us to generate all the
312
+        # points in the .dat file.
313
+
314
+        # Don't rely on getAuthors to give the same order each
315
+        # time. Be robust and keep the list in a variable.
316
+        commits_by_authors = {}  # cumulated added lines by
317
+
318
+        self.authors_to_plot = data.getAuthors(self.conf['max_authors'])
319
+        for author in self.authors_to_plot:
320
+            lines_by_authors[author] = 0
321
+            commits_by_authors[author] = 0
322
+        for stamp in sorted(data.changes_by_date_by_author.keys()):
323
+            fgl.write('%d' % stamp)
324
+            fgc.write('%d' % stamp)
325
+            for author in self.authors_to_plot:
326
+                if author in data.changes_by_date_by_author[stamp].keys():
327
+                    lines_by_authors[author] = data.changes_by_date_by_author[stamp][author]['lines_added']
328
+                    commits_by_authors[author] = data.changes_by_date_by_author[stamp][author]['commits']
329
+                fgl.write(' %d' % lines_by_authors[author])
330
+                fgc.write(' %d' % commits_by_authors[author])
331
+            fgl.write('\n')
332
+            fgc.write('\n')
333
+        fgl.close()
334
+        fgc.close()
335
+
336
+        # Authors :: Author of Month
337
+        f.write(html_header(2, 'Author of Month'))
338
+        f.write('<table class="sortable" id="aom">')
339
+        f.write(
340
+            '<tr><th>Month</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' %
341
+            self.conf['authors_top'])
342
+        for yymm in reversed(sorted(data.author_of_month.keys())):
343
+            authordict = data.author_of_month[yymm]
344
+            authors = getkeyssortedbyvalues(authordict)
345
+            authors.reverse()
346
+            commits = data.author_of_month[yymm][authors[0]]
347
+            next = ', '.join(authors[1:self.conf['authors_top'] + 1])
348
+            f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (
349
+                yymm, authors[0], commits, (100.0 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm],
350
+                next, len(authors)))
351
+
352
+        f.write('</table>')
353
+
354
+        f.write(html_header(2, 'Author of Year'))
355
+        f.write(
356
+            '<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' %
357
+            self.conf['authors_top'])
358
+        for yy in reversed(sorted(data.author_of_year.keys())):
359
+            authordict = data.author_of_year[yy]
360
+            authors = getkeyssortedbyvalues(authordict)
361
+            authors.reverse()
362
+            commits = data.author_of_year[yy][authors[0]]
363
+            next = ', '.join(authors[1:self.conf['authors_top'] + 1])
364
+            f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (
365
+                yy, authors[0], commits, (100.0 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next,
366
+                len(authors)))
367
+        f.write('</table>')
368
+
369
+        # Domains
370
+        f.write(html_header(2, 'Commits by Domains'))
371
+        domains_by_commits = getkeyssortedbyvaluekey(data.domains, 'commits')
372
+        domains_by_commits.reverse()  # most first
373
+        f.write('<div class="vtable"><table>')
374
+        f.write('<tr><th>Domains</th><th>Total (%)</th></tr>')
375
+        fp = open(path + '/domains.dat', 'w')
376
+        n = 0
377
+        for domain in domains_by_commits:
378
+            if n == self.conf['max_domains']:
379
+                break
380
+            commits = 0
381
+            n += 1
382
+            info = data.getDomainInfo(domain)
383
+            fp.write('%s %d %d\n' % (domain, n, info['commits']))
384
+            f.write('<tr><th>%s</th><td>%d (%.2f%%)</td></tr>' % (
385
+                domain, info['commits'], (100.0 * info['commits'] / totalcommits)))
386
+        f.write('</table></div>')
387
+        f.write('<img src="domains.png" alt="Commits by Domains">')
388
+        fp.close()
389
+
390
+        f.write('</body></html>')
391
+        f.close()
392
+
393
+        ###
394
+        # Files
395
+        f = open(path + '/files.html', 'w')
396
+        self.printHeader(f)
397
+        f.write('<h1>Files</h1>')
398
+        self.printNav(f)
399
+
400
+        f.write('<dl>\n')
401
+        f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
402
+        f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
403
+        try:
404
+            f.write(
405
+                '<dt>Average file size</dt><dd>%.2f bytes</dd>' % (float(data.getTotalSize()) / data.getTotalFiles()))
406
+        except ZeroDivisionError:
407
+            pass
408
+        f.write('</dl>\n')
409
+
410
+        # Files :: File count by date
411
+        f.write(html_header(2, 'File count by date'))
412
+
413
+        # use set to get rid of duplicate/unnecessary entries
414
+        files_by_date = set()
415
+        for stamp in sorted(data.files_by_stamp.keys()):
416
+            files_by_date.add(
417
+                '%s %d' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
418
+
419
+        fg = open(path + '/files_by_date.dat', 'w')
420
+        for line in sorted(list(files_by_date)):
421
+            fg.write('%s\n' % line)
422
+        # for stamp in sorted(data.files_by_stamp.keys()):
423
+        #	fg.write('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
424
+        fg.close()
425
+
426
+        f.write('<img src="files_by_date.png" alt="Files by Date">')
427
+
428
+        # f.write('<h2>Average file size by date</h2>')
429
+
430
+        # Files :: Extensions
431
+        f.write(html_header(2, 'Extensions'))
432
+        f.write(
433
+            '<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
434
+        for ext in sorted(data.extensions.keys()):
435
+            files = data.extensions[ext]['files']
436
+            lines = data.extensions[ext]['lines']
437
+            try:
438
+                loc_percentage = (100.0 * lines) / data.getTotalLOC()
439
+            except ZeroDivisionError:
440
+                loc_percentage = 0
441
+            f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (
442
+                ext, files, (100.0 * files) / data.getTotalFiles(), lines, loc_percentage, lines / files))
443
+        f.write('</table>')
444
+
445
+        f.write('</body></html>')
446
+        f.close()
447
+
448
+        ###
449
+        # Lines
450
+        f = open(path + '/lines.html', 'w')
451
+        self.printHeader(f)
452
+        f.write('<h1>Lines</h1>')
453
+        self.printNav(f)
454
+
455
+        f.write('<dl>\n')
456
+        f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
457
+        f.write('</dl>\n')
458
+
459
+        f.write(html_header(2, 'Lines of Code'))
460
+        f.write('<img src="lines_of_code.png" alt="Lines of Code">')
461
+
462
+        fg = open(path + '/lines_of_code.dat', 'w')
463
+        for stamp in sorted(data.changes_by_date.keys()):
464
+            fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
465
+        fg.close()
466
+
467
+        f.write('</body></html>')
468
+        f.close()
469
+
470
+        ###
471
+        # tags.html
472
+        f = open(path + '/tags.html', 'w')
473
+        self.printHeader(f)
474
+        f.write('<h1>Tags</h1>')
475
+        self.printNav(f)
476
+
477
+        f.write('<dl>')
478
+        f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
479
+        if len(data.tags) > 0:
480
+            f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (1.0 * data.getTotalCommits() / len(data.tags)))
481
+        f.write('</dl>')
482
+
483
+        f.write('<table class="tags">')
484
+        f.write('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
485
+        # sort the tags by date desc
486
+        tags_sorted_by_date_desc = map(lambda el: el[1],
487
+                                       reversed(sorted(map(lambda el: (el[1]['date'], el[0]), data.tags.items()))))
488
+        for tag in tags_sorted_by_date_desc:
489
+            authorinfo = []
490
+            self.authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
491
+            for i in reversed(self.authors_by_commits):
492
+                authorinfo.append('%s (%d)' % (i, data.tags[tag]['authors'][i]))
493
+            f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (
494
+                tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
495
+        f.write('</table>')
496
+
497
+        f.write('</body></html>')
498
+        f.close()
499
+
500
+        self.createGraphs(path)
501
+
502
+    def createGraphs(self, path):
503
+        print('Generating graphs...')
504
+
505
+        # hour of day
506
+        f = open(path + '/hour_of_day.plot', 'w')
507
+        f.write(GNUPLOT_COMMON)
508
+        f.write(
509
+            """
510
+            set output 'hour_of_day.png'
511
+            unset key
512
+            set xrange [0.5:24.5]
513
+            set yrange [0:]
514
+            set xtics 4
515
+            set grid y
516
+            set ylabel "Commits"
517
+            plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
518
+            """)
519
+        f.close()
520
+
521
+        # day of week
522
+        f = open(path + '/day_of_week.plot', 'w')
523
+        f.write(GNUPLOT_COMMON)
524
+        f.write(
525
+            """
526
+            set output 'day_of_week.png'
527
+            unset key
528
+            set xrange [0.5:7.5]
529
+            set yrange [0:]
530
+            set xtics 1
531
+            set grid y
532
+            set ylabel "Commits"
533
+            plot 'day_of_week.dat' using 1:3:(0.5):xtic(2) w boxes fs solid
534
+            """)
535
+        f.close()
536
+
537
+        # Domains
538
+        f = open(path + '/domains.plot', 'w')
539
+        f.write(GNUPLOT_COMMON)
540
+        f.write(
541
+            """
542
+            set output 'domains.png'
543
+            unset key
544
+            unset xtics
545
+            set yrange [0:]
546
+            set grid y
547
+            set ylabel "Commits"
548
+            plot 'domains.dat' using 2:3:(0.5) with boxes fs solid, '' using 2:3:1 with labels rotate by 45 offset 0,1
549
+            """)
550
+        f.close()
551
+
552
+        # Month of Year
553
+        f = open(path + '/month_of_year.plot', 'w')
554
+        f.write(GNUPLOT_COMMON)
555
+        f.write(
556
+            """
557
+            set output 'month_of_year.png'
558
+            unset key
559
+            set xrange [0.5:12.5]
560
+            set yrange [0:]
561
+            set xtics 1
562
+            set grid y
563
+            set ylabel "Commits"
564
+            plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
565
+            """)
566
+        f.close()
567
+
568
+        # commits_by_year_month
569
+        f = open(path + '/commits_by_year_month.plot', 'w')
570
+        f.write(GNUPLOT_COMMON)
571
+        f.write(
572
+            """
573
+            set output 'commits_by_year_month.png'
574
+            unset key
575
+            set yrange [0:]
576
+            set xdata time
577
+            set timefmt "%Y-%m"
578
+            set format x "%Y-%m"
579
+            set xtics rotate
580
+            set bmargin 5
581
+            set grid y
582
+            set ylabel "Commits"
583
+            plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
584
+            """)
585
+        f.close()
586
+
587
+        # commits_by_year
588
+        f = open(path + '/commits_by_year.plot', 'w')
589
+        f.write(GNUPLOT_COMMON)
590
+        f.write(
591
+            """
592
+            set output 'commits_by_year.png'
593
+            unset key
594
+            set yrange [0:]
595
+            set xtics 1 rotate
596
+            set grid y
597
+            set ylabel "Commits"
598
+            set yrange [0:]
599
+            plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
600
+            """)
601
+        f.close()
602
+
603
+        # Files by date
604
+        f = open(path + '/files_by_date.plot', 'w')
605
+        f.write(GNUPLOT_COMMON)
606
+        f.write(
607
+            """
608
+            set output 'files_by_date.png'
609
+            unset key
610
+            set yrange [0:]
611
+            set xdata time
612
+            set timefmt "%Y-%m-%d"
613
+            set format x "%Y-%m-%d"
614
+            set grid y
615
+            set ylabel "Files"
616
+            set xtics rotate
617
+            set ytics autofreq
618
+            set bmargin 6
619
+            plot 'files_by_date.dat' using 1:2 w steps
620
+            """)
621
+        f.close()
622
+
623
+        # Lines of Code
624
+        f = open(path + '/lines_of_code.plot', 'w')
625
+        f.write(GNUPLOT_COMMON)
626
+        f.write(
627
+            """
628
+            set output 'lines_of_code.png'
629
+            unset key
630
+            set yrange [0:]
631
+            set xdata time
632
+            set timefmt "%s"
633
+            set format x "%Y-%m-%d"
634
+            set grid y
635
+            set ylabel "Lines"
636
+            set xtics rotate
637
+            set bmargin 6
638
+            plot 'lines_of_code.dat' using 1:2 w lines
639
+            """)
640
+        f.close()
641
+
642
+        # Lines of Code Added per author
643
+        f = open(path + '/lines_of_code_by_author.plot', 'w')
644
+        f.write(GNUPLOT_COMMON)
645
+        f.write(
646
+            """
647
+            set terminal png transparent size 640,480
648
+            set output 'lines_of_code_by_author.png'
649
+            set key left top
650
+            set yrange [0:]
651
+            set xdata time
652
+            set timefmt "%s"
653
+            set format x "%Y-%m-%d"
654
+            set grid y
655
+            set ylabel "Lines"
656
+            set xtics rotate
657
+            set bmargin 6
658
+            plot """
659
+        )
660
+        i = 1
661
+        plots = []
662
+        for a in self.authors_to_plot:
663
+            i = i + 1
664
+            author = a.replace("\"", "\\\"").replace("`", "")
665
+            plots.append("""'lines_of_code_by_author.dat' using 1:%d title "%s" w lines""" % (i, author))
666
+        f.write(", ".join(plots))
667
+        f.write('\n')
668
+
669
+        f.close()
670
+
671
+        # Commits per author
672
+        f = open(path + '/commits_by_author.plot', 'w')
673
+        f.write(GNUPLOT_COMMON)
674
+        f.write(
675
+            """
676
+            set terminal png transparent size 640,480
677
+            set output 'commits_by_author.png'
678
+            set key left top
679
+            set yrange [0:]
680
+            set xdata time
681
+            set timefmt "%s"
682
+            set format x "%Y-%m-%d"
683
+            set grid y
684
+            set ylabel "Commits"
685
+            set xtics rotate
686
+            set bmargin 6
687
+            plot """
688
+        )
689
+        i = 1
690
+        plots = []
691
+        for a in self.authors_to_plot:
692
+            i = i + 1
693
+            author = a.replace("\"", "\\\"").replace("`", "")
694
+            plots.append("""'commits_by_author.dat' using 1:%d title "%s" w lines""" % (i, author))
695
+        f.write(", ".join(plots))
696
+        f.write('\n')
697
+
698
+        f.close()
699
+
700
+        os.chdir(path)
701
+        files = glob.glob(path + '/*.plot')
702
+        for f in files:
703
+            out = getpipeoutput([gnuplot_cmd + ' "%s"' % f])
704
+            if len(out) > 0:
705
+                print(out)
706
+
707
+    def printHeader(self, f, title=''):
708
+        f.write(
709
+            """<!DOCTYPE html>
710
+            <html>
711
+            <head>
712
+                <meta charset="UTF-8">
713
+                <title>GitStats - %s</title>
714
+                <link rel="stylesheet" href="%s" type="text/css">
715
+                <meta name="generator" content="GitStats %s">
716
+                <script type="text/javascript" src="sortable.js"></script>
717
+            </head>
718
+            <body>
719
+            """ % (self.title, self.conf['style'], getversion()))
720
+
721
+    def printNav(self, f):
722
+        f.write("""
723
+<div class="nav">
724
+<ul>
725
+<li><a href="index.html">General</a></li>
726
+<li><a href="activity.html">Activity</a></li>
727
+<li><a href="authors.html">Authors</a></li>
728
+<li><a href="files.html">Files</a></li>
729
+<li><a href="lines.html">Lines</a></li>
730
+<li><a href="tags.html">Tags</a></li>
731
+</ul>
732
+</div>
733
+""")

+ 105
- 0
gitstats/miscfuncs.py Целия файл

@@ -0,0 +1,105 @@
1
+import os
2
+import platform
3
+import re
4
+import subprocess
5
+import sys
6
+import time
7
+
8
# Force the C locale through the environment (presumably so that the output
# of the external tools parsed by this package is not localized).
os.environ['LC_ALL'] = 'C'

# True when running on Linux; used to decide whether progress lines are printed.
ON_LINUX = platform.system() == 'Linux'

# gnuplot is looked up on PATH unless the environment variable "GNUPLOT"
# names a different executable.
gnuplot_cmd = os.environ.get('GNUPLOT', 'gnuplot')
17
+
18
+
19
def getpipeoutput(cmds, quiet=False):
    """Run shell commands as a pipeline and return the last command's stdout.

    Each entry of *cmds* is a shell command string (executed with
    ``shell=True``); the stdout of every command is connected to the stdin of
    the next one. stderr is not captured.

    Args:
        cmds: non-empty list of shell command strings.
        quiet: when false, and stdout is a terminal on Linux, a progress line
            is printed before the run and rewritten with the elapsed time
            afterwards.

    Returns:
        The UTF-8 decoded output of the final command with trailing newlines
        removed.
    """
    show_progress = not quiet and ON_LINUX and os.isatty(1)
    start = time.time()
    if show_progress:
        # No trailing newline: the timing line printed below starts with '\r'
        # and overwrites this one in place.
        print('>> ' + ' | '.join(cmds), end='')
        sys.stdout.flush()
    p = subprocess.Popen(cmds[0], stdout=subprocess.PIPE, shell=True)
    processes = [p]
    for x in cmds[1:]:
        p = subprocess.Popen(x, stdin=p.stdout, stdout=subprocess.PIPE, shell=True)
        processes.append(p)
    output = p.communicate()[0].decode('utf-8')
    # Reap every stage of the pipeline, not only the last one.
    for p in processes:
        p.wait()
    end = time.time()
    if show_progress:
        print('\r[%.5f] >> %s' % (end - start, ' | '.join(cmds)))
    return output.rstrip('\n')
36
+
37
+
38
def getlogrange(conf, defaultrange='HEAD', end_only=True):
    """Build the revision arguments for a ``git log``-style command.

    The commit range comes from ``getcommitrange``; when
    ``conf['start_date']`` is non-empty it is prefixed with a quoted
    ``--since`` option.
    """
    revisions = getcommitrange(conf, defaultrange, end_only)
    if len(conf['start_date']) == 0:
        return revisions
    return '--since="%s" "%s"' % (conf['start_date'], revisions)
43
+
44
+
45
def getcommitrange(conf, defaultrange='HEAD', end_only=False):
    """Return the commit range selected by *conf*.

    * no ``commit_end`` configured -> *defaultrange*
    * ``commit_end`` only, or *end_only* requested -> ``commit_end``
    * both ends configured -> ``"<commit_begin>..<commit_end>"``
    """
    last = conf['commit_end']
    if len(last) == 0:
        return defaultrange
    if end_only:
        return last
    first = conf['commit_begin']
    if len(first) == 0:
        return last
    return '%s..%s' % (first, last)
51
+
52
+
53
def getkeyssortedbyvalues(dictionary):
    """Return the keys of *dictionary* ordered by ascending value.

    Keys with equal values are ordered by the keys themselves.
    """
    return sorted(dictionary, key=lambda k: (dictionary[k], k))
55
+
56
+
57
+# dict['author'] = { 'commits': 512 } - ...key(dict, 'commits')
58
def getkeyssortedbyvaluekey(d, key):
    """Return the keys of *d* ordered by ascending ``d[k][key]``.

    *d* maps each key to a mapping; ties on the selected field are broken by
    the outer key.
    """
    return sorted(d, key=lambda name: (d[name][key], name))
60
+
61
+
62
def getstatsummarycounts(line):
    """Extract ``[files, insertions, deletions]`` from a diff-stat summary line.

    Numbers found in *line* are returned as strings; a count that is absent
    from the line is filled in with the integer ``0`` so the result has three
    entries for the recognized shapes:

    * one number  -> ``[files, 0, 0]``
    * two numbers and ``(+)`` present -> ``[files, insertions, 0]``
    * two numbers and ``(-)`` present -> ``[files, 0, deletions]``

    Any other shape is returned exactly as matched.
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    numbers = re.findall(r'\d+', line)
    if len(numbers) == 1:
        # neither insertions nor deletions: may probably only happen for "0 files changed"
        numbers.append(0)
        numbers.append(0)
    elif len(numbers) == 2 and '(+)' in line:
        numbers.append(0)  # only insertions were printed on line
    elif len(numbers) == 2 and '(-)' in line:
        numbers.insert(1, 0)  # only deletions were printed on line
    return numbers
73
+
74
+
75
def getversion():
    """Return the package version string reported by ``_version.get_versions()``."""
    # Imported lazily, inside the function, as in the original code.
    from ._version import get_versions
    return get_versions()['version']
80
+
81
+
82
def getgitversion():
    """Return the first line printed by ``git --version``."""
    banner = getpipeoutput(['git --version'])
    return banner.partition('\n')[0]
84
+
85
+
86
def getgnuplotversion():
    """Return the first line printed by ``<gnuplot_cmd> --version``."""
    banner = getpipeoutput(['%s --version' % gnuplot_cmd])
    return banner.partition('\n')[0]
88
+
89
+
90
def getnumoffilesfromrev(time_rev):
    """
    Count the files present in the tree of a revision.

    *time_rev* is a ``(timestamp, rev)`` pair. The count is the number of
    lines printed by ``git ls-tree -r --name-only <rev>``.

    Returns ``(int(timestamp), rev, file_count)``.
    """
    stamp, rev = time_rev
    when = int(stamp)
    listing = getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l'])
    file_count = int(listing.split('\n')[0])
    return (when, rev, file_count)
96
+
97
+
98
def getnumoflinesinblob(ext_blob):
    """
    Count the lines of a git blob.

    *ext_blob* is an ``(ext, blob_id)`` pair; the blob content is piped
    through ``wc -l``.

    Returns ``(ext, blob_id, line_count)``.
    """
    ext, blob_id = ext_blob
    wc_output = getpipeoutput(['git cat-file blob %s' % blob_id, 'wc -l'])
    line_count = int(wc_output.split()[0])
    return ext, blob_id, line_count
104
+
105
+

+ 11
- 0
gitstats/reportcreator.py Целия файл

@@ -0,0 +1,11 @@
1
class ReportCreator:
    """Creates the actual report based on given data.

    This class only stores its inputs: the configuration passed to the
    constructor and the ``data`` / ``path`` of the most recent ``create()``.
    """

    def __init__(self, conf):
        self.conf = conf
        # Filled in by create().
        self.data = self.path = None

    def create(self, data, path):
        """Remember the collected *data* and the output *path*."""
        self.data, self.path = data, path

BIN
gitstats/resources/arrow-down.gif Целия файл


BIN
gitstats/resources/arrow-none.gif Целия файл


BIN
gitstats/resources/arrow-up.gif Целия файл


+ 145
- 0
gitstats/resources/gitstats.css Целия файл

@@ -0,0 +1,145 @@
1
/**
 * GitStats - default style
 */

/* Page */
body {
	color: black;
	background-color: #dfd;
}

/* Definition lists: bold "term: " on the left, value on its own line */
dt {
	font-weight: bold;
	float: left;
	margin-right: 1em;
}

dt:after {
	content: ': ';
}

dd {
	display: block;
	clear: left;
}

/* Tables */
table {
	border: 1px solid black;
	border-collapse: collapse;
	font-size: 80%;
	margin-bottom: 1em;
}

table.noborders {
	border: none;
}

table.noborders td {
	border: none;
}

.vtable {
	float: right;
	clear: both;
}

table.tags td {
	vertical-align: top;
}

td {
	background-color: white;
	border: 1px solid black;
	padding: 0.2em 0.2em 0.2em 0.3em;
}

th {
	background-color: #ddf;
}

th a {
	text-decoration: none;
}

tr:hover {
	background-color: #ddf;
}

/* Navigation bar; tabbed style */
.nav {
	border-bottom: 1px solid black;
	padding: 0.3em;
}

.nav ul {
	list-style-type: none;
	display: inline;
	margin: 0;
	padding: 0;
}

.nav li {
	display: inline;
}

.nav li a {
	padding: 0.3em;
	text-decoration: none;
	color: black;
	border: 1px solid black;
	margin: 0.5em;
	background-color: #ddf;
}

.nav li a:hover {
	background-color: #ddd;
	border-bottom: 1px solid #ddf;
}

/* Images (graphs) and the sort arrows inside table headers */
img {
	border: 1px solid black;
	padding: 0.5em;
	background-color: white;
}

th img {
	border: 0px;
	padding: 0px;
	background-color: #ddf;
}

/* Headings; a pilcrow appears after the anchor on hover */
h1 a, h2 a {
	color: black;
	text-decoration: none;
}

h1:hover a:after,
h2:hover a:after {
	content: '¶';
	color: #555;
}

h1 {
	font-size: x-large;
}

h2 {
	background-color: #564;
	border: 1px solid black;
	padding-left: 0.5em;
	padding-right: 0.5em;
	color: white;
	font-size: large;
	clear: both;
}

/* Must stay after the shared "h1 a, h2 a" rule to override its color */
h2 a {
	color: white;
}

.moreauthors {
	font-size: 80%;
}

+ 324
- 0
gitstats/resources/sortable.js Целия файл

@@ -0,0 +1,324 @@
1
+/*
2
+Table sorting script  by Joost de Valk, check it out at http://www.joostdevalk.nl/code/sortable-table/.
3
+Based on a script from http://www.kryogenix.org/code/browser/sorttable/.
4
+Distributed under the MIT license: http://www.kryogenix.org/code/browser/licence.html .
5
+
6
+Copyright (c) 1997-2007 Stuart Langridge, Joost de Valk.
7
+
8
+Version 1.5.7
9
+*/
10
+
11
/* You can change these values */
var image_path = "",                  // prefix prepended to the arrow image names
	image_up = "arrow-up.gif",
	image_down = "arrow-down.gif",
	image_none = "arrow-none.gif",
	europeandate = true,              // true: numeric dates are read as dd/mm/yyyy
	alternate_row_colors = true;      // true: rows get "even"/"odd" classes

/* Don't change anything below this unless you know what you're doing */
addEvent(window, "load", sortables_init);

// Column currently being sorted; read by the ts_sort_* comparators.
var SORT_COLUMN_INDEX;
// Set to true by ts_makeSortable once a table with a <thead> has been seen.
var thead = false;
24
+
25
// Make every <table> that carries the class "sortable" and has an id sortable.
function sortables_init() {
	if (!document.getElementsByTagName) return;
	// Declared locally: the loop state must not leak into the global scope.
	var tables = document.getElementsByTagName("table");
	for (var ti = 0; ti < tables.length; ti++) {
		var table = tables[ti];
		// Whole-word match, so that e.g. class="unsortable" does not qualify.
		if (/(^|\s)sortable(\s|$)/.test(table.className) && table.id) {
			ts_makeSortable(table);
		}
	}
}
36
+
37
// Turn the header cells of table t into clickable sort links.
// The header is the last row of <thead> when the table has one (this also
// sets the global `thead` flag), otherwise the first row of the table.
function ts_makeSortable(t) {
	var headerRow;
	if (t.rows && t.rows.length > 0) {
		if (t.tHead && t.tHead.rows.length > 0) {
			headerRow = t.tHead.rows[t.tHead.rows.length-1];
			thead = true;
		} else {
			headerRow = t.rows[0];
		}
	}
	if (!headerRow) return;

	// Cells whose class contains "unsortable" are left untouched.
	for (var col = 0; col < headerRow.cells.length; col++) {
		var headerCell = headerRow.cells[col];
		var label = ts_getInnerText(headerCell);
		if (headerCell.className.indexOf("unsortable") != -1) continue;
		headerCell.innerHTML = '<a href="#" class="sortheader" onclick="ts_resortTable(this, '+col+');return false;">'+label+'<span class="sortarrow">&nbsp;&nbsp;<img src="'+ image_path + image_none + '" alt="&darr;"/></span></a>';
	}

	if (alternate_row_colors) {
		alternate(t);
	}
}
60
+
61
// Return the text content of el.
// Strings and undefined are returned unchanged; otherwise el.innerText is used
// when it is non-empty, else the text is assembled recursively from the child
// element and text nodes (other node types are ignored).
function ts_getInnerText(el) {
	if (typeof el == "string" || typeof el == "undefined") return el;
	if (el.innerText) return el.innerText;	//Not needed but it is faster

	var text = "";
	var children = el.childNodes;
	for (var idx = 0, count = children.length; idx < count; idx++) {
		var node = children[idx];
		if (node.nodeType === 1) {          //ELEMENT_NODE
			text += ts_getInnerText(node);
		} else if (node.nodeType === 3) {   //TEXT_NODE
			text += node.nodeValue;
		}
	}
	return text;
}
81
+
82
// Click handler of a header link: sort the table by the clicked column.
//   lnk  - the <a class="sortheader"> element that was clicked
//   clid - column index baked into the onclick attribute (0 falls back to
//          the cellIndex of the enclosing cell)
// The comparator is chosen from the first non-empty cell of the column;
// clicking the same header again reverses the order. Rows whose class
// contains "sortbottom" are always kept at the end.
function ts_resortTable(lnk, clid) {
	// Locate the <span class="sortarrow"> inside the link.
	var span;
	for (var ci = 0; ci < lnk.childNodes.length; ci++) {
		var child = lnk.childNodes[ci];
		if (child.tagName && child.tagName.toLowerCase() == 'span') span = child;
	}
	var td = lnk.parentNode;
	var column = clid || td.cellIndex;
	var t = getParent(td,'TABLE');
	if (t.rows.length <= 1) return;

	// Decide per table whether the header lives in a <thead>; the global
	// `thead` flag is shared by all tables and never reset, so it cannot be
	// trusted once one table with a <thead> has been initialised.
	var hasHead = !!(t.tHead && t.tHead.rows.length > 0);
	// Without a <thead> the first body row is the header and is skipped.
	var firstDataRow = hasHead ? 0 : 1;

	// Work out a type for the column
	var body = t.tBodies[0];
	var itm = "";
	for (var r = firstDataRow; itm == "" && r < body.rows.length; r++) {
		itm = trim(ts_getInnerText(body.rows[r].cells[column]));
		if (itm.substr(0,4) == "<!--") {
			itm = "";
		}
	}
	if (itm == "") return;

	var sortfn = ts_sort_caseinsensitive;
	if (itm.match(/^\d\d[\/\.-][a-zA-Z][a-zA-Z][a-zA-Z][\/\.-]\d\d\d\d$/)) sortfn = ts_sort_date;
	if (itm.match(/^\d\d[\/\.-]\d\d[\/\.-]\d\d\d{2}?$/)) sortfn = ts_sort_date;
	if (itm.match(/^-?[£$€Û¢´]\d/)) sortfn = ts_sort_numeric;
	// ignore stuff in () after the numbers.
	if (itm.match(/^-?(\d+[,\.]?)+(E[-+][\d]+)?%?( \(.*\))?$/)) sortfn = ts_sort_numeric;
	SORT_COLUMN_INDEX = column;

	// Collect the data rows (indices are per <tbody>, as before).
	var newRows = [];
	for (var k = 0; k < t.tBodies.length; k++) {
		var rows = t.tBodies[k].rows;
		for (var j = firstDataRow; j < rows.length; j++) {
			newRows[j - firstDataRow] = rows[j];
		}
	}
	newRows.sort(sortfn);

	var arrow;
	if (span.getAttribute("sortdir") == 'down') {
		arrow = '&nbsp;&nbsp;<img src="'+ image_path + image_down + '" alt="&darr;"/>';
		newRows.reverse();
		span.setAttribute('sortdir','up');
	} else {
		arrow = '&nbsp;&nbsp;<img src="'+ image_path + image_up + '" alt="&uarr;"/>';
		span.setAttribute('sortdir','down');
	}

	// We appendChild rows that already exist to the tbody, so it moves them rather than creating new ones
	// don't do sortbottom rows
	var i;
	for (i = 0; i < newRows.length; i++) {
		if (!newRows[i].className || newRows[i].className.indexOf('sortbottom') == -1) {
			t.tBodies[0].appendChild(newRows[i]);
		}
	}
	// do sortbottom rows only
	for (i = 0; i < newRows.length; i++) {
		if (newRows[i].className && newRows[i].className.indexOf('sortbottom') != -1) {
			t.tBodies[0].appendChild(newRows[i]);
		}
	}

	// Delete any other arrows there may be showing
	var ownTable = getParent(lnk,"table");
	var allspans = document.getElementsByTagName("span");
	for (var si = 0; si < allspans.length; si++) {
		if (allspans[si].className == 'sortarrow' && getParent(allspans[si],"table") == ownTable) {
			// in the same table as us
			allspans[si].innerHTML = '&nbsp;&nbsp;<img src="'+ image_path + image_none + '" alt="&darr;"/>';
		}
	}
	span.innerHTML = arrow;
	alternate(t);
}
164
+
165
// Walk up from el (inclusive) and return the first element node whose tag
// name equals pTagName, compared case-insensitively; null when none is found.
function getParent(el, pTagName) {
	var node = el;
	while (node != null) {
		if (node.nodeType == 1 && node.tagName.toLowerCase() == pTagName.toLowerCase()) {
			return node;
		}
		node = node.parentNode;
	}
	return null;
}
174
+
175
// Convert a date string into a "yyyymmdd" key that sorts chronologically.
// Supported shapes, selected by string length:
//   11 chars  dd-Mon-yyyy
//   10 chars  dd/mm/yyyy when europeandate is true, else mm/dd/yyyy
//    8 chars  dd/mm/yy   when europeandate is true, else mm/dd/yy
// Anything else yields "00000000".
// y2k notes: two digit years less than 50 are treated as 20XX, all others as 19XX
function sort_date(date) {
	var MONTHS = {
		jan: "01", feb: "02", mar: "03", apr: "04", may: "05", jun: "06",
		jul: "07", aug: "08", sep: "09", oct: "10", nov: "11", dec: "12"
	};
	if (date.length == 11) {
		var name = date.substr(3,3).toLowerCase();
		// Unknown month names map to "00" instead of the text "undefined".
		var mt = Object.prototype.hasOwnProperty.call(MONTHS, name) ? MONTHS[name] : "00";
		return date.substr(7,4)+mt+date.substr(0,2);
	}
	if (date.length == 10) {
		if (europeandate == false) {
			return date.substr(6,4)+date.substr(0,2)+date.substr(3,2);
		}
		return date.substr(6,4)+date.substr(3,2)+date.substr(0,2);
	}
	if (date.length == 8) {
		var yr = date.substr(6,2);
		// Explicit radix so that "08"/"09" are never read as octal.
		yr = (parseInt(yr, 10) < 50 ? '20' : '19') + yr;
		if (europeandate == true) {
			return yr+date.substr(3,2)+date.substr(0,2);
		}
		return yr+date.substr(0,2)+date.substr(3,2);
	}
	return "00000000";
}
223
+
224
// Comparator for rows a and b: orders them by the date found in the cell at
// SORT_COLUMN_INDEX, using the sortable key produced by sort_date().
function ts_sort_date(a,b) {
	// Declared locally so the keys do not leak into the global scope.
	var key1 = sort_date(ts_getInnerText(a.cells[SORT_COLUMN_INDEX]));
	var key2 = sort_date(ts_getInnerText(b.cells[SORT_COLUMN_INDEX]));

	if (key1 == key2) {
		return 0;
	}
	return key1 < key2 ? -1 : 1;
}
236
// Comparator for rows a and b: orders them numerically by the cell at
// SORT_COLUMN_INDEX after stripping non-numeric characters with clean_num().
function ts_sort_numeric(a,b) {
	var left = clean_num(ts_getInnerText(a.cells[SORT_COLUMN_INDEX]));
	var right = clean_num(ts_getInnerText(b.cells[SORT_COLUMN_INDEX]));
	return compare_numeric(left, right);
}
243
// Numeric comparator: parses both arguments with parseFloat, treats anything
// unparsable as 0, and returns the difference a - b.
function compare_numeric(a,b) {
	var left = parseFloat(a);
	var right = parseFloat(b);
	if (isNaN(left)) left = 0;
	if (isNaN(right)) right = 0;
	return left - right;
}
250
// Comparator for rows a and b: orders them by the lower-cased text of the
// cell at SORT_COLUMN_INDEX.
function ts_sort_caseinsensitive(a,b) {
	// Declared locally so the operands do not leak into the global scope.
	var aa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]).toLowerCase();
	var bb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]).toLowerCase();
	if (aa == bb) {
		return 0;
	}
	return aa < bb ? -1 : 1;
}
261
// Comparator for rows a and b: orders them by the raw text of the cell at
// SORT_COLUMN_INDEX (case-sensitive).
function ts_sort_default(a,b) {
	// Declared locally so the operands do not leak into the global scope.
	var aa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]);
	var bb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]);
	if (aa == bb) {
		return 0;
	}
	return aa < bb ? -1 : 1;
}
272
function addEvent(elm, evType, fn, useCapture)
// addEvent and removeEvent
// cross-browser event handling for IE5+,	NS6 and Mozilla
// By Scott Andrew
//
// Registers fn as a handler for evType on elm. Returns true when
// addEventListener was used, the result of attachEvent when that was used,
// and false (after alerting) when neither API exists on elm.
{
	if (elm.addEventListener){
		elm.addEventListener(evType, fn, useCapture);
		return true;
	} else if (elm.attachEvent){
		var r = elm.attachEvent("on"+evType, fn);
		return r;
	} else {
		// This function attaches handlers; the old text wrongly said "removed".
		alert("Handler could not be attached");
		return false;
	}
}
287
// Strip every character except digits, '.', '-' and '?' from str.
function clean_num(str) {
	var unwanted = /[^-?0-9.]/g;
	return str.replace(unwanted, "");
}
291
// Return s without leading and trailing whitespace.
function trim(s) {
	var edgeWhitespace = /^\s+|\s+$/g;
	return s.replace(edgeWhitespace, "");
}
294
// Give the rows of every <tbody> in table alternating "even" / "odd" classes.
// Rows are numbered from 0 within each tbody: row 0 is "even", row 1 "odd", ...
// A row carrying the opposite class has it replaced; a row carrying neither
// gets the class appended; a row already carrying the right one is untouched.
function alternate(table) {
	var bodies = table.getElementsByTagName("tbody");
	for (var b = 0; b < bodies.length; b++) {
		var rows = bodies[b].getElementsByTagName("tr");
		for (var r = 0; r < rows.length; r++) {
			var row = rows[r];
			var wanted = (r % 2) == 0 ? 'even' : 'odd';
			var opposite = (r % 2) == 0 ? 'odd' : 'even';
			if (row.className.indexOf(opposite) != -1) {
				row.className = row.className.replace(opposite, wanted);
			} else if (row.className.indexOf(wanted) == -1) {
				row.className += " " + wanted;
			}
		}
	}
}