Merge lp:~kfogel/launchpad/add-community-contributions-script into lp:launchpad

Proposed by Karl Fogel
Status: Merged
Merged at revision: not available
Proposed branch: lp:~kfogel/launchpad/add-community-contributions-script
Merge into: lp:launchpad
Diff against target: None lines
To merge this branch: bzr merge lp:~kfogel/launchpad/add-community-contributions-script
Reviewer Review Type Date Requested Status
Jonathan Lange Pending
Review via email: mp+11417@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Karl Fogel (kfogel) wrote :

Add a script that detects community contributions and updates a wiki page with information about them. This script doesn't affect Launchpad itself at all; it just takes a Launchpad branch as read-only input.

Note the dependency on editmoin.py. The import just errors informatively if editmoin can't be found, so the user will know where to get it. This seemed preferable to dragging editmoin.py into utilities/.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== added file 'utilities/community-contributions.py'
2--- utilities/community-contributions.py 1970-01-01 00:00:00 +0000
3+++ utilities/community-contributions.py 2009-09-09 04:39:12 +0000
4@@ -0,0 +1,387 @@
5+#!/usr/bin/python
6+# -*- coding: utf-8 -*-
7+#
8+# Copyright 2009 Canonical Ltd. This software is licensed under the
9+# GNU Affero General Public License version 3 (see the file LICENSE).
10+
11+"""Show what Launchpad community contributors have done, by trawling
12+a Launchpad branch's history, detecting contributions by non-Canonical
13+developers, and updating https://dev.launchpad.net/Contributions accordingly.
14+
15+Usage: community-contributions.py [options] PATH_TO_LAUNCHPAD_DEVEL_BRANCH
16+
17+Requirements:
18+ You need a 'devel' branch of Launchpad available locally (see
19+ https://dev.launchpad.net/Getting), your ~/.moin_ids file must
20+ be set up correctly, and you need editmoin.py (if you don't
21+ have it, the error message will tell you where to get it).
22+
23+Options:
24+ -q Print no non-essential messages.
25+ -h, --help Print this help.
26+ --dry-run Don't update the wiki, just print the new wiki page to stdout.
27+"""
28+
29+# General notes:
30+#
31+# The Right Way to do this would probably be to output some kind of
32+# XML format, and then have a separate converter script transform that
33+# to wiki syntax and update the wiki page. But as the wiki is our
34+# only consumer right now, we just output wiki syntax and update the
35+# wiki page directly, premature generalization being the root of all
36+# evil.
37+#
38+# For understanding the code, you may find it helpful to see
39+# bzrlib/log.py and http://bazaar-vcs.org/Integrating_with_Bazaar.
40+
41+import re
42+import sys
43+import getopt
44+from bzrlib.branch import Branch
45+from bzrlib import log
46+from bzrlib.osutils import format_date
47+
# editmoin is an external helper that does not live in this tree, so if
# it cannot be imported, tell the user where to get it and exit instead
# of dying with a raw traceback.
try:
    from editmoin import editshortcut
except ImportError:
    # Catch ImportError only: a bare "except:" would also trap
    # KeyboardInterrupt/SystemExit and would misreport a genuine bug
    # inside editmoin as "module not found".
    sys.stderr.write("""ERROR: Unable to import from 'editmoin'. How to solve:
As of 2009-09-01, you can get editmoin.py from

 https://bazaar.launchpad.net/~kfogel/lp-dev-utils/lp-user-tools/files

(This is a transitional location; it may move to a more public place.)
""")
    sys.exit(1)
59+
60+
# While anyone with "@canonical.com" in their email address will be
# counted as a Canonical contributor, sometimes Canonical people
# submit from personal addresses, so we still need a list.
#
# Each entry is matched as a substring of the author string taken from
# the bzr log (see get_ex_cons), so an entry may be a full name, a bare
# email address, or any other fragment that identifies the person.
#
# ### TODO: Really, this ought to use launchpadlib to consult
# ### Launchpad itself to find out who's a Canonical developer.
known_canonical_devs = (
    u'Aaron Bentley',
    u'Abel Deuring',
    u'Adam Conrad',
    u'Andrew Bennetts',
    u'Barry Warsaw',
    u'Brad Crittenden',
    u'Carlos Perello Marin',
    u'Carlos Perelló Marín',
    u'Celso Providelo',
    u'Christian Robottom Reis',
    u'Cody Somerville',
    u'Curtis Hovey',
    u'Dafydd Harries',
    u'Daniel Silverstone',
    u'Danilo Šegan',
    u'David Allouche',
    u'Deryck Hodge',
    u'Diogo Matsubara',
    u'Elliot Murphy',
    u'Francis J. Lacoste',
    u'Gabriel Neuman gneuman@async.com',
    u'Gary Poster',
    u'Guilherme Salgado',
    u'Gustavo Niemeyer',
    u'Henning Eggers',
    u'Herb McNew',
    u'James Henstridge',
    u'Jeroen Vermeulen',
    u'Jonathan Knowles',
    u'Jonathan Lange',
    u'Julian Edwards',
    u'Karl Fogel',
    u'Launch Pad',
    u'Launchpad Developers',
    u'Leonard Richardson',
    u'Malcolm Cleaton',
    u'Maris Fogels',
    u'Martin Albisetti',
    u'Matt Zimmerman',
    u'Matthew Revell',
    u'Michael Hudson',
    u'Michael Nelson',
    u'Muharem Hrnjadovic',
    u'Patch Queue Manager',
    u'Paul Hummer',
    u'Robert Collins',
    u'Sidnei',
    u'Sidnei da Silva',
    u'Steve McInerney',
    u'Stuart Bishop',
    u'Tom Berger',
    u'david',
    u'jml@mumak.net',
    u'kiko@beetle',
    )
123+
124+
class RevisionError(Exception):
    """Error signalling a problem with a revision being processed."""
127+
128+
class ContainerRevision(object):
    """A wrapper for a top-level LogRevision containing child LogRevisions.

    Attributes:
        top_rev: the top-level LogRevision, e.g. the LogRevision for r9371.
        contained_revs: list of the LogRevisions added via add_subrev(),
            e.g. [ {9369.1.1}, {9206.4.4}, ... ], where "{X}" means
            "LogRevision for X".
    """

    # ### TODO: just using 'devel' branch for now.  We have four
    # ### trunks; that makes life hard.  Not sure what to do about
    # ### that; unifying the data is possible, but a bit of work.
    # ### See https://dev.launchpad.net/Trunk for more information.
    _rev_url_base = ("http://bazaar.launchpad.net/~launchpad-pqm/"
                     "launchpad/devel/revision/")

    def __init__(self, top_lr):
        """Wrap TOP_LR, a top-level LogRevision, with no children yet."""
        self.top_rev = top_lr
        self.contained_revs = []

    def add_subrev(self, lr):
        """Add a descendant child of this container revision."""
        self.contained_revs.append(lr)

    def _rev_url(self, revision_id):
        """Return the loggerhead URL for REVISION_ID.

        A missing (None or empty) revision ID is replaced by a visible
        placeholder, so that one odd revision cannot abort the whole
        page with a TypeError.
        """
        return self._rev_url_base + (revision_id or "(NO REVISION ID)")

    def __str__(self):
        """Return this landing as a moin-wiki list item.

        The item links to the top-level revision, shows its date and log
        message, and then lists a link for every contained revision.
        """
        rev = self.top_rev.rev
        message = rev.message or "(NO LOG MESSAGE)"
        if rev.timestamp:
            date_str = format_date(rev.timestamp, rev.timezone or 0,
                                   'original')
        else:
            date_str = "(NO DATE)"

        # In loggerhead, you can use either a revision number or a
        # revision ID.  In other words, these would reach the same page:
        #
        # http://bazaar.launchpad.net/~launchpad-pqm/launchpad/devel/revision/9202
        #
        # -and-
        #
        # http://bazaar.launchpad.net/~launchpad-pqm/launchpad/devel/revision/\
        # launchpad@pqm.canonical.com-20090821221206-ritpv21q8w61gbpt
        #
        # In our links, even when the link text is a revnum, we still
        # use a rev-id for the target.  This is both so that the URL will
        # still work if you manually tweak it (say from "devel" to
        # "db-devel") and so that hovering over a revnum on the wiki page
        # will give you some information about it before you click
        # (because a rev id often identifies the committer).
        commit_links = ["[[%s|%s]]" % (self._rev_url(lr.rev.revision_id),
                                       lr.revno)
                        for lr in self.contained_revs]
        pieces = [
            " * [[%s|r%s]] -- %s\n" % (self._rev_url(rev.revision_id),
                                       self.top_rev.revno, date_str),
            " {{{\n%s\n}}}\n" % message,
            " '''Commits:'''\n ",
            "\n ".join(commit_links),
            "\n",
            ]
        return "".join(pieces)
182+
183+
# "ExternalContributor" is too much to type, so I guess we'll just use this.
class ExCon(object):
    """A contributor to Launchpad from outside Canonical."""

    def __init__(self, name):
        """Create a new external contributor named NAME.  NAME is usually
        e.g. "Veronica Random <veronica@example.com>", but any "@"-sign
        will be disguised in the new object."""
        self.name = name.replace("@", " {_AT_} ")
        # If name is "Veronica Random <veronica {_AT_} example.com>",
        # then name_as_anchor will be "veronica_random".
        display_name = name.split("<")[0].strip()
        self.name_as_anchor = re.sub("\\s+", "_", display_name).lower()
        # All the top-level revisions this contributor is associated with
        # (key == value == ContainerRevision).  We use a dictionary
        # instead of list to get set semantics; set() would be overkill.
        self._landings = {}

    @staticmethod
    def _revno_key(revno):
        """Return a sort key that orders revision numbers numerically.

        REVNO may be an integer or a (possibly dotted) string such as
        "9371" or "9369.1.1".  Comparing such strings directly would be
        lexicographic and put "9999" above "10000".  Anything that does
        not parse as dotted integers sorts below every numeric revno,
        ordered by its string form.
        """
        text = str(revno)
        try:
            return (1, tuple(int(part) for part in text.split(".")), "")
        except ValueError:
            return (0, (), text)

    def num_landings(self):
        """Return the number of top-level landings that include revisions
        by this contributor."""
        return len(self._landings)

    def add_top_level_revision(self, cr):
        """Record ContainerRevision CR as associated with this contributor.

        Recording the same CR object more than once has no further effect.
        """
        self._landings[cr] = cr

    def show_contributions(self):
        """Return a wikified string showing this contributor's contributions.

        The string is a heading with the (disguised) name, a landing
        count, and then one entry per landing, most recent first.
        """
        count = self.num_landings()
        if count == 1:
            plural = ""
        else:
            plural = "s"
        # The keys are ContainerRevisions; put more recent ones higher
        # in the list.
        newest_first = sorted(
            self._landings,
            key=lambda cr: self._revno_key(cr.top_rev.revno),
            reverse=True)
        pieces = ["== %s ==\n\n" % self.name,
                  "''%d top-level landing%s:''\n\n" % (count, plural)]
        pieces.extend(str(cr) for cr in newest_first)
        pieces.append("\n")
        return "".join(pieces)
225+
226+
def get_ex_cons(authors, all_ex_cons):
    """Return a list of ExCon objects corresponding to AUTHORS (a list
    of strings).  If there are no external contributors in authors,
    return an empty list.

    ALL_EX_CONS is a dictionary mapping author names (as received from
    the bzr logs, i.e., with email address undisguised) to ExCon objects.
    It is updated in place: an external author seen for the first time
    gets a new ExCon stored under the author string, and later calls
    reuse that same object.

    An author counts as a Canonical developer, and is skipped, if the
    author string contains "@canonical.com" or contains any entry of
    known_canonical_devs as a substring.
    """
    ex_cons_this_rev = []
    for author in authors:
        # The email test does not depend on the name list, so do it once
        # per author rather than once per known name.
        if u"@canonical.com" in author:
            continue
        if any(fragment in author for fragment in known_canonical_devs):
            continue
        ### There's a variant of the Singleton pattern that could be
        ### used for this, whereby instantiating an ExCon object would
        ### just get back an existing object if such has already been
        ### instantiated for this name.  But that would make this code
        ### non-reentrant, and that's just not cool.
        if author not in all_ex_cons:
            all_ex_cons[author] = ExCon(author)
        ex_cons_this_rev.append(all_ex_cons[author])
    return ex_cons_this_rev
255+
256+
# The LogFormatter abstract class should really be called LogReceiver
# or something -- subclasses don't have to be about display.
class LogExCons(log.LogFormatter):
    """Log all the external contributions, by Contributor.

    Revisions are fed in one at a time through log_revision(); once
    they have all been seen, result() returns the wiki page body.
    """
    # See log.LogFormatter documentation.
    supports_merge_revisions = True

    def __init__(self):
        super(LogExCons, self).__init__(to_file=None)
        # Dictionary mapping author names (with undisguised email
        # addresses) to ExCon objects.
        self.all_ex_cons = {}
        # ContainerRevision object representing most-recently-seen
        # top-level rev.  This must be an instance attribute: it is read
        # by log_revision() on every sub-revision.
        self.current_top_level_rev = None

    def result(self):
        """Return a moin-wiki-syntax string with TOC followed by contributions."""
        # List the most prolific contributors first.
        sorted_contributors = sorted(
            self.all_ex_cons.values(),
            key=lambda ec: ec.num_landings(),
            reverse=True)
        pieces = ["-----\n\n", "= Who =\n\n"]
        for ec in sorted_contributors:
            if ec.num_landings() == 1:
                plural = ""
            else:
                plural = "s"
            pieces.append(
                " 1. [[#%s|%s]] ''(%d top-level landing%s)''\n"
                % (ec.name_as_anchor, ec.name, ec.num_landings(), plural))
        pieces.append("\n-----\n\n")
        pieces.append("= What =\n\n")
        for ec in sorted_contributors:
            pieces.append("<<Anchor(%s)>>\n" % ec.name_as_anchor)
            pieces.append(ec.show_contributions())
        return "".join(pieces)

    def log_revision(self, lr):
        """Log a revision.

        :param lr: The LogRevision to be logged.
        :raises RevisionError: if a merged (non-top-level) revision
            arrives before any top-level revision has been seen.
        """
        # We count on always seeing the containing rev before its subrevs.
        if lr.merge_depth == 0:
            self.current_top_level_rev = ContainerRevision(lr)
        elif self.current_top_level_rev is None:
            raise RevisionError(
                "Saw merged revision %s before any top-level revision"
                % lr.revno)
        else:
            self.current_top_level_rev.add_subrev(lr)
        # Credit the enclosing top-level landing to every external author
        # of this revision, whether it is the landing itself or a subrev.
        ex_cons = get_ex_cons(lr.rev.get_apparent_authors(),
                              self.all_ex_cons)
        for ec in ex_cons:
            ec.add_top_level_revision(self.current_top_level_rev)
306+
307+
# Register LogExCons with bzrlib's log formatter registry under the
# name 'external_contributors'.
### TODO: is this really necessary?  See bzrlib/log.py.
log.log_formatter_registry.register('external_contributors', LogExCons,
                                    'Find non-Canonical contributors.')
311+
312+
def usage():
    """Print this module's docstring, which is the usage message, to stdout."""
    # The single-argument parenthesized form prints the same text under
    # both the Python 2 print statement and the Python 3 print function.
    print(__doc__)
315+
316+
# Fixed introductory wiki text; main() puts it in front of the generated
# contribution listing to form the complete page body.
page_intro = """This page shows contributions to Launchpad from outside Canonical. It only lists changes that have landed in the Launchpad ''devel'' tree, so changes that land in ''db-devel'' first may take a while to show up (see the [[Trunk|trunk explanation]] for more).

~-''Note for maintainers: this page is updated every 10 minutes by a cron job running as kfogel on devpad (though if there are no new contributions, the page's timestamp won't change). The code that generates this page is [[http://bazaar.launchpad.net/%7Elaunchpad-pqm/launchpad/devel/annotate/head%3A/utilities/community-contributions.py|utilities/community-contributions.py]] in the Launchpad tree.''-~

"""
322+
def main():
    """Parse the command line, gather contributions, and publish them.

    Reads options and the branch path from sys.argv, walks the branch
    history with a LogExCons formatter, and then either prints the
    resulting wiki page (--dry-run) or pushes it to the wiki through
    editmoin, leaving the page alone when its content is unchanged.

    Exits with status 1 on a usage error and 0 after printing help.
    """
    quiet = False
    dry_run = False

    if len(sys.argv) < 2:
        usage()
        sys.exit(1)

    try:
        # 'quiet' has to be listed here for --quiet to be accepted at all.
        opts, args = getopt.getopt(sys.argv[1:], '?hq',
                                   ['help', 'usage', 'quiet', 'dry-run'])
    except getopt.GetoptError as e:
        sys.stderr.write("ERROR: " + str(e) + '\n\n')
        usage()
        sys.exit(1)

    for opt, _ in opts:
        # getopt reports long options with their leading dashes, so the
        # help alias is '--usage', never plain 'usage'.
        if opt in ('--help', '-h', '-?', '--usage'):
            usage()
            sys.exit(0)
        elif opt in ('-q', '--quiet'):
            quiet = True
        elif opt == '--dry-run':
            dry_run = True

    # Ensure we have the arguments we need.
    if len(args) < 1:
        sys.stderr.write(
            "ERROR: path to Launchpad branch required as argument\n")
        usage()
        sys.exit(1)

    target = args[0]

    # Do everything.
    b = Branch.open(target)

    # ### TODO: 8976 is the first non-Canonical contribution on 'devel'.
    # On 'db-devel', the magic revision number is 8327.  We're aiming at
    # 'devel' right now, but perhaps it would be good to parameterize
    # this, or just auto-detect the branch and choose the right number.
    logger = log.Logger(b, {'start_revision' : 8976,
                            'direction' : 'reverse',
                            'levels' : 0, })
    lec = LogExCons()
    if not quiet:
        print("Calculating (this may take a while)...")
    logger.show(lec)  # This won't "show" anything; it's just for gathering data.
    page_contents = page_intro + lec.result()

    def update_if_modified(moinfile):
        """Install the new page body; return 0 to cancel, 1 to save."""
        if moinfile._unescape(moinfile.body) == page_contents:
            return 0  # Nothing changed, so cancel the edit.
        moinfile.body = page_contents
        return 1

    if dry_run:
        print(page_contents)
        return

    if not quiet:
        print("Updating wiki...")
    # Not sure how to get editmoin to obey our quiet flag.
    editshortcut("https://dev.launchpad.net/Contributions",
                 editfile_func=update_if_modified)
    if not quiet:
        print("Done updating wiki.")
388+
389+
390+if __name__ == '__main__':
391+ main()