1
=== added file 'utilities/format-imports'
2
--- utilities/format-imports	1970-01-01 00:00:00 +0000
3
+++ utilities/format-imports	2010-08-27 20:21:03 +0000
4
@@ -0,0 +1,387 @@
5
1
#!/usr/bin/python
6
2
#
7
3
# Copyright 2010 Canonical Ltd.  This software is licensed under the
8
4
# GNU Affero General Public License version 3 (see the file LICENSE).
9
5
10
6
""" Format import sections in python files
11
7
12
8
= Usage =
13
9
14
10
format-imports <file or directory> ...
15
11
16
12
= Operation =
17
13
18
14
The script will process each filename on the command line. If the file is a
19
15
directory it recurses into it and processes all *.py files found in the tree.
20
16
It will output the paths of all the files that have been changed.
21
17
22
18
For Launchpad it was applied to the "lib/canonical/launchpad" and the "lib/lp"
23
19
subtrees. Running it with those parameters on a freshly branched LP tree
24
20
should not produce any output, meaning that all the files in the tree should
25
21
be formatted correctly.
26
22
27
23
The script identifies the import section of each file as a block of lines
28
24
that start with "import" or "from" or are indented with at least one space or
29
25
are blank lines. Comment lines are also included if they are followed by an
30
26
import statement. An initial __future__ import and a module docstring are
31
27
explicitly skipped.  
32
28
33
29
The import section is rewritten as four subsections, each separated by a
34
30
blank line. Any of the sections may be empty.
35
31
 1. Standard python library modules
36
32
 2. Import statements explicitly ordered to the top (see below)
37
33
 3. Third-party modules, meaning anything not fitting one of the other
38
34
    subsection criteria
39
35
 4. Local modules that begin with "canonical" or "lp".
40
36
41
37
Each section is sorted alphabetically by module name. Each module is put
42
38
on its own line, i.e.
43
39
{{{
44
40
  import os, sys
45
41
}}}
46
42
becomes
47
43
{{{
48
44
  import os
49
45
  import sys
50
46
}}}
51
47
Multiple import statements for the same module are conflated into one
52
48
statement, or two if the module was imported alongside an object inside it,
53
49
i.e.
54
50
{{{
55
51
  import sys
56
52
  from sys import stdin
57
53
}}}
58
54
59
55
Statements that import more than one object are put on multiple lines in
60
56
list style, i.e.
61
57
{{{
62
58
  from sys import (
63
59
      stdin,
64
60
      stdout,
65
61
      )
66
62
}}}
67
63
Objects are sorted alphabetically and case-insensitively. One-object imports
68
64
are only formatted in this manner if the statement exceeds 78 characters in
69
65
length.
70
66
71
67
Comments stick with the import statement that followed them. Comments at the
72
68
end of one-line statements are moved to be in front of it, i.e.
73
69
{{{
74
70
  from sys import exit # Have a way out
75
71
}}}
76
72
becomes
77
73
{{{
78
74
  # Have a way out
79
75
  from sys import exit
80
76
}}}
81
77
82
78
= Format control =
83
79
84
80
Two special comments allow you to control the operation of the formatter.
85
81
86
82
When an import statement is immediately preceded by a comment that starts
87
83
with the word "FIRST", it is placed into the second subsection (see above).
88
84
89
85
When the first import statement is directly preceded by a comment that starts
90
86
with the word "SKIP", the entire file is exempt from formatting.
91
87
92
88
= Known bugs =
93
89
94
90
Make sure to always check the result of the re-formatting to see if you have
95
91
been bitten by one of these.
96
92
97
93
Comments inside multi-line import statements break the formatter. A statement
98
94
like this will be ignored:
99
95
{{{
100
96
  from lp.app.interfaces import (
101
97
      # Don't do this.
102
98
      IMyInterface,
103
99
      IMyOtherInterface, # Don't do this either
104
100
      )
105
101
}}}
106
102
Actually, this will cause the statement and all following ones to be ignored:
107
103
{{{
108
104
  from lp.app.interfaces import (
109
105
  # Breaks indentation rules anyway.
110
106
      IMyInterface,
111
107
      IMyOtherInterface,
112
108
      )
113
109
}}}
114
110
115
111
If a single-line statement has both a comment in front of it and at the end
116
112
of the line, only the end-line comment will survive. This could probably
117
113
easily be fixed to concatenate the two.
118
114
{{{
119
115
  # I am a gonner.
120
116
  from lp.app.interfaces import IMyInterface # I will survive!
121
117
}}}
122
118
123
119
Line continuation characters are recognized and resolved but
124
120
not re-introduced. This may leave the re-formatted text with a line that
125
121
is over the length limit.
126
122
{{{
127
123
    from lp.app.verylongnames.orverlydeep.modulestructure.leavenoroom \
128
124
        import object
129
125
}}}
130
126
""" 
131
127
132
128
__metaclass__ = type
133
129
134
130
# SKIP this file when reformatting.
135
131
import os
136
132
import re
137
133
import sys
138
134
from textwrap import dedent
139
135
140
136
sys.path[0:0] = [os.path.dirname(__file__)]
141
137
from python_standard_libs import python_standard_libs
142
138
143
139
144
140
# All patterns are raw strings so that regex escapes such as \s and \S reach
# the re module untouched instead of being (invalid) string escapes.

# To search for escaped newline chars (explicit line continuations).
escaped_nl_regex = re.compile(r"\\\n", re.M)
# A plain "import a, b" statement; "module" is everything after the keyword.
import_regex = re.compile(r"^import +(?P<module>.+)$", re.M)
# A one-line "from a import b, c" statement with an optional end-of-line
# comment.
from_import_single_regex = re.compile(
    r"^from (?P<module>.+) +import +"
    r"(?P<objects>[*]|[a-zA-Z0-9_, ]+)"
    r"(?P<comment>#.*)?$", re.M)
# A parenthesized "from a import (b, c)" statement, possibly spanning
# several lines. Comments inside the parentheses are not matched.
from_import_multi_regex = re.compile(
    r"^from +(?P<module>.+) +import *[(](?P<objects>[a-zA-Z0-9_, \n]+)[)]$",
    re.M)
# One or more comment lines directly followed by an import statement.
comment_regex = re.compile(
    r"(?P<comment>(^#.+\n)+)(^import|^from) +(?P<module>[a-zA-Z0-9_.]+)",
    re.M)
# Separator between the names in a comma-separated list.
split_regex = re.compile(r",\s*")

# Module docstrings are multiline (""") strings that are not indented and are
# followed at some point by an import.
module_docstring_regex = re.compile(
    r'(?P<docstring>^["]{3}[^"]+["]{3}\n).*^(import |from .+ import)',
    re.M | re.S)
# The imports section starts with an import statement that is not a
# __future__ import and consists of import lines, indented lines, empty
# lines and comments which are followed by an import line. Sometimes we even
# find lines that contain a single ")"... :-(
imports_section_regex = re.compile(
    r"(^#.+\n)*^(import|(from ((?!__future__)\S+) import)).*\n"
    r"(^import .+\n|^from .+\n|^[\t ]+.+\n"
    r"|(^#.+\n)+((^import|^from) .+\n)|^\n|^[)]\n)*",
    re.M)
169
165
170
166
171
167
def format_import_lines(module, objects):
    """Generate the correct "from ... import ..." statement string.

    :param module: Dotted name of the module the objects are imported from.
    :param objects: Non-empty list of the names to import, already in the
        order in which they should appear.
    :return: The statement on a single line if it imports exactly one name
        and is at most 78 characters long, or if it is a wildcard import;
        otherwise the parenthesized list style with one name per line.
    """
    if len(objects) == 1:
        statement = "from %s import %s" % (module, objects[0])
        # "from x import (*)" is a syntax error, so a wildcard import has to
        # stay on one line even when that line is over the length limit.
        if len(statement) < 79 or objects[0] == "*":
            return statement
    return "from %s import (\n    %s,\n    )" % (
        module, ",\n    ".join(objects))
179
175
180
176
181
177
def find_imports_section(content):
    """Locate the import section inside the text of a python file.

    A leading module docstring is stepped over before the search starts.

    :param content: The complete text of the file.
    :return: A (start, end) tuple of offsets into content that delimit the
        import section, or (None, None) if no import section was found or
        the section begins with a "# SKIP" comment.
    """
    docstring_match = module_docstring_regex.search(content)
    if docstring_match is not None:
        # Begin looking right behind the module docstring.
        search_from = docstring_match.end('docstring')
    else:
        search_from = 0

    section_match = imports_section_regex.search(content, search_from)
    if section_match is None:
        return (None, None)
    if section_match.group().startswith('# SKIP'):
        # The file asked explicitly to be left alone.
        return (None, None)
    return section_match.span()
199
195
200
196
201
197
class ImportStatement:
    """Holds information about the import statements for one module.

    import_module is True if the module itself is imported ("import x"),
    objects is the sorted, duplicate-free list of names imported from the
    module ("from x import a, b") or None if there are none, and comment is
    the comment text attached to the statement or None.
    """

    def __init__(self, objects=None, comment=None):
        """Create a statement.

        :param objects: Names imported from the module, or None for a plain
            "import module" statement.
        :param comment: Comment text belonging to the statement, if any.
        """
        self.import_module = objects is None
        if objects is None:
            self.objects = None
        else:
            self.objects = self._normalize(objects)
        self.comment = comment

    @staticmethod
    def _normalize(objects):
        """Return the names without duplicates, sorted case-insensitively.

        The name itself is the tie-breaker so that names which differ only
        in case always come out in the same order.
        """
        return sorted(set(objects), key=lambda name: (name.lower(), name))

    def addObjects(self, new_objects):
        """More objects in this statement; eliminate duplicates.

        :param new_objects: List of additional names imported from the
            module.
        """
        if self.objects is None:
            # No objects so far; the new ones still need sorting.
            self.objects = self._normalize(new_objects)
        else:
            self.objects = self._normalize(self.objects + list(new_objects))

    def setComment(self, comment):
        """Set the comment of the statement, replacing any earlier one."""
        self.comment = comment
225
221
226
222
227
223
def parse_import_statements(import_section):
    """Split the import section into statements.

    :param import_section: The text of the import section of a file.
    :return: A dictionary with the module name as the key and the
        ImportStatement instance that describes how the module is imported
        as the value.
    """
    imports = {}
    # Remove escaped newlines so that continued statements are on one line.
    import_section = escaped_nl_regex.sub("", import_section)
    # Search for simple one-line import statements.
    for match in import_regex.finditer(import_section):
        # These imports have no objects.
        # Multiple modules in one statement are split up.
        for module in split_regex.split(match.group('module').strip()):
            imports[module] = ImportStatement()
    # Search for "from ... import" statements.
    for pattern in (from_import_single_regex, from_import_multi_regex):
        for match in pattern.finditer(import_section):
            import_objects = split_regex.split(
                match.group('objects').strip(" \n,"))
            module = match.group('module').strip()
            # Only one pattern has a 'comment' group.
            comment = match.groupdict().get('comment', None)
            if module in imports:
                # Catch double import lines.
                imports[module].addObjects(import_objects)
            else:
                imports[module] = ImportStatement(import_objects)
            if comment is not None:
                imports[module].setComment(comment)
    # Search for comments in import section. This pass runs last, so a
    # comment in front of a statement replaces an end-of-line comment that
    # was recorded above.
    for match in comment_regex.finditer(import_section):
        module = match.group('module').strip()
        if module not in imports:
            # The statement after the comment was not recognized by any of
            # the patterns above (or is stored under a different key, as
            # for "import a as b"), so there is nothing to attach to.
            continue
        imports[module].setComment(match.group('comment').strip())

    return imports
286
282
287
283
288
284
def format_imports(imports):
    """Group and order imports, return the new import statements.

    :param imports: Dictionary that maps module names to statement objects
        providing the attributes comment, import_module and objects.
    :return: The text of the new import section without a trailing newline.
        Non-empty subsections appear in the order standard library, FIRST,
        third-party, local and are separated by one blank line.
    """
    standard_section = {}
    first_section = {}
    thirdparty_section = {}
    local_section = {}
    # Group modules into sections.
    for module, statement in imports.items():
        module_base = module.split('.')[0]
        comment = statement.comment
        if comment is not None and comment.startswith("# FIRST"):
            first_section[module] = statement
        elif module_base in ('canonical', 'lp'):
            local_section[module] = statement
        elif module_base in python_standard_libs:
            standard_section[module] = statement
        else:
            thirdparty_section[module] = statement

    all_import_lines = []
    # Sort within each section and generate statement strings.
    sections = (
        standard_section,
        first_section,
        thirdparty_section,
        local_section,
        )
    for section in sections:
        import_lines = []
        for module in sorted(section, key=str.lower):
            statement = section[module]
            if statement.comment is not None:
                import_lines.append(statement.comment)
            if statement.import_module:
                import_lines.append("import %s" % module)
            if statement.objects is not None:
                import_lines.append(
                    format_import_lines(module, statement.objects))
        if len(import_lines) > 0:
            all_import_lines.append('\n'.join(import_lines))
    # Sections are separated by one blank line.
    return '\n\n'.join(all_import_lines)
329
325
330
326
331
327
def reformat_importsection(filename):
    """Replace the given file with a reformatted version of it.

    :param filename: Path of the python file to reformat in place.
    :return: True if the file was rewritten; False if it has no import
        section, is explicitly skipped or is already formatted correctly.
    """
    # try/finally rather than "with" keeps this usable on old interpreters
    # while still closing the file whatever happens.
    source_file = open(filename)
    try:
        pyfile = source_file.read()
    finally:
        source_file.close()
    import_start, import_end = find_imports_section(pyfile)
    if import_start is None:
        # Skip files with no import section.
        return False
    imports_section = pyfile[import_start:import_end]
    imports = parse_import_statements(imports_section)

    if pyfile[import_end:import_end+1] != '#':
        # Two blank lines before anything but comments.
        number_of_newlines = 3
    else:
        number_of_newlines = 2

    new_imports = format_imports(imports) + "\n" * number_of_newlines
    if new_imports == imports_section:
        # No change, no need to write a new file.
        return False

    new_file = open(filename, "w")
    try:
        new_file.write(pyfile[:import_start])
        new_file.write(new_imports)
        new_file.write(pyfile[import_end:])
    finally:
        # Closing guarantees the new content is flushed to disk.
        new_file.close()

    return True
358
354
359
355
360
356
def process_file(fpath):
    """Process the file with the given path.

    The path is printed to standard output if the file was changed.
    """
    if reformat_importsection(fpath):
        # With a single argument the parenthesized form prints the same
        # text under Python 2 and Python 3.
        print(fpath)
365
361
366
362
367
363
def process_tree(dpath):
    """Process every *.py file found anywhere below the given directory."""
    for folder, _subfolders, names in os.walk(dpath):
        python_names = [name for name in names if name.endswith('.py')]
        for name in python_names:
            process_file(os.path.join(folder, name))
373
369
374
370
375
371
if __name__ == "__main__":
    # Without arguments, or when asked for help, print the usage and fail.
    if len(sys.argv) == 1 or sys.argv[1] in ("-h", "-?", "--help"):
        usage = dedent("""\
        usage: format-imports <file or directory> ...
        
        Type "format-imports --docstring | less" to see the documentation.
        """)
        sys.stderr.write(usage)
        sys.exit(1)
    # The documentation of this script is its module docstring.
    if sys.argv[1] == "--docstring":
        sys.stdout.write(__doc__)
        sys.exit(2)
    # Every argument is either a directory to walk or a single file.
    for target in sys.argv[1:]:
        if os.path.isdir(target):
            handler = process_tree
        else:
            handler = process_file
        handler(target)
    sys.exit(0)
392
0
388
393
=== added file 'utilities/python_standard_libs.py'
394
--- utilities/python_standard_libs.py	1970-01-01 00:00:00 +0000
395
+++ utilities/python_standard_libs.py	2010-08-27 20:21:03 +0000
396
@@ -0,0 +1,240 @@
397
1
# Copyright 2010 Canonical Ltd.  This software is licensed under the
398
2
# GNU Affero General Public License version 3 (see the file LICENSE).
399
3
400
4
""" A list of top-level standard python library names.
401
5
402
6
This list is used by format-imports to determine if a module is in this group
403
7
or not.
404
8
The list is taken from http://docs.python.org/release/2.5.4/lib/modindex.html
405
9
but modules specific to other OSs have been taken out. It may need to be
406
10
updated from time to time.
407
11
"""
408
12
409
13
# Top-level names of the standard library modules. format-imports tests the
# first dotted component of each imported module name for membership in this
# list to decide whether the import belongs into the standard library
# subsection.
python_standard_libs = [
    'aifc',
    'anydbm',
    'array',
    'asynchat',
    'asyncore',
    'atexit',
    'audioop',
    'base64',
    'BaseHTTPServer',
    'Bastion',
    'binascii',
    'binhex',
    'bisect',
    'bsddb',
    'bz2',
    'calendar',
    'cgi',
    'CGIHTTPServer',
    'cgitb',
    'chunk',
    'cmath',
    'cmd',
    'code',
    'codecs',
    'codeop',
    'collections',
    'colorsys',
    'commands',
    'compileall',
    'compiler',
    'ConfigParser',
    'contextlib',
    'Cookie',
    'cookielib',
    'copy',
    'copy_reg',
    'cPickle',
    'cProfile',
    'crypt',
    'cStringIO',
    'csv',
    'ctypes',
    'curses',
    'datetime',
    'dbhash',
    'dbm',
    'decimal',
    'difflib',
    'dircache',
    'dis',
    'distutils',
    'dl',
    'doctest',
    'DocXMLRPCServer',
    'dumbdbm',
    'dummy_thread',
    'dummy_threading',
    'email',
    'encodings',
    'errno',
    'exceptions',
    'fcntl',
    'filecmp',
    'fileinput',
    'fnmatch',
    'formatter',
    'fpectl',
    'fpformat',
    'ftplib',
    'functools',
    'gc',
    'gdbm',
    'getopt',
    'getpass',
    'gettext',
    'glob',
    'gopherlib',
    'grp',
    'gzip',
    'hashlib',
    'heapq',
    'hmac',
    'hotshot',
    'htmlentitydefs',
    'htmllib',
    'HTMLParser',
    'httplib',
    'imageop',
    'imaplib',
    'imghdr',
    'imp',
    'inspect',
    'itertools',
    'keyword',
    'linecache',
    'locale',
    'logging',
    'mailbox',
    'mailcap',
    'marshal',
    'math',
    'md5',
    'mhlib',
    'mimetools',
    'mimetypes',
    'MimeWriter',
    'mimify',
    'mmap',
    'modulefinder',
    'multifile',
    'mutex',
    'netrc',
    'new',
    'nis',
    'nntplib',
    'operator',
    'optparse',
    'os',
    'ossaudiodev',
    'parser',
    'pdb',
    'pickle',
    'pickletools',
    'pipes',
    'pkgutil',
    'platform',
    'popen2',
    'poplib',
    'posix',
    'posixfile',
    'pprint',
    'profile',
    'pstats',
    'pty',
    'pwd',
    'py_compile',
    'pyclbr',
    'pydoc',
    'Queue',
    'quopri',
    'random',
    're',
    'readline',
    'repr',
    'resource',
    'rexec',
    'rfc822',
    'rgbimg',
    'rlcompleter',
    'robotparser',
    'runpy',
    'sched',
    'ScrolledText',
    'select',
    'sets',
    'sgmllib',
    'sha',
    'shelve',
    'shlex',
    'shutil',
    'signal',
    'SimpleHTTPServer',
    'SimpleXMLRPCServer',
    'site',
    'smtpd',
    'smtplib',
    'sndhdr',
    'socket',
    'SocketServer',
    'spwd',
    'sqlite3',
    'stat',
    'statvfs',
    'string',
    'StringIO',
    'stringprep',
    'struct',
    'subprocess',
    'sunau',
    'symbol',
    'sys',
    'syslog',
    'tabnanny',
    'tarfile',
    'telnetlib',
    'tempfile',
    'termios',
    # NOTE(review): this dotted entry cannot equal the dot-free first
    # component that format-imports looks up; the 'test' entry below already
    # covers it.
    'test.test_support',
    'test',
    'textwrap',
    'thread',
    'threading',
    'time',
    'timeit',
    'Tix',
    'Tkinter',
    'token',
    'tokenize',
    'trace',
    'traceback',
    'tty',
    'turtle',
    'types',
    'unicodedata',
    'unittest',
    'urllib2',
    'urllib',
    'urlparse',
    'user',
    'UserDict',
    'UserList',
    'UserString',
    'uu',
    'uuid',
    'warnings',
    'wave',
    'weakref',
    'webbrowser',
    'whichdb',
    'wsgiref',
    'xdrlib',
    'xml',
    'xmlrpclib',
    'zipfile',
    'zipimport',
    'zlib',
    ]
Status:	Merged
Approved by:	Henning Eggers on 2010-08-27
Approved revision:	no longer in the source branch.
Merged at revision:	11469
Proposed branch:	lp:~henninge/launchpad/format-imports
Merge into:	lp:launchpad
Diff against target:	636 lines (+627/-0) 2 files modified utilities/format-imports (+387/-0) utilities/python_standard_libs.py (+240/-0)
To merge this branch:	bzr merge lp:~henninge/launchpad/format-imports
Related bugs:	Link a bug report
Reviewer	Review Type	Date Requested	Status
Brad Crittenden (community)	code	2010-08-27	Approve on 2010-08-27
Review via email: mp+33926@code.launchpad.net