Merge lp:~stub/launchpad/db-stats into lp:launchpad/db-devel

Proposed by Stuart Bishop
Status: Merged
Approved by: Robert Collins
Approved revision: no longer in the source branch.
Merged at revision: 9568
Proposed branch: lp:~stub/launchpad/db-stats
Merge into: lp:launchpad/db-devel
Diff against target: 175 lines (+78/-22)
2 files modified
lib/lp/scripts/helpers.py (+2/-1)
utilities/report-database-stats.py (+76/-21)
To merge this branch: bzr merge lp:~stub/launchpad/db-stats
Reviewer Review Type Date Requested Status
Robert Collins (community) Approve
Jelmer Vernooij (community) Abstain
Review via email: mp+28713@code.launchpad.net

Commit message

Allow report-database-stats.py to generate reports over arbitrary time frames.

Description of the change

Allow report-database-stats.py to generate reports over arbitrary time frames. Currently it just reports on a period up until now.

To post a comment you must log in.
Revision history for this message
Jelmer Vernooij (jelmer) wrote :

Not actively reviewing this at the moment.

review: Abstain
Revision history for this message
Robert Collins (lifeless) wrote :

8 - TYPES = Option.TYPES + ("datetime",)
9 + TYPES = Option.TYPES + ("datetime", datetime)

is odd

doing string clause generation rather than storm expressions is suboptimal but ok in this context I guess given the existing code.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/scripts/helpers.py'
2--- lib/lp/scripts/helpers.py 2010-01-28 15:17:26 +0000
3+++ lib/lp/scripts/helpers.py 2010-07-22 12:39:52 +0000
4@@ -37,9 +37,10 @@
5
6 Adds a 'datetime' option type.
7 """
8- TYPES = Option.TYPES + ("datetime",)
9+ TYPES = Option.TYPES + ("datetime", datetime)
10 TYPE_CHECKER = copy(Option.TYPE_CHECKER)
11 TYPE_CHECKER["datetime"] = _check_datetime
12+ TYPE_CHECKER[datetime] = _check_datetime
13
14
15 class LPOptionParser(OptionParser):
16
17=== modified file 'utilities/report-database-stats.py'
18--- utilities/report-database-stats.py 2010-06-08 09:15:57 +0000
19+++ utilities/report-database-stats.py 2010-07-22 12:39:52 +0000
20@@ -1,4 +1,4 @@
21-#!/usr/bin/python2.5 -S
22+#!/usr/bin/python -S
23 # Copyright 2010 Canonical Ltd. This software is licensed under the
24 # GNU Affero General Public License version 3 (see the file LICENSE).
25
26@@ -8,7 +8,9 @@
27
28 import _pythonpath
29
30+from datetime import datetime
31 from operator import attrgetter
32+from textwrap import dedent
33
34 from canonical.database.sqlbase import connect, sqlvalues
35 from canonical.launchpad.scripts import db_options
36@@ -19,8 +21,48 @@
37 pass
38
39
40+def get_where_clause(options):
41+ "Generate a WHERE clause referencing the date_created column."
42+ # We have two of the from timestamp, the until timestamp and an
43+ # interval. The interval is in a format unsuitable for processing in
44+ # Python. If the interval is set, it represents the period before
45+ # the until timestamp or the period after the from timestamp,
46+ # depending on which of these is set. From this information,
47+ # generate the SQL representation of the from timestamp and the
48+ # until timestamp.
49+ if options.from_ts:
50+ from_sql = ("CAST(%s AS timestamp without time zone)"
51+ % sqlvalues(options.from_ts))
52+ elif options.interval and options.until_ts:
53+ from_sql = (
54+ "CAST(%s AS timestamp without time zone) - CAST(%s AS interval)"
55+ % sqlvalues(options.until_ts, options.interval))
56+ elif options.interval:
57+ from_sql = (
58+ "(CURRENT_TIMESTAMP AT TIME ZONE 'UTC') - CAST(%s AS interval)"
59+ % sqlvalues(options.interval))
60+ else:
61+ from_sql = "CAST('1970-01-01' AS timestamp without time zone)"
62+
63+ if options.until_ts:
64+ until_sql = (
65+ "CAST(%s AS timestamp without time zone)"
66+ % sqlvalues(options.until_ts))
67+ elif options.interval and options.from_ts:
68+ until_sql = (
69+ "CAST(%s AS timestamp without time zone) + CAST(%s AS interval)"
70+ % sqlvalues(options.from_ts, options.interval))
71+ else:
72+ until_sql = "CURRENT_TIMESTAMP AT TIME ZONE 'UTC'"
73+
74+ clause = "date_created BETWEEN (%s) AND (%s)" % (from_sql, until_sql)
75+
76+ return clause
77+
78+
79 def get_table_stats(cur, options):
80- tablestats_query = """
81+ params = {'where': get_where_clause(options)}
82+ tablestats_query = dedent("""\
83 SELECT
84 Earliest.date_created AS date_start,
85 Latest.date_created AS date_end,
86@@ -46,14 +88,13 @@
87 WHERE
88 Earliest.date_created = (
89 SELECT min(date_created) FROM DatabaseTableStats
90- WHERE date_created >= CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
91- - CAST(%s AS interval))
92+ WHERE %(where)s)
93 AND Latest.date_created = (
94- SELECT max(date_created) FROM DatabaseTableStats)
95+ SELECT max(date_created) FROM DatabaseTableStats
96+ WHERE %(where)s)
97 AND Earliest.schemaname = Latest.schemaname
98 AND Earliest.relname = Latest.relname
99- """ % sqlvalues(options.since_interval)
100-
101+ """ % params)
102 cur.execute(tablestats_query)
103
104 # description[0] is the column name, per PEP-0249
105@@ -75,22 +116,22 @@
106 # This query calculates the averate cpu utilization from the
107 # samples. It assumes samples are taken at regular intervals over
108 # the period.
109- query = """
110+ # Note that we have to use SUM()/COUNT() instead of AVG() as
111+ # database users not connected when the sample was taken are not
112+ # recorded - we want the average utilization over the time period,
113+ # not the subset of the time period the user was actually connected.
114+ params = {'where': get_where_clause(options)}
115+ query = dedent("""\
116 SELECT (
117 CAST(SUM(cpu) AS float) / (
118 SELECT COUNT(DISTINCT date_created) FROM DatabaseCpuStats
119- WHERE
120- date_created >= (CURRENT_TIMESTAMP AT TIME ZONE 'UTC')
121- - CAST (%s AS interval))
122- ) AS avg_cpu, username
123+ WHERE %(where)s
124+ )) AS avg_cpu, username
125 FROM DatabaseCpuStats
126- WHERE date_created >= (CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
127- - CAST(%s AS interval))
128+ WHERE %(where)s
129 GROUP BY username
130- """ % sqlvalues(options.since_interval, options.since_interval)
131-
132+ """ % params)
133 cur.execute(query)
134-
135 return set(cur.fetchall())
136
137
138@@ -98,8 +139,16 @@
139 parser = LPOptionParser()
140 db_options(parser)
141 parser.add_option(
142- "-i", "--interval", dest="since_interval", type=str,
143- default="100 years", metavar="INTERVAL",
144+ "-f", "--from", dest="from_ts", type=datetime,
145+ default=None, metavar="TIMESTAMP",
146+ help="Use statistics collected since TIMESTAMP.")
147+ parser.add_option(
148+ "-u", "--until", dest="until_ts", type=datetime,
149+ default=None, metavar="TIMESTAMP",
150+ help="Use statistics collected up until TIMESTAMP.")
151+ parser.add_option(
152+ "-i", "--interval", dest="interval", type=str,
153+ default=None, metavar="INTERVAL",
154 help=
155 "Use statistics collected over the last INTERVAL period. "
156 "INTERVAL is a string parsable by PostgreSQL "
157@@ -111,11 +160,17 @@
158 parser.set_defaults(dbuser="database_stats_report")
159 options, args = parser.parse_args()
160
161+ if options.from_ts and options.until_ts and options.interval:
162+ parser.error(
163+ "Only two of --from, --until and --interval may be specified.")
164+
165 con = connect(options.dbuser)
166 cur = con.cursor()
167
168- tables = get_table_stats(cur, options)
169- arbitrary_table = list(tables)[0]
170+ tables = list(get_table_stats(cur, options))
171+ if len(tables) == 0:
172+ parser.error("No statistics available in that time range.")
173+ arbitrary_table = tables[0]
174 interval = arbitrary_table.date_end - arbitrary_table.date_start
175 per_second = float(interval.days * 24 * 60 * 60 + interval.seconds)
176

Subscribers

People subscribed via source and target branches

to status/vote changes: