Merge lp:~stub/launchpad/replication into lp:launchpad

Proposed by Stuart Bishop
Status: Merged
Approved by: Tim Penhey
Approved revision: not available
Merged at revision: not available
Proposed branch: lp:~stub/launchpad/replication
Merge into: lp:launchpad
Diff against target: 143 lines (+48/-26)
2 files modified
database/replication/Makefile (+7/-4)
database/replication/repair-restored-db.py (+41/-22)
To merge this branch: bzr merge lp:~stub/launchpad/replication
Reviewer Review Type Date Requested Status
Tim Penhey (community) Approve
Review via email: mp+14980@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Stuart Bishop (stub) wrote :

Fix staging & dogfood rebuilds again. Now tested with dumps containing genuine Slony-I cruft.

Also made repair-restored-db.py a little more robust. (I was wondering whether the Slony-I UNINSTALL NODE command is just too fragile to bother with, but decided to keep trying to use it anyway: it has likely been improved in more modern versions of Slony-I, and it will succeed more often once the authdb replication set is somewhere else.)

Revision history for this message
Tim Penhey (thumper) wrote :

Looks fine to me.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'database/replication/Makefile'
2--- database/replication/Makefile 2009-11-13 10:41:58 +0000
3+++ database/replication/Makefile 2009-11-18 04:00:30 +0000
4@@ -36,6 +36,8 @@
5 STAGING_CONFIG=staging # For swapping fresh db into place.
6 STAGING_DUMP=launchpad.dump # Dumpfile to build new staging from.
7 STAGING_TABLESPACE=pg_default # 'pg_default' for default
8+DOGFOOD_DBNAME=launchpad_dogfood
9+DOGFOOD_DUMP=launchpad.dump
10
11 _CONFIG=overridden-on-command-line
12 _SLAVE_TABLESPACE=pg_default
13@@ -98,12 +100,13 @@
14 psql -q -d lpmain_staging_new -f authdb_drop.sql
15 psql -q -d lpmain_staging_new -f authdb_create.sql \
16 2>&1 | grep -v _sl || true
17+ # Restore the data
18+ pg_restore --dbname=lpmain_staging_new \
19+ --no-acl --no-owner --disable-triggers --data-only \
20+ --exit-on-error ${STAGING_DUMP}
21 # Uninstall Slony-I if it is installed - a pg_dump of a DB with
22 # Slony-I installed isn't usable without this step.
23 LPCONFIG=${NEW_STAGING_CONFIG} ./repair-restored-db.py
24- pg_restore --dbname=lpmain_staging_new \
25- --no-acl --no-owner --disable-triggers --data-only \
26- --exit-on-error ${STAGING_DUMP}
27 # Setup replication
28 make _replicate LPCONFIG=${NEW_STAGING_CONFIG} LAG="0 seconds" \
29 _MASTER=lpmain_staging_new _SLAVE=lpmain_staging_slave_new \
30@@ -138,10 +141,10 @@
31 psql -q -d ${DOGFOOD_DBNAME} -f authdb_drop.sql
32 psql -q -d ${DOGFOOD_DBNAME} -f authdb_create.sql \
33 2>&1 | grep -v _sl || true
34- ./repair-restored-db.py -d ${DOGFOOD_DBNAME}
35 pg_restore --dbname=${DOGFOOD_DBNAME} --no-acl --no-owner \
36 --disable-triggers --data-only \
37 --exit-on-error ${DOGFOOD_DUMP}
38+ ./repair-restored-db.py -d ${DOGFOOD_DBNAME}
39 ../schema/upgrade.py -d ${DOGFOOD_DBNAME}
40 ../schema/fti.py -d ${DOGFOOD_DBNAME}
41 ../schema/security.py -d ${DOGFOOD_DBNAME}
42
43=== modified file 'database/replication/repair-restored-db.py'
44--- database/replication/repair-restored-db.py 2009-10-17 14:06:03 +0000
45+++ database/replication/repair-restored-db.py 2009-11-18 04:00:30 +0000
46@@ -27,7 +27,8 @@
47
48 from canonical.config import config
49 from canonical.database.postgresql import ConnectionString
50-from canonical.database.sqlbase import connect, quote
51+from canonical.database.sqlbase import (
52+ connect, quote, ISOLATION_LEVEL_AUTOCOMMIT)
53 from canonical.launchpad.scripts import db_options, logger_options, logger
54
55 import replication.helpers
56@@ -44,12 +45,23 @@
57
58 log = logger(options)
59
60- con = connect(options.dbuser)
61+ con = connect(options.dbuser, isolation=ISOLATION_LEVEL_AUTOCOMMIT)
62
63 if not replication.helpers.slony_installed(con):
64 log.info("Slony-I not installed. Nothing to do.")
65 return 0
66
67+ if not repair_with_slonik(log, options, con):
68+ repair_with_drop_schema(log, con)
69+
70+ return 0
71+
72+
73+def repair_with_slonik(log, options, con):
74+ """Attempt to uninstall Slony-I via 'UNINSTALL NODE' per best practice.
75+
76+ Returns True on success, False if unable to do so for any reason.
77+ """
78 cur = con.cursor()
79
80 # Determine the node id the database thinks it is.
81@@ -60,27 +72,19 @@
82 cur.execute(cmd)
83 node_id = cur.fetchone()[0]
84 log.debug("Node Id is %d" % node_id)
85+
86+ # Get a list of set ids in the database.
87+ cur.execute(
88+ "SELECT DISTINCT set_id FROM %s.sl_set"
89+ % replication.helpers.CLUSTER_NAMESPACE)
90+ set_ids = set(row[0] for row in cur.fetchall())
91+ log.debug("Set Ids are %s" % repr(set_ids))
92+
93 except psycopg2.InternalError:
94 # Not enough information to determine node id. Possibly
95- # this is an empty database. Just drop the _sl schema as
96- # it is 'good enough' with Slony-I 1.2 - this mechanism
97- # fails with Slony added primary keys, but we don't do that.
98- con.rollback()
99- cur = con.cursor()
100- cur.execute("DROP SCHEMA _sl CASCADE")
101- con.commit()
102- return 0
103-
104- # Get a list of set ids in the database.
105- cur.execute(
106- "SELECT DISTINCT set_id FROM %s.sl_set"
107- % replication.helpers.CLUSTER_NAMESPACE)
108- set_ids = set(row[0] for row in cur.fetchall())
109- log.debug("Set Ids are %s" % repr(set_ids))
110-
111- # Close so we don't block slonik(1)
112- del cur
113- con.close()
114+ # this is an empty database.
115+ log.debug('Broken or no Slony-I install.')
116+ return False
117
118 connection_string = ConnectionString(config.database.main_master)
119 if options.dbname:
120@@ -103,7 +107,22 @@
121 log.debug(line)
122 script = '\n'.join(script)
123
124- replication.helpers.execute_slonik(script, auto_preamble=False)
125+ return replication.helpers.execute_slonik(
126+ script, auto_preamble=False, exit_on_fail=False)
127+
128+
129+def repair_with_drop_schema(log, con):
130+ """
131+ Just drop the _sl schema as it is 'good enough' with Slony-I 1.2.
132+
133+ This mechanism fails with Slony added primary keys, but we don't
134+ do that.
135+ """
136+ log.info('Fallback mode - dropping _sl schema.')
137+ cur = con.cursor()
138+ cur.execute("DROP SCHEMA _sl CASCADE")
139+ return True
140+
141
142 if __name__ == '__main__':
143 sys.exit(main())