Merge lp:~wgrant/launchpad/ts-rank into lp:launchpad

Proposed by William Grant
Status: Merged
Merged at revision: 18653
Proposed branch: lp:~wgrant/launchpad/ts-rank
Merge into: lp:launchpad
Diff against target: 164 lines (+22/-22)
7 files modified
lib/lp/registry/model/distribution.py (+1/-1)
lib/lp/registry/model/distroseries.py (+1/-1)
lib/lp/registry/model/pillar.py (+4/-4)
lib/lp/registry/vocabularies.py (+2/-2)
lib/lp/services/database/doc/textsearching.txt (+8/-8)
lib/lp/services/database/nl_search.py (+5/-5)
lib/lp/services/database/stormexpr.py (+1/-1)
To merge this branch: bzr merge lp:~wgrant/launchpad/ts-rank
Reviewer Review Type Date Requested Status
Colin Watson (community) Approve
Review via email: mp+345504@code.launchpad.net

Commit message

Switch all ts2.rank callsites to pg_catalog.ts_rank.

Description of the change

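The tsearch2 (ts2) compatibility schema is long deprecated and going away, so its rank() function can no longer be relied on. Once it is gone, an unqualified rank() would resolve to pg_catalog.rank, which is a window function rather than a text-search ranking function, so every callsite must call pg_catalog.ts_rank explicitly.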

To post a comment you must log in.
Revision history for this message
Colin Watson (cjwatson) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/registry/model/distribution.py'
2--- lib/lp/registry/model/distribution.py 2016-11-12 21:47:32 +0000
3+++ lib/lp/registry/model/distribution.py 2018-05-14 09:30:49 +0000
4@@ -993,7 +993,7 @@
5 find_spec = (
6 DistributionSourcePackageCache,
7 SourcePackageName,
8- SQL('rank(fti, ftq(?)) AS rank', params=(text,)),
9+ SQL('ts_rank(fti, ftq(?)) AS rank', params=(text,)),
10 )
11 origin = [
12 DistributionSourcePackageCache,
13
14=== modified file 'lib/lp/registry/model/distroseries.py'
15--- lib/lp/registry/model/distroseries.py 2018-01-26 13:47:51 +0000
16+++ lib/lp/registry/model/distroseries.py 2018-05-14 09:30:49 +0000
17@@ -1234,7 +1234,7 @@
18 find_spec = (
19 DistroSeriesPackageCache,
20 BinaryPackageName,
21- SQL('rank(fti, ftq(%s)) AS rank' % sqlvalues(text)))
22+ SQL('ts_rank(fti, ftq(%s)) AS rank' % sqlvalues(text)))
23 origin = [
24 DistroSeriesPackageCache,
25 Join(
26
27=== modified file 'lib/lp/registry/model/pillar.py'
28--- lib/lp/registry/model/pillar.py 2017-11-10 14:07:52 +0000
29+++ lib/lp/registry/model/pillar.py 2018-05-14 09:30:49 +0000
30@@ -214,7 +214,7 @@
31 # row should get the highest search rank (9999999).
32 # Each row in the PillarName table will join with only one
33 # of either the Product, Project, or Distribution tables,
34- # so the coalesce() is necessary to find the rank() which
35+ # so the coalesce() is necessary to find the ts_rank() which
36 # is not null.
37 result.order_by(SQL('''
38 (CASE WHEN PillarName.name = lower(%(text)s)
39@@ -222,9 +222,9 @@
40 OR lower(Project.title) = lower(%(text)s)
41 OR lower(Distribution.title) = lower(%(text)s)
42 THEN 9999999
43- ELSE coalesce(rank(Product.fti, ftq(%(text)s)),
44- rank(Project.fti, ftq(%(text)s)),
45- rank(Distribution.fti, ftq(%(text)s)))
46+ ELSE coalesce(ts_rank(Product.fti, ftq(%(text)s)),
47+ ts_rank(Project.fti, ftq(%(text)s)),
48+ ts_rank(Distribution.fti, ftq(%(text)s)))
49 END) DESC, PillarName.name
50 ''' % sqlvalues(text=text)))
51 # People shouldn't be calling this method with too big limits
52
53=== modified file 'lib/lp/registry/vocabularies.py'
54--- lib/lp/registry/vocabularies.py 2016-09-19 13:44:28 +0000
55+++ lib/lp/registry/vocabularies.py 2018-05-14 09:30:49 +0000
56@@ -321,7 +321,7 @@
57 getUtility(ILaunchBag).user), *vocab_filter)
58 order_by = SQL(
59 '(CASE name WHEN %s THEN 1 '
60- ' ELSE rank(fti, ftq(%s)) END) DESC, displayname, name'
61+ ' ELSE ts_rank(fti, ftq(%s)) END) DESC, displayname, name'
62 % (fti_query, fti_query))
63 return IStore(Product).find(self._table, where_clause).order_by(
64 order_by).config(limit=100)
65@@ -628,7 +628,7 @@
66 when person.name like lower(?) || '%%' then 0.6
67 when lower(person.displayname) like lower(?)
68 || '%%' then 0.5
69- else rank(fti, ftq(?))
70+ else ts_rank(fti, ftq(?))
71 end) as rank
72 FROM Person
73 WHERE Person.name LIKE lower(?) || '%%'
74
75=== modified file 'lib/lp/services/database/doc/textsearching.txt'
76--- lib/lp/services/database/doc/textsearching.txt 2016-11-14 11:45:46 +0000
77+++ lib/lp/services/database/doc/textsearching.txt 2018-05-14 09:30:49 +0000
78@@ -563,11 +563,11 @@
79 -------
80
81 We have ranking information stored in the indexes, as specified in fti.py.
82-The rank of a result is calculated using the tsearch2 rank() function.
83+The rank of a result is calculated using the ts_rank() function.
84
85 >>> runsql(r"""
86 ... SELECT
87- ... name, rank(fti, ftq('gnome')) AS rank
88+ ... name, ts_rank(fti, ftq('gnome')) AS rank
89 ... FROM product
90 ... WHERE fti @@ ftq('gnome')
91 ... ORDER BY rank DESC, name
92@@ -581,7 +581,7 @@
93 You can also build complex multi table queries and mush all the
94 ranked results together. This query does a full text search on
95 the Bug and Message tables, as well as substring name searches on
96-SourcepackageName.name and Product.name. The rank() function returns an
97+SourcepackageName.name and Product.name. The ts_rank() function returns an
98 float between 0 and 1, so I just chose some arbitrary constants for name
99 matches that seemed appropriate. It is also doing a full text search
100 against the Product table, and manually lowering the rank (again using
101@@ -589,7 +589,7 @@
102
103 >>> runsql(r"""
104 ... SELECT title, max(ranking) FROM (
105- ... SELECT Bug.title,rank(Bug.fti||Message.fti,ftq('firefox'))
106+ ... SELECT Bug.title,ts_rank(Bug.fti||Message.fti,ftq('firefox'))
107 ... AS ranking
108 ... FROM Bug, BugMessage, Message
109 ... WHERE Bug.id = BugMessage.bug AND Message.id = BugMessage.message
110@@ -607,7 +607,7 @@
111 ... AND BugTask.product = Product.id
112 ... AND Product.name LIKE lower('%firefox%')
113 ... UNION
114- ... SELECT Bug.title, rank(Product.fti, ftq('firefox')) - 0.3
115+ ... SELECT Bug.title, ts_rank(Product.fti, ftq('firefox')) - 0.3
116 ... AS ranking
117 ... FROM Bug, BugTask, Product
118 ... WHERE Bug.id = BugTask.bug
119@@ -657,10 +657,10 @@
120
121 Implementing something similar with tsearch2 is straightforward:
122 tsearch2 to_tsquery() already removes stop-words (it also stems the
123-words). Relevance can be computed using the rank() or rank_cd()
124+words). Relevance can be computed using the ts_rank() or ts_rank_cd()
125 functions. These are not TD-IDF scoring functions, but they take into
126-account where the words appeared (in the case of rank()) or proximity
127-of the words (in the case of rank_cd()). Both scoring functions can
128+account where the words appeared (in the case of ts_rank()) or proximity
129+of the words (in the case of ts_rank_cd()). Both scoring functions can
130 normalize based on document length. So the only part left to implement
131 is the >50% filtering part. Howevert the > 50% filtering is very expensive,
132 and so is processing every single returned item (> 200000 for common queries
133
134=== modified file 'lib/lp/services/database/nl_search.py'
135--- lib/lp/services/database/nl_search.py 2011-12-30 06:14:56 +0000
136+++ lib/lp/services/database/nl_search.py 2018-05-14 09:30:49 +0000
137@@ -116,11 +116,11 @@
138 eliminated from the query. That term eliminatation is only done when there
139 are 5 candidate rows or more.
140
141- The remaining terms are then ORed together. One should use the rank() or
142- rank_cd() function to order the results from running that query. This will
143- make rows that use more of the terms and for which the terms are found
144- closer in the text at the top of the list, while still returning rows that
145- use only some of the terms.
146+ The remaining terms are then ORed together. One should use the
147+ ts_rank() or ts_rank_cd() function to order the results from running
148+ that query. This will make rows that use more of the terms and for
149+ which the terms are found closer in the text at the top of the list,
150+ while still returning rows that use only some of the terms.
151
152 :terms: Some candidate search terms.
153
154
155=== modified file 'lib/lp/services/database/stormexpr.py'
156--- lib/lp/services/database/stormexpr.py 2016-06-10 22:02:37 +0000
157+++ lib/lp/services/database/stormexpr.py 2018-05-14 09:30:49 +0000
158@@ -340,5 +340,5 @@
159 def rank_by_fti(table, text, ftq=True):
160 table, query_fragment = determine_table_and_fragment(table, ftq)
161 return SQL(
162- '-rank(%s.fti, %s)' % (table.name, query_fragment), params=(text,),
163+ '-ts_rank(%s.fti, %s)' % (table.name, query_fragment), params=(text,),
164 tables=(table,))