Merge lp:~mhr3/gwibber/proper-lens-search into lp:gwibber

Proposed by Michal Hruby
Status: Merged
Merged at revision: 1281
Proposed branch: lp:~mhr3/gwibber/proper-lens-search
Merge into: lp:gwibber
Diff against target: 363 lines (+175/-114)
1 file modified
lens/src/daemon.vala (+175/-114)
To merge this branch: bzr merge lp:~mhr3/gwibber/proper-lens-search
Reviewer Review Type Date Requested Status
Ken VanDine Pending
Review via email: mp+93315@code.launchpad.net

Description of the change

This branch creates a TreeIndex on top of the model, which is used for searching. By using Dee's TextAnalyzer, it properly tokenizes all the data and even folds UTF-8 characters into ASCII (which means you can search for accented characters using their non-accented counterparts).

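Another benefit is proper multi-word search: the results for the individual terms are intersected, so every term must match but their order does not matter. Searching for "one two three" will therefore find items containing, for example, "oh dear the timer shows three.. two.. one..".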

One drawback is that single-letter searches might be slower, but practically everything else should be faster.

Please note that this code revealed a few issues in the DeeTreeIndex implementation (namely bugs #933111 and #933131), and the fixes for these aren't yet in upstream dee.

To post a comment you must log in.
lp:~mhr3/gwibber/proper-lens-search updated
1267. By Michal Hruby

Use TreeIndex for searches

Revision history for this message
Ken VanDine (ken-vandine) wrote :

Looks great, thanks!

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lens/src/daemon.vala'
2--- lens/src/daemon.vala 2012-01-10 18:21:59 +0000
3+++ lens/src/daemon.vala 2012-02-15 22:55:23 +0000
4@@ -47,6 +47,9 @@
5 private unowned Dee.ModelIter _stream_iter_first = null;
6 private unowned Dee.ModelIter _stream_iter_last = null;
7
8+ private Dee.Analyzer _analyzer;
9+ private Dee.Index _index;
10+ private Dee.ICUTermFilter _ascii_filter;
11
12 construct
13 {
14@@ -75,6 +78,10 @@
15 }
16 });
17
18+ scope.generate_search_key.connect ((lens_search) =>
19+ {
20+ return lens_search.search_string.strip ();
21+ });
22 scope.search_changed.connect ((lens_search, search_type, cancellable) =>
23 {
24 if (search_type == SearchType.DEFAULT)
25@@ -113,6 +120,27 @@
26 Intl.setlocale(LocaleCategory.COLLATE, "C");
27 _sort_filter = Dee.Filter.new_collator_desc (StreamModelColumn.TIMESTAMP);
28 _model = new Dee.FilterModel (_streams_model, _sort_filter);
29+
30+ _ascii_filter = new Dee.ICUTermFilter.ascii_folder ();
31+ _analyzer = new Dee.TextAnalyzer ();
32+ _analyzer.add_term_filter ((terms_in, terms_out) =>
33+ {
34+ for (uint i = 0; i < terms_in.num_terms (); i++)
35+ {
36+ unowned string term = terms_in.get_term (i);
37+ var folded = _ascii_filter.apply (term);
38+ terms_out.add_term (term);
39+ if (folded != term) terms_out.add_term (folded);
40+ }
41+ });
42+ var reader = Dee.ModelReader.new ((model, iter) =>
43+ {
44+ var sender_col = StreamModelColumn.SENDER;
45+ var msg_col = StreamModelColumn.MESSAGE;
46+ return "%s\n%s".printf (model.get_string (iter, sender_col),
47+ model.get_string (iter, msg_col));
48+ });
49+ _index = new Dee.TreeIndex (_model, _analyzer, reader);
50 }
51
52 private void populate_filters ()
53@@ -219,41 +247,23 @@
54 }
55
56
57- private bool search_is_invalid (LensSearch? search)
58- {
59- /* This boolean expression is unfolded as we seem to get
60- * some null dereference if we join them in a big || expression */
61- if (search == null)
62- return true;
63- else if (search.search_string == null)
64- return true;
65-
66- return search.search_string.strip() == "";
67- }
68-
69- private string prepare_search_string (LensSearch? search)
70- {
71- var s = search.search_string;
72-
73- if (s.has_suffix (" "))
74- s = s.strip ();
75-
76- return s;
77- }
78-
79+ private bool is_empty_search (LensSearch search)
80+ {
81+ return search.search_string.strip () == "";
82+ }
83
84 private async void update_global_search (LensSearch search, Cancellable cancellable)
85 {
86 var results_model = scope.global_results_model;
87
88- if (search_is_invalid (search))
89- {
90- return;
91- }
92-
93- var search_string = prepare_search_string (search);
94-
95- update_results_model (results_model, search_string, null);
96+ // FIXME: no results for home screen of the dash?
97+ if (is_empty_search (search))
98+ {
99+ search.finished ();
100+ return;
101+ }
102+
103+ update_results_model (results_model, search.search_string, null);
104
105 search.finished ();
106 }
107@@ -262,10 +272,8 @@
108 {
109 var results_model = search.results_model;
110
111- var search_string = prepare_search_string (search);
112-
113- update_results_model (results_model, search_string, null);
114-
115+ update_results_model (results_model, search.search_string, null);
116+ message ("%u results", results_model.get_n_rows ());
117 search.finished ();
118 }
119
120@@ -276,26 +284,30 @@
121 string? search, Categories? category)
122 {
123 unowned Dee.ModelIter iter, end;
124- string search_string = null;
125- Categories group = Categories.MESSAGES;
126
127 var stream_ids = new Gee.ArrayList<string> ();
128 var filter = scope.get_filter("stream") as CheckOptionFilter;
129- foreach (Unity.FilterOption option in filter.options)
130+ if (filter.filtering)
131 {
132- if (option.active)
133+ foreach (Unity.FilterOption option in filter.options)
134 {
135- stream_ids.add (option.id);
136+ if (option.active)
137+ {
138+ stream_ids.add (option.id);
139+ }
140 }
141 }
142
143 var account_ids = new Gee.ArrayList<string> ();
144 filter = scope.get_filter("account_id") as CheckOptionFilter;
145- foreach (Unity.FilterOption option in filter.options)
146+ if (filter.filtering)
147 {
148- if (option.active)
149+ foreach (Unity.FilterOption option in filter.options)
150 {
151- account_ids.add (option.id);
152+ if (option.active)
153+ {
154+ account_ids.add (option.id);
155+ }
156 }
157 }
158
159@@ -306,83 +318,132 @@
160
161 _stream_iter_first = _model.get_first_iter ();
162 _stream_iter_last = end;
163-
164- if (search != null)
165- {
166- search_string = search.down();
167- }
168-
169+
170+ var term_list = Object.new (typeof (Dee.TermList)) as Dee.TermList;
171+ // search only the folded terms, FIXME: is that a good idea?
172+ _analyzer.tokenize (_ascii_filter.apply (search), term_list);
173+
174+ var matches = new Sequence<Dee.ModelIter> ();
175+ for (uint i = 0; i < term_list.num_terms (); i++)
176+ {
177+ // FIXME: use PREFIX search only for the last term?
178+ var result_set = _index.lookup (term_list.get_term (i),
179+ Dee.TermMatchFlag.PREFIX);
180+ bool first_pass = i == 0;
181+ CompareDataFunc<Dee.ModelIter> cmp_func = (a, b) =>
182+ {
183+ return a == b ? 0 : ((void*) a > (void*) b ? 1 : -1);
184+ };
185+ // intersect the results (cause we want to AND the terms)
186+ var remaining = new Sequence<Dee.ModelIter> ();
187+ foreach (var item in result_set)
188+ {
189+ if (first_pass)
190+ matches.insert_sorted (item, cmp_func);
191+ else if (matches.lookup (item, cmp_func) != null)
192+ remaining.insert_sorted (item, cmp_func);
193+ }
194+ if (!first_pass) matches = (owned) remaining;
195+ // final result set empty already?
196+ if (matches.get_begin_iter () == matches.get_end_iter ()) break;
197+ }
198+
199+ matches.sort ((a, b) =>
200+ {
201+ var col = StreamModelColumn.TIMESTAMP;
202+ return _model.get_string (b, col).collate (_model.get_string (a, col));
203+ });
204+
205+ var match_iter = matches.get_begin_iter ();
206+ var match_end_iter = matches.get_end_iter ();
207+ while (match_iter != match_end_iter)
208+ {
209+ iter = match_iter.get ();
210+
211+ if (matches_filters (_model, iter, stream_ids, account_ids))
212+ {
213+ add_result (_model, iter, results_model);
214+ }
215+
216+ match_iter = match_iter.next ();
217+ }
218+
219+ if (term_list.num_terms () > 0) return;
220+
221+ /* Go over the whole model if we had empty search */
222 while (iter != end)
223 {
224- bool stream_match = true;
225- bool account_match = true;
226- if (stream_ids != null)
227- {
228- if (stream_ids.size > 0)
229- {
230- if (!(_model.get_string(iter, StreamModelColumn.STREAM) in stream_ids))
231- {
232- stream_match = false;
233- }
234- }
235- }
236- if (account_ids.size > 0)
237- {
238- string[] _accounts_array = (string[])_model.get_value (iter, StreamModelColumn.ACCOUNTS);
239- foreach (var a in _accounts_array)
240- {
241- string _account = a.split(":")[0];
242- if (!(_account in account_ids))
243- account_match = false;
244- }
245- }
246- if (!(account_match && stream_match))
247- {
248- iter = _model.next (iter);
249- continue;
250- }
251-
252- if (search_string == null || search_string in _model.get_string(iter, StreamModelColumn.SENDER).down() || search_string in _model.get_string(iter, StreamModelColumn.MESSAGE).down())
253- {
254- if (_model.get_string(iter, StreamModelColumn.STREAM) == "messages")
255- group = Categories.MESSAGES;
256- else if (_model.get_string(iter, StreamModelColumn.STREAM) == "replies")
257- group = Categories.REPLIES;
258- else if (_model.get_string(iter, StreamModelColumn.STREAM) == "images")
259- group = Categories.IMAGES;
260- else if (_model.get_string(iter, StreamModelColumn.STREAM) == "videos")
261- group = Categories.VIDEOS;
262- else if (_model.get_string(iter, StreamModelColumn.STREAM) == "links")
263- group = Categories.LINKS;
264- else if (_model.get_string(iter, StreamModelColumn.STREAM) == "private")
265- group = Categories.PRIVATE;
266- else if (_model.get_string(iter, StreamModelColumn.STREAM) == "public")
267- group = Categories.PUBLIC;
268-
269- string _icon_uri = _model.get_string(iter, StreamModelColumn.ICON_URI);
270- var _avatar_cache_image = utils.avatar_path(_icon_uri);
271- if (_avatar_cache_image == null)
272- {
273- try
274- {
275- _avatar_cache_image = service.avatar_path (_icon_uri);
276- } catch (GLib.Error e)
277- {
278- }
279- if (_avatar_cache_image == null)
280- _avatar_cache_image = _icon_uri;
281- }
282-
283- results_model.append(_model.get_string(iter, StreamModelColumn.URL), _avatar_cache_image, group, "text/html", _model.get_string(iter, StreamModelColumn.SENDER), _model.get_string(iter, StreamModelColumn.MESSAGE));
284- }
285- if (iter == end)
286- iter = null;
287- else
288- iter = _model.next (iter);
289+ if (matches_filters (_model, iter, stream_ids, account_ids))
290+ {
291+ add_result (_model, iter, results_model);
292+ }
293+ iter = _model.next (iter);
294 }
295-
296+
297 //debug ("Results has %u rows", results_model.get_n_rows());
298 }
299
300+ private bool matches_filters (Dee.Model model, Dee.ModelIter iter,
301+ Gee.List<string> stream_ids,
302+ Gee.List<string> account_ids)
303+ {
304+ bool stream_match = true;
305+ bool account_match = true;
306+ if (stream_ids.size > 0)
307+ {
308+ stream_match = model.get_string (iter, StreamModelColumn.STREAM) in stream_ids;
309+ }
310+ if (account_ids.size > 0)
311+ {
312+ string[] _accounts_array = (string[])model.get_value (iter, StreamModelColumn.ACCOUNTS);
313+ foreach (var a in _accounts_array)
314+ {
315+ string _account = a.split(":")[0];
316+ if (!(_account in account_ids))
317+ account_match = false;
318+ }
319+ }
320+ return account_match && stream_match;
321+ }
322+
323+ private void add_result (Dee.Model model, Dee.ModelIter iter, Dee.Model results_model)
324+ {
325+ Categories group = Categories.MESSAGES;
326+
327+ unowned string stream_id =
328+ model.get_string (iter, StreamModelColumn.STREAM);
329+ switch (stream_id)
330+ {
331+ case "messages": group = Categories.MESSAGES; break;
332+ case "replies": group = Categories.REPLIES; break;
333+ case "images": group = Categories.IMAGES; break;
334+ case "videos": group = Categories.VIDEOS; break;
335+ case "links": group = Categories.LINKS; break;
336+ case "private": group = Categories.PRIVATE; break;
337+ case "public": group = Categories.PUBLIC; break;
338+ }
339+
340+ string _icon_uri = model.get_string (iter, StreamModelColumn.ICON_URI);
341+ var _avatar_cache_image = utils.avatar_path (_icon_uri);
342+ if (_avatar_cache_image == null)
343+ {
344+ try
345+ {
346+ _avatar_cache_image = service.avatar_path (_icon_uri);
347+ } catch (GLib.Error e)
348+ {
349+ }
350+ if (_avatar_cache_image == null)
351+ _avatar_cache_image = _icon_uri;
352+ }
353+
354+ results_model.append (model.get_string(iter, StreamModelColumn.URL),
355+ _avatar_cache_image,
356+ group,
357+ "text/html",
358+ _model.get_string(iter, StreamModelColumn.SENDER),
359+ _model.get_string(iter, StreamModelColumn.MESSAGE));
360+ }
361+
362 } /* End Daemon class */
363 } /* end Gwibber namespace */