Merge lp:~thekorn/zeitgeist/flexible.engine.backend into lp:zeitgeist/0.1

Proposed by Markus Korn
Status: Merged
Approved by: Siegfried Gevatter
Approved revision: 1052
Merged at revision: not available
Proposed branch: lp:~thekorn/zeitgeist/flexible.engine.backend
Merge into: lp:zeitgeist/0.1
Diff against target: None lines
To merge this branch: bzr merge lp:~thekorn/zeitgeist/flexible.engine.backend
Reviewer Review Type Date Requested Status
Mikkel Kamstrup Erlandsen Approve
Siegfried Gevatter Approve
Seif Lotfy Pending
Zeitgeist Framework Team Pending
Review via email: mp+9499@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Markus Korn (thekorn) wrote :

This branch implements the flexible engine framework for zeitgeist, as discussed on IRC recently.
The most important changes are:
  * get_default_engine() is now in _zeitgeist.engine instead of _zeitgeist.engine.engine
  * get_default_engine() either returns the running engine instance or creates a new one. To determine the type of the new engine, it looks at the ZEITGEIST_ENGINE environment variable; if that is not set, it uses a hardcoded default, which is storm for now
  * new engine implementations can be added by creating _zeitgeist/engine/SOMENAME_engine.py, where SOMENAME is the type of the engine (like sqlite3, sqlite3_querymancer, couchdb etc.)
  * all engine tests are running using the storm engine implementation

Revision history for this message
Siegfried Gevatter (rainct) wrote :

Just had a quick look at the diff, but looks good.

review: Approve
Revision history for this message
Mikkel Kamstrup Erlandsen (kamstrup) wrote :

Review: Approve
Looks good, but I didn't run it to double check

> --
> https://code.launchpad.net/~thekorn/zeitgeist/flexible.engine.backend/+merge/9499
> You are requested to review the proposed merge of lp:~thekorn/zeitgeist/flexible.engine.backend into lp:zeitgeist.
>

--
Cheers,
Mikkel

Revision history for this message
Mikkel Kamstrup Erlandsen (kamstrup) wrote :

Looks good, but I didn't do a test run of it

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file '_zeitgeist/engine/__init__.py'
--- _zeitgeist/engine/__init__.py 2008-11-19 18:43:27 +0000
+++ _zeitgeist/engine/__init__.py 2009-07-31 09:33:22 +0000
@@ -0,0 +1,49 @@
1import os
2import logging
3
# Engine type used when the caller does not request a specific one.
ENGINE_FALLBACK = "storm"

# The one engine instance of this process; set by create_engine().
_engine = None


def create_engine(engine_type=None):
    """ Creates an engine instance of the type defined by 'engine_type'.
    If 'engine_type' is None 'ENGINE_FALLBACK' is used.
    This function looks at _zeitgeist.engine to find the engine implementation.
    Each engine implementation has to follow the following conventions:
        1.) it has to be in _zeitgeist/engine/SOMENAME_engine.py
            (where SOMENAME defines the type)
        2.) the name of the class has to be ZeitgeistEngine and the class
            itself has to be a subclass of
            _zeitgeist.engine.engine_base.BaseEngine

    If an engine is already running and has the requested type, that
    instance is returned instead of creating a second one.

    Raises RuntimeError if an engine of another type is already running,
    or if the implementation module for 'engine_type' cannot be imported.
    """
    global _engine
    if engine_type is None:
        engine_type = ENGINE_FALLBACK
    engine_type = engine_type.lower()
    if _engine is not None:
        # The engine type is encoded in the name of the module defining the
        # running instance ("..SOMENAME_engine"); strip the "_engine" suffix
        # so it can be compared with the requested SOMENAME.
        running_type = _engine.__module__.split(".")[-1].lower()
        suffix = "_engine"
        if running_type.endswith(suffix):
            running_type = running_type[:-len(suffix)]
        if running_type != engine_type:
            raise RuntimeError(
                ("There is already a zeitgeist engine running. But this "
                 "engine has another than the requested type "
                 "(requested='%s', running='%s')" % (engine_type, running_type))
            )
        return _engine
    try:
        # A non-empty fromlist makes __import__ return the submodule itself
        # rather than the top-level package. The import level is left at the
        # interpreter default (which is what the explicit -1 spelled out).
        engine_module = __import__(
            "_zeitgeist.engine.%s_engine" % engine_type,
            globals(), locals(), ["ZeitgeistEngine"]
        )
    except ImportError:
        logging.exception("Could not load engine implementation for %r" % engine_type)
        raise RuntimeError("Could not load engine implementation for %r" % engine_type)
    _engine = engine_module.ZeitgeistEngine()
    return _engine
40
def get_default_engine():
    """ Return the running engine instance, creating one if none exists yet.

    When a new engine has to be created its type is read from the
    'ZEITGEIST_ENGINE' environment variable; if that variable is not
    defined, the type named by ENGINE_FALLBACK is used.
    """
    if _engine is None:
        requested_type = os.environ.get("ZEITGEIST_ENGINE", ENGINE_FALLBACK)
        return create_engine(engine_type=requested_type)
    return _engine
050
=== added file '_zeitgeist/engine/engine_base.py'
--- _zeitgeist/engine/engine_base.py 1970-01-01 00:00:00 +0000
+++ _zeitgeist/engine/engine_base.py 2009-07-31 06:35:29 +0000
@@ -0,0 +1,144 @@
1import time
2import logging
3import gobject
4import sys
5
6from functools import wraps
7
8from _zeitgeist.lrucache import LRUCache
9
10logging.basicConfig(level=logging.DEBUG)
11log = logging.getLogger("zeitgeist.engine.engine_base")
12
def time_insert(function):
    """ Decorator which logs how long a call to `function' took.

    The wrapped function must return a sized collection (anything `len()'
    accepts); its length is reported as the number of inserted items. The
    return value is passed through unchanged.
    """
    @wraps(function)
    def wrapper(*args, **kwargs):
        started = time.time()
        result = function(*args, **kwargs)
        # Hand the values to the logger instead of pre-formatting the
        # message, so no string is built when DEBUG output is disabled.
        log.debug("Inserted %s items in %.5f s.",
            len(result), time.time() - started)
        return result
    return wrapper
21
class BaseEngine(gobject.GObject):
    """ Common base class of all zeitgeist engine implementations.

    It provides the input validation of insert_event() and the generic
    loop of insert_events(); every other method raises
    NotImplementedError and has to be provided by the concrete engine.
    """

    ALLOWED_FILTER_KEYS = set(["name", "uri", "tags", "mimetypes",
        "source", "content", "application", "bookmarked"])

    def __init__(self):

        gobject.GObject.__init__(self)

        self._apps = set()
        self._last_time_from_app = {}
        self._applications = LRUCache(100)

    def insert_event(self, ritem, commit=True, force=False):
        """
        Inserts an item into the database. Returns a positive number on success,
        zero otherwise (for example, if the item already is in the
        database). In case the positive number is 1, the inserted event is new,
        in case it's 2 the event already existed and was updated (this only
        happens when `force' is True).

        This base implementation only validates `ritem' and then returns 0;
        it does not use `commit' or `force' and does not store anything.

        Raises ValueError if the uri, content, source or mimetype of the
        item is empty or consists of whitespace only.
        """
        # EventDict is not imported at the top of this module, so bring it
        # into scope here.
        from zeitgeist.dbusutils import EventDict

        # check for required items and make sure all items have the correct type
        EventDict.check_missing_items(ritem, True)

        # FIXME: uri, content, source are now required items, the statement above
        # will raise a KeyError if they are not there. What about mimetype?
        # - Markus Korn
        required = (
            ("uri", "URI"),
            ("content", "Content type"),
            ("source", "Source type"),
            ("mimetype", "mimetype"),
        )
        for key, label in required:
            if not ritem[key].strip():
                raise ValueError(
                    "Discarding item without a %s: %s" % (label, ritem))
        return 0

    @time_insert
    def insert_events(self, items):
        """
        Inserts items into the database and returns those items which were
        successfully inserted. If an item fails, that's usually because it
        already was in the database.
        """

        inserted_items = []
        for item in items:
            # This is always 0 or 1, no need to consider 2 as we don't
            # use the `force' option.
            if self.insert_event(item, commit=False):
                inserted_items.append(item)
        return inserted_items

    def get_item(self, uri):
        """ Returns basic information about the indicated URI. As we are
        fetching an item, and not an event, `timestamp' is 0 and `use'
        and `app' are empty strings."""

        raise NotImplementedError

    def find_events(self, min=0, max=sys.maxint, limit=0,
    sorting_asc=True, mode="event", filters=(), return_mode=0):
        """
        Returns all items from the database between the indicated
        timestamps `min' and `max'. Optionally the argument `tags'
        may be used to filter on tags or `mimetypes' to filter on
        mimetypes.

        Parameter `mode' can be one of "event", "item" or "mostused".
        The first mode returns all events, the second one only returns
        the last event when items are repeated and the "mostused" mode
        is like "item" but returns the results sorted by the number of
        events.

        Parameter `filters' is an array of structs containing: (text
        to search in the name, text to search in the URI, tags,
        mimetypes, source, content). The filter between characteristics
        inside the same struct is of type AND (all need to match), but
        between different structs it is OR-like (only the conditions
        described in one of the structs need to match for the item to
        be returned).

        Possible values for return_mode, which is an internal variable
        not exposed in the API:
         - 0: Return the events/items.
         - 1: Return the amount of events/items which would be returned.
         - 2: Return only the applications for the matching events.
        """
        raise NotImplementedError

    def update_items(self, items):
        raise NotImplementedError

    def delete_items(self, items):
        raise NotImplementedError

    def get_types(self):
        """
        Returns a list of all different types in the database.
        """
        raise NotImplementedError

    def get_tags(self, min_timestamp=0, max_timestamp=0, limit=0, name_filter=""):
        """
        Returns a list containing tuples with the name and the number of
        occurrences of the tags matching `name_filter', or all existing
        tags in case it's empty, sorted from most used to least used. `limit'
        can be used to limit the amount of results.

        Use `min_timestamp' and `max_timestamp' to limit the time frames you
        want to consider.
        """
        raise NotImplementedError

    def get_last_insertion_date(self, application):
        """
        Returns the timestamp of the last item which was inserted
        related to the given application. If there is no such record,
        0 is returned.
        """
        raise NotImplementedError
0145
=== modified file '_zeitgeist/engine/remote.py'
--- _zeitgeist/engine/remote.py 2009-07-20 17:10:41 +0000
+++ _zeitgeist/engine/remote.py 2009-07-23 17:37:06 +0000
@@ -21,7 +21,7 @@
21import dbus.service21import dbus.service
22import logging22import logging
2323
24from _zeitgeist.engine.engine import get_default_engine24from _zeitgeist.engine import get_default_engine
25from zeitgeist.dbusutils import DBusInterface25from zeitgeist.dbusutils import DBusInterface
26from _zeitgeist.singleton import SingletonApplication26from _zeitgeist.singleton import SingletonApplication
2727
2828
=== renamed file '_zeitgeist/engine/base.py' => '_zeitgeist/engine/storm_base.py'
=== renamed file '_zeitgeist/engine/engine.py' => '_zeitgeist/engine/storm_engine.py'
--- _zeitgeist/engine/engine.py 2009-07-29 12:16:43 +0000
+++ _zeitgeist/engine/storm_engine.py 2009-07-31 06:35:29 +0000
@@ -25,7 +25,6 @@
25import sys25import sys
26import os26import os
27import gettext27import gettext
28import gobject
29import logging28import logging
30from xdg import BaseDirectory29from xdg import BaseDirectory
31from xdg.DesktopEntry import DesktopEntry30from xdg.DesktopEntry import DesktopEntry
@@ -34,34 +33,22 @@
34except ImportError:33except ImportError:
35 import sqlite334 import sqlite3
3635
37from _zeitgeist.engine.base import *36from _zeitgeist.engine.storm_base import *
38from _zeitgeist.lrucache import LRUCache37from _zeitgeist.engine.engine_base import BaseEngine
39from zeitgeist.dbusutils import EventDict38from zeitgeist.dbusutils import EventDict
4039
41logging.basicConfig(level=logging.DEBUG)40logging.basicConfig(level=logging.DEBUG)
42log = logging.getLogger("zeitgeist.engine")41log = logging.getLogger("zeitgeist.engine")
4342
44class ZeitgeistEngine(gobject.GObject):43class ZeitgeistEngine(BaseEngine):
45 44
46 ALLOWED_FILTER_KEYS = set(["name", "uri", "tags", "mimetypes",45 def __init__(self, store=None):
47 "source", "content", "application", "bookmarked"])46 super(ZeitgeistEngine, self).__init__()
48 47 if store is not None:
49 def __init__(self, storm_store):48 self.store = store
50 49 else:
51 gobject.GObject.__init__(self)50 self.store = get_default_store()
52 51 assert self.store is not None
53 assert storm_store is not None
54 self.store = storm_store
55 self._apps = set()
56 self._last_time_from_app = {}
57 self._applications = LRUCache(100)
58
59 '''
60 path = BaseDirectory.save_data_path("zeitgeist")
61 database = os.path.join(path, "zeitgeist.sqlite")
62 self.connection = self._get_database(database)
63 self.cursor = self.connection.cursor()
64 '''
65 52
66 def _get_ids(self, uri, content, source): 53 def _get_ids(self, uri, content, source):
67 uri_id = URI.lookup_or_create(uri).id if uri else None54 uri_id = URI.lookup_or_create(uri).id if uri else None
@@ -203,21 +190,9 @@
203 successfully inserted. If an item fails, that's usually because it190 successfully inserted. If an item fails, that's usually because it
204 already was in the database.191 already was in the database.
205 """192 """
206 193 result = super(ZeitgeistEngine, self).insert_events(items)
207 inserted_items = []194 self.store.commit()
208 195 return result
209 time1 = time.time()
210 for item in items:
211 # This is always 0 or 1, no need to consider 2 as we don't
212 # use the `force' option.
213 if self.insert_event(item, commit=False):
214 inserted_items.append(item)
215 self.store.commit()
216 time2 = time.time()
217 log.debug("Inserted %s items in %.5f s." % (len(inserted_items),
218 time2 - time1))
219
220 return inserted_items
221 196
222 def get_item(self, uri):197 def get_item(self, uri):
223 """ Returns basic information about the indicated URI. As we are198 """ Returns basic information about the indicated URI. As we are
@@ -499,10 +474,3 @@
499 ORDER BY start DESC LIMIT 1474 ORDER BY start DESC LIMIT 1
500 """, (application,)).get_one()475 """, (application,)).get_one()
501 return query[0] if query else 0476 return query[0] if query else 0
502
503_engine = None
504def get_default_engine():
505 global _engine
506 if not _engine:
507 _engine = ZeitgeistEngine(get_default_store())
508 return _engine
509477
=== modified file 'test/benchmarks.py'
--- test/benchmarks.py 2009-07-13 07:43:29 +0000
+++ test/benchmarks.py 2009-07-23 16:58:55 +0000
@@ -5,10 +5,10 @@
5import os5import os
6sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))6sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
77
8from _zeitgeist.engine.base import create_store, set_store8from _zeitgeist.engine.storm_base import create_store, set_store
9from _zeitgeist.engine import base9from _zeitgeist.engine import storm_base as base
10from zeitgeist.datamodel import *10from zeitgeist.datamodel import *
11from _zeitgeist.engine.engine import ZeitgeistEngine11from _zeitgeist.engine.storm_engine import ZeitgeistEngine
1212
13from time import time13from time import time
14import unittest14import unittest
1515
=== modified file 'test/engine-engine-test.py'
--- test/engine-engine-test.py 2009-07-15 21:36:41 +0000
+++ test/engine-engine-test.py 2009-07-23 16:58:55 +0000
@@ -5,10 +5,10 @@
5import os5import os
6sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))6sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
77
8from _zeitgeist.engine.base import create_store, set_store8from _zeitgeist.engine.storm_base import create_store, set_store
9from _zeitgeist.engine import base9from _zeitgeist.engine import storm_base as base
10from zeitgeist.datamodel import *10from zeitgeist.datamodel import *
11from _zeitgeist.engine.engine import ZeitgeistEngine11from _zeitgeist.engine.storm_engine import ZeitgeistEngine
1212
13import unittest13import unittest
14import tempfile14import tempfile
1515
=== modified file 'zeitgeist-daemon'
--- zeitgeist-daemon 2009-07-20 17:10:41 +0000
+++ zeitgeist-daemon 2009-07-23 17:37:06 +0000
@@ -33,11 +33,8 @@
33gettext.install("zeitgeist", _config.localedir, unicode=1)33gettext.install("zeitgeist", _config.localedir, unicode=1)
34logging.basicConfig(level=logging.DEBUG)34logging.basicConfig(level=logging.DEBUG)
3535
36from _zeitgeist.engine.engine import get_default_engine
37from _zeitgeist.engine.remote import RemoteInterface36from _zeitgeist.engine.remote import RemoteInterface
3837
39engine = get_default_engine()
40
41dbus.mainloop.glib.DBusGMainLoop(set_as_default=True)38dbus.mainloop.glib.DBusGMainLoop(set_as_default=True)
42mainloop = gobject.MainLoop()39mainloop = gobject.MainLoop()
4340

Subscribers

People subscribed via source and target branches