Merge lp:~jelmer/bzr-search/hpss into lp:bzr-search

Proposed by Jelmer Vernooij
Status: Merged
Merged at revision: 89
Proposed branch: lp:~jelmer/bzr-search/hpss
Merge into: lp:bzr-search
Diff against target: 519 lines (+446/-3)
5 files modified
__init__.py (+20/-0)
index.py (+21/-3)
remote.py (+276/-0)
tests/__init__.py (+1/-0)
tests/test_remote.py (+128/-0)
To merge this branch: bzr merge lp:~jelmer/bzr-search/hpss
Reviewer Review Type Date Requested Status
Bazaar Developers Pending
Review via email: mp+83224@code.launchpad.net

Description of the change

Add HPSS (smart server) calls that allow remote access to the bzr-search index, together with the matching client-side calls:

 * Branch.open_index
 * Branch.init_index
 * Index.index_revisions
 * Index.indexed_revisions
 * Index.suggest
 * Index.search

(now updated to not have lp:~jelmer/bzr-search/lazy merged)

To post a comment you must log in.
Revision history for this message
Robert Collins (lifeless) wrote :

Shiny. +1

Revision history for this message
Andrew Bennetts (spiv) wrote :

Robert Collins wrote:
> Shiny. +1

I haven't had time to look at the code, but the description is very shiny.
Thanks for making extending HPSS via plugins a reality! :)

-Andrew.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file '__init__.py'
--- __init__.py 2008-08-28 02:13:47 +0000
+++ __init__.py 2011-11-23 21:50:26 +0000
@@ -38,6 +38,7 @@
38import commands38import commands
39import errors39import errors
40import index40import index
41from bzrlib.smart.request import request_handlers as smart_request_handlers
4142
4243
43for command in [44for command in [
@@ -79,6 +80,25 @@
79 index.make_log_search_filter)80 index.make_log_search_filter)
8081
8182
# Register the smart server verbs provided by this plugin.  register_lazy is
# given the module and class *names*, so the handler classes themselves are
# not referenced here.
for _verb, _handler_class in [
    ("Branch.open_index", 'SmartServerBranchRequestOpenIndex'),
    ("Branch.init_index", 'SmartServerBranchRequestInitIndex'),
    ("Index.index_revisions", 'SmartServerIndexRequestIndexRevisions'),
    ("Index.indexed_revisions", 'SmartServerIndexRequestIndexedRevisions'),
    ("Index.suggest", 'SmartServerIndexRequestSuggest'),
    ("Index.search", 'SmartServerIndexRequestSearch'),
    ]:
    smart_request_handlers.register_lazy(
        _verb, 'bzrlib.plugins.search.remote', _handler_class)
101
82def test_suite():102def test_suite():
83 # Thunk across to load_tests for niceness with older bzr versions103 # Thunk across to load_tests for niceness with older bzr versions
84 from bzrlib.tests import TestLoader104 from bzrlib.tests import TestLoader
85105
=== modified file 'index.py'
--- index.py 2011-09-19 23:34:37 +0000
+++ index.py 2011-11-23 21:50:26 +0000
@@ -31,8 +31,8 @@
31 NotBranchError,31 NotBranchError,
32 NoSuchFile,32 NoSuchFile,
33 UnknownFormatError,33 UnknownFormatError,
34 IncompatibleAPI,34 UnknownSmartMethod,
35)35 )
36from bzrlib.index import CombinedGraphIndex, GraphIndex, InMemoryGraphIndex36from bzrlib.index import CombinedGraphIndex, GraphIndex, InMemoryGraphIndex
37from bzrlib.lockdir import LockDir37from bzrlib.lockdir import LockDir
38try:38try:
@@ -120,6 +120,14 @@
120 transport = transport.clone(path)120 transport = transport.clone(path)
121 transport.ensure_base()121 transport.ensure_base()
122 index_transport = transport122 index_transport = transport
123 elif getattr(branch.bzrdir, "_call", None) is not None:
124 # FIXME 2011-11-17 JRV: Is there a better way to probe
125 # for smart server branches ?
126 from bzrlib.plugins.search.remote import RemoteIndex
127 try:
128 return RemoteIndex.init(branch)
129 except UnknownSmartMethod:
130 raise errors.CannotIndex(branch)
123 else:131 else:
124 raise errors.CannotIndex(branch)132 raise errors.CannotIndex(branch)
125 lockdir = LockDir(index_transport, 'names-lock')133 lockdir = LockDir(index_transport, 'names-lock')
@@ -205,6 +213,16 @@
205 path = 'bzr-search/svn-lookaside/' + uuid + '/' + branch_path213 path = 'bzr-search/svn-lookaside/' + uuid + '/' + branch_path
206 transport = transport.clone(path)214 transport = transport.clone(path)
207 commits_only = False215 commits_only = False
216 elif getattr(branch.bzrdir, "_call", None) is not None:
217 # FIXME 2011-11-17 JRV: Is there a better way to probe
218 # for smart server branches ?
219 from bzrlib.plugins.search.remote import RemoteIndex
220 try:
221 return RemoteIndex.open(branch)
222 except UnknownSmartMethod:
223 # Fall back to traditional methods...
224 transport = branch.bzrdir.transport.clone('bzr-search')
225 commits_only = False
208 else:226 else:
209 transport = branch.bzrdir.transport.clone('bzr-search')227 transport = branch.bzrdir.transport.clone('bzr-search')
210 commits_only = False228 commits_only = False
@@ -391,7 +409,7 @@
391409
392 def _add_index(self, builder, to_remove=None, allow_pack=True):410 def _add_index(self, builder, to_remove=None, allow_pack=True):
393 """Add a new component index to the list of indices.411 """Add a new component index to the list of indices.
394 412
395 :param builder: A component builder supporting the upload_index call.413 :param builder: A component builder supporting the upload_index call.
396 :param to_remove: An optional iterable of components to remove.414 :param to_remove: An optional iterable of components to remove.
397 :param allow_pack: Whether an auto pack is permitted by this operation.415 :param allow_pack: Whether an auto pack is permitted by this operation.
398416
=== added file 'remote.py'
--- remote.py 1970-01-01 00:00:00 +0000
+++ remote.py 2011-11-23 21:50:26 +0000
@@ -0,0 +1,276 @@
1# search, a bzr plugin for searching within bzr branches/repositories.
2# Copyright (C) 2011 Jelmer Vernooij
3#
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License version 2 as published
6# by the Free Software Foundation.
7#
8# This program is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11# GNU General Public License for more details.
12#
13# You should have received a copy of the GNU General Public License
14# along with this program; if not, write to the Free Software
15# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16#
17
18"""Smart server integration for bzr-search."""
19
20from bzrlib import remote
21from bzrlib.controldir import ControlDir
22from bzrlib.errors import (
23 ErrorFromSmartServer,
24 UnexpectedSmartServerResponse,
25 )
26from bzrlib.smart.branch import (
27 SmartServerBranchRequest,
28 )
29from bzrlib.smart.request import SuccessfulSmartServerResponse
30
31from bzrlib.plugins.search import errors, index
32
33
34def _encode_termlist(termlist):
35 return ["\0".join([k.encode('utf-8') for k in term]) for term in termlist]
36
37def _decode_termlist(termlist):
38 return [tuple([k.decode('utf-8') for k in term.split('\0')]) for term in termlist]
39
40
class RemoteIndex(object):
    """Index accessed over a smart server.

    Each public method issues the ``Branch.*_index`` / ``Index.*`` smart
    server call of the corresponding name and converts the reply into the
    values handed back to the caller.
    """

    def __init__(self, client, path, branch=None):
        """Create a proxy for the index at path.

        :param client: Smart client used to issue the calls.
        :param path: Remote path sent as the first argument of every
            ``Index.*`` call.
        :param branch: Optional branch the index belongs to.  search() reads
            its ``repository`` attribute when building hits.
        """
        self._client = client
        self._path = path
        self._branch = branch

    def _call(self, method, *args, **err_context):
        """Issue method and return its response.

        An ErrorFromSmartServer is handed to _translate_error.
        """
        try:
            return self._client.call(method, *args)
        except ErrorFromSmartServer as err:
            self._translate_error(err, **err_context)

    def _call_expecting_body(self, method, *args, **err_context):
        """Like _call, for methods that answer with a body."""
        try:
            return self._client.call_expecting_body(method, *args)
        except ErrorFromSmartServer as err:
            self._translate_error(err, **err_context)

    def _call_with_body_bytes(self, method, args, body_bytes, **err_context):
        """Like _call, for methods that are sent a body."""
        try:
            return self._client.call_with_body_bytes(method, args, body_bytes)
        except ErrorFromSmartServer as err:
            self._translate_error(err, **err_context)

    def _call_with_body_bytes_expecting_body(self, method, args, body_bytes,
                                             **err_context):
        """Like _call, for methods that are sent and answer with a body."""
        try:
            return self._client.call_with_body_bytes_expecting_body(
                method, args, body_bytes)
        # ErrorFromSmartServer is imported from bzrlib.errors at the top of
        # this module; ``errors`` here is the plugin's own errors module.
        except ErrorFromSmartServer as err:
            self._translate_error(err, **err_context)

    def _translate_error(self, err, **context):
        """Delegate err to bzrlib.remote, adding this index as context."""
        remote._translate_error(err, index=self, **context)

    @staticmethod
    def _read_body_lines(byte_stream):
        """Yield the newline-terminated lines of a streamed body.

        :param byte_stream: An iterable of string chunks; a line may be
            spread over several chunks.
        :return: An iterator of lines without their trailing newline.  Data
            after the last newline is not yielded.
        """
        pending = ""
        for chunk in byte_stream:
            pending += chunk
            lines = pending.split("\n")
            # The last element is the (possibly empty) start of a line that
            # has not been terminated yet.
            pending = lines.pop()
            for line in lines:
                yield line

    @classmethod
    def open(cls, branch):
        """Open the existing index of a smart server branch.

        :param branch: The remote branch whose index should be opened.
        :return: A RemoteIndex for branch.
        :raises NoSearchIndex: If the server answers ('no', ).
        :raises UnexpectedSmartServerResponse: For any answer other than
            ('yes', ) or ('no', ).
        """
        # This might raise UnknownSmartMethod,
        # but the caller should handle that.
        path = branch._remote_path()
        response = branch._call("Branch.open_index", path)
        if response == ('no', ):
            raise errors.NoSearchIndex(branch.user_transport)
        if response != ('yes', ):
            raise UnexpectedSmartServerResponse(response)
        return cls(branch._client, path, branch)

    @classmethod
    def init(cls, branch):
        """Ask the server to create an index for a smart server branch.

        :param branch: The remote branch to create an index for.
        :return: A RemoteIndex for branch.
        :raises UnexpectedSmartServerResponse: If the answer is not ('ok', ).
        """
        path = branch._remote_path()
        response = branch._call("Branch.init_index", path)
        if response != ('ok', ):
            raise UnexpectedSmartServerResponse(response)
        return cls(branch._client, path, branch)

    def index_branch(self, branch, tip_revision):
        """Index revisions from a branch.

        :param branch: The branch to index.
        :param tip_revision: The tip of the branch.
        """
        self.index_revisions(branch, [tip_revision])

    def index_revisions(self, branch, revisions_to_index):
        """Index some revisions from branch.

        :param branch: A branch to index.
        :param revisions_to_index: A set of revision ids to index.
        :raises UnexpectedSmartServerResponse: If the answer is not ('ok', ).
        """
        # The revision ids travel as the request body, one per line.
        body = "\n".join(revisions_to_index)
        response = self._call_with_body_bytes(
            'Index.index_revisions', (self._path, branch._remote_path(),),
            body)
        if response != ('ok', ):
            raise UnexpectedSmartServerResponse(response)

    def indexed_revisions(self):
        """Return the revision_keys that this index contains terms for.

        :return: An iterator of 1-tuples, one per line of the response body.
        :raises UnexpectedSmartServerResponse: If the answer is not ('ok', ).
        """
        response, handler = self._call_expecting_body(
            'Index.indexed_revisions', self._path)
        if response != ('ok', ):
            raise UnexpectedSmartServerResponse(response)
        for revid in self._read_body_lines(handler.read_streamed_body()):
            yield (revid, )

    def search(self, termlist):
        """Trivial set-based search of the index.

        :param termlist: A list of terms.
        :return: An iterator of SearchResults for documents indexed by all
            terms in the termlist.
        :raises UnexpectedSmartServerResponse: If the answer is not ('ok', ).
        """
        index._ensure_regexes()
        response, handler = self._call_expecting_body(
            'Index.search', self._path, _encode_termlist(termlist))
        if response != ('ok', ):
            raise UnexpectedSmartServerResponse(response)
        # We can't yield, since the caller might try to look up results
        # over the same medium.
        ret = []
        for line in self._read_body_lines(handler.read_streamed_body()):
            if not line:
                # Nothing to parse; indexing line[0] would fail.
                continue
            # The first character names the kind of hit, the remainder is
            # its payload.
            kind, payload = line[0], line[1:]
            if kind == 'r':
                hit = index.RevisionHit(self._branch.repository, (payload, ))
            elif kind == 't':
                hit = index.FileTextHit(self, self._branch.repository,
                    tuple(payload.split("\0")), termlist)
            elif kind == 'p':
                hit = index.PathHit(payload)
            else:
                raise AssertionError("Unknown hit kind %r" % kind)
            ret.append(hit)
        return iter(ret)

    def suggest(self, termlist):
        """Generate suggestions for extending a search.

        :param termlist: A list of terms.
        :return: A list of terms that start with the last search term in
            termlist, and match the rest of the search.
        :raises UnexpectedSmartServerResponse: If the answer does not start
            with 'ok'.
        """
        response = self._call('Index.suggest',
            self._path, _encode_termlist(termlist))
        if response[0] != 'ok':
            raise UnexpectedSmartServerResponse(response)
        return [(suggestion.decode('utf-8'),) for suggestion in response[1]]
180
181
class SmartServerBranchRequestOpenIndex(SmartServerBranchRequest):
    """Report whether the search index of a branch can be opened."""

    def do_with_branch(self, branch):
        """Try to open the index of branch.

        :param branch: The branch the request was made for.
        :return: A successful response of ('yes', ) if the index was opened,
            or ('no', ) if opening it raised NoSearchIndex.
        """
        try:
            index.open_index_branch(branch)
        except errors.NoSearchIndex:
            answer = 'no'
        else:
            answer = 'yes'
        return SuccessfulSmartServerResponse((answer, ))
193
194
class SmartServerBranchRequestInitIndex(SmartServerBranchRequest):
    """Create a search index for a branch."""

    def do_with_branch(self, branch, format=None):
        """Initialise an index for branch.

        :param branch: The branch the request was made for.
        :param format: Optional index format; only passed on to init_index
            when it is not None.
        :return: A successful response of ('ok', ).
        """
        if format is not None:
            index.init_index(branch, format)
        else:
            index.init_index(branch)
        return SuccessfulSmartServerResponse(('ok', ))
205
206
class SmartServerIndexRequest(SmartServerBranchRequest):
    """Common base for the requests that operate on an existing index."""

    def do_with_branch(self, branch, *args):
        """Open the index of branch and hand over to do_with_index.

        :param branch: The branch the request was made for.
        :param args: Remaining request arguments, passed on unchanged.
        :return: Whatever do_with_index returns.
        """
        opened_index = index.open_index_branch(branch)
        return self.do_with_index(opened_index, *args)

    def do_with_index(self, index, *args):
        """Handle the request for an opened index.

        Subclasses must override this method.
        """
        raise NotImplementedError(self.do_with_index)
216
217
class SmartServerIndexRequestIndexRevisions(SmartServerIndexRequest):
    """Index a set of revisions.

    The request arguments name the index and the branch to read from; the
    revision ids to index arrive in the request body, one per line.
    """

    def do_body(self, body_bytes):
        """Index the revisions listed in the body.

        :param body_bytes: Newline separated revision ids.
        :return: A successful response of ('ok', ).
        """
        # Drop empty entries: an empty body splits into [''], which is not a
        # revision id.
        revids = [revid for revid in body_bytes.split("\n") if revid]
        self._index.index_revisions(self._branch, revids)
        return SuccessfulSmartServerResponse(('ok', ))

    def do_with_index(self, index, branch_path):
        """Remember the index and open the branch to index from.

        :param index: The opened index.
        :param branch_path: Client path of the branch holding the revisions.
        :return: None, to indicate that a body is expected.
        :raises NotBranchError: If branch_path is a branch reference.
        """
        # NOTE(review): the original looked NotBranchError up on the plugin's
        # ``errors`` module, whose contents are not visible from here.
        # bzrlib.errors is where index.py imports it from.
        from bzrlib.errors import NotBranchError
        self._index = index
        transport = self.transport_from_client_path(branch_path)
        controldir = ControlDir.open_from_transport(transport)
        if controldir.get_branch_reference() is not None:
            raise NotBranchError(transport.base)
        self._branch = controldir.open_branch(ignore_fallbacks=True)
        # Indicate we want a body
        return None
235
236
class SmartServerIndexRequestIndexedRevisions(SmartServerIndexRequest):
    """Send back the revisions an index contains terms for."""

    def body_stream(self, index):
        """Generate one body line per indexed revision key.

        The elements of each key are joined with NUL bytes.
        """
        for revision_key in index.indexed_revisions():
            yield "\0".join(revision_key) + "\n"

    def do_with_index(self, index):
        """Answer ('ok', ) with the revision keys as a streamed body."""
        stream = self.body_stream(index)
        return SuccessfulSmartServerResponse(('ok', ), body_stream=stream)
247
248
class SmartServerIndexRequestSuggest(SmartServerIndexRequest):
    """Suggest terms that extend a search."""

    def do_with_index(self, index, termlist):
        """Look up suggestions for an encoded termlist.

        :param index: The opened index.
        :param termlist: The terms as encoded by the client.
        :return: A successful response of ('ok', suggestions), where each
            suggestion is UTF-8 encoded.
        """
        terms = _decode_termlist(termlist)
        encoded = []
        for (suggestion,) in index.suggest(terms):
            encoded.append(suggestion.encode('utf-8'))
        return SuccessfulSmartServerResponse(('ok', encoded))
257
258
class SmartServerIndexRequestSearch(SmartServerIndexRequest):
    """Run a search and stream the hits back."""

    def body_stream(self, results):
        """Generate one body line per hit.

        The first character of a line names the kind of hit: 't' for a file
        text, 'r' for a revision and 'p' for a path.
        """
        for hit in results:
            if isinstance(hit, index.FileTextHit):
                line = "t%s\0%s\n" % hit.text_key
            elif isinstance(hit, index.RevisionHit):
                line = "r%s\n" % hit.revision_key[0]
            elif isinstance(hit, index.PathHit):
                line = "p%s\n" % hit.path_utf8
            else:
                raise AssertionError("Unknown hit type %r" % hit)
            yield line

    def do_with_index(self, index, termlist):
        """Answer ('ok', ) with the hits for termlist as a streamed body."""
        hits = index.search(_decode_termlist(termlist))
        stream = self.body_stream(hits)
        return SuccessfulSmartServerResponse(('ok',), body_stream=stream)
0277
=== modified file 'tests/__init__.py'
--- tests/__init__.py 2008-06-14 05:07:54 +0000
+++ tests/__init__.py 2011-11-23 21:50:26 +0000
@@ -25,6 +25,7 @@
25 'errors',25 'errors',
26 'index',26 'index',
27 'inventory',27 'inventory',
28 'remote',
28 'transport',29 'transport',
29 ]30 ]
30 standard_tests.addTests(loader.loadTestsFromModuleNames(31 standard_tests.addTests(loader.loadTestsFromModuleNames(
3132
=== added file 'tests/test_remote.py'
--- tests/test_remote.py 1970-01-01 00:00:00 +0000
+++ tests/test_remote.py 2011-11-23 21:50:26 +0000
@@ -0,0 +1,128 @@
1# search, a bzr plugin for searching within bzr branches/repositories.
2# Copyright (C) 2011 Jelmer Vernooij
3#
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License version 2 as published
6# by the Free Software Foundation.
7#
8# This program is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11# GNU General Public License for more details.
12#
13# You should have received a copy of the GNU General Public License
14# along with this program; if not, write to the Free Software
15# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16#
17
18"""Tests for the smart server verbs."""
19
20from bzrlib import tests
21from bzrlib.branch import Branch
22from bzrlib.smart import (
23 request as smart_req,
24 )
25
26from bzrlib.plugins.search import (
27 errors,
28 index,
29 )
30from bzrlib.plugins.search.remote import (
31 RemoteIndex,
32 SmartServerBranchRequestOpenIndex,
33 )
34
35
class TestSmartServerBranchRequestOpenIndex(
        tests.TestCaseWithMemoryTransport):
    """Server side of the Branch.open_index request."""

    def test_missing(self):
        """A branch without an index yields ('no', )."""
        verb = SmartServerBranchRequestOpenIndex(self.get_transport())
        self.make_branch('.')
        expected = smart_req.SmartServerResponse(('no', ))
        self.assertEqual(expected, verb.execute(''))

    def test_present(self):
        """A branch with an index yields ('yes', )."""
        verb = SmartServerBranchRequestOpenIndex(self.get_transport())
        branch = self.make_branch('.')
        index.init_index(branch)
        expected = smart_req.SmartServerResponse(('yes', ))
        self.assertEqual(expected, verb.execute(''))
55
56
class TestRemoteIndex(tests.TestCaseWithTransport):
    """Opening and creating indices through a smart server."""

    def _open_remote_branch(self):
        """Serve the current directory and open the branch over it."""
        smart_transport = self.make_smart_server('.')
        return Branch.open_from_transport(smart_transport)

    def test_no_index(self):
        """Opening the index of an unindexed branch raises NoSearchIndex."""
        self.make_branch('.')
        remote_branch = self._open_remote_branch()
        self.assertRaises(errors.NoSearchIndex, RemoteIndex.open,
            remote_branch)

    def test_open(self):
        """An index created locally can be opened remotely."""
        index.init_index(self.make_branch('.'))
        remote_branch = self._open_remote_branch()
        self.assertIsInstance(RemoteIndex.open(remote_branch), RemoteIndex)

    def test_init(self):
        """init_index on a remote branch returns a RemoteIndex."""
        self.make_branch('.')
        remote_branch = self._open_remote_branch()
        self.assertIsInstance(index.init_index(remote_branch), RemoteIndex)

    def test_init_exists(self):
        """Set up a remote branch that already has an index."""
        index.init_index(self.make_branch('.'))
        self._open_remote_branch()
        # TODO: assert that index.init_index on the remote branch raises;
        # the expected exception has not been filled in yet.
87
88
class TestWithRemoteIndex(tests.TestCaseWithTransport):
    """Operations on an index opened through a smart server."""

    def make_remote_index(self):
        """Create an indexed branch and open it remotely.

        :return: A tuple of (local working tree, remote branch, RemoteIndex).
        """
        tree = self.make_branch_and_tree('.')
        local_branch = tree.branch
        index.init_index(local_branch)
        remote_transport = self.make_smart_server('.')
        remote_branch = Branch.open_from_transport(remote_transport)
        return tree, remote_branch, RemoteIndex.open(remote_branch)

    def test_index_revisions(self):
        """Revisions indexed remotely are reported as indexed."""
        # The local is called remote_index, as in the other tests, so that it
        # does not shadow the imported ``index`` module.
        tree, branch, remote_index = self.make_remote_index()
        tree.commit(message="message", rev_id='revid1')
        remote_index.index_revisions(branch, ['revid1'])
        self.assertEqual([('revid1',)],
            list(remote_index.indexed_revisions()))

    def test_indexed_revisions(self):
        """Revisions indexed locally are visible through the remote index."""
        tree, branch, remote_index = self.make_remote_index()
        tree.commit(message="message", rev_id='revid1')
        self.assertEqual([], list(remote_index.indexed_revisions()))
        local_index = index.open_index_branch(tree.branch)
        local_index.index_revisions(tree.branch, ['revid1'])
        self.assertEqual([('revid1',)],
            list(remote_index.indexed_revisions()))

    def test_suggest(self):
        """Suggestions are computed from the terms indexed on the server."""
        tree, branch, remote_index = self.make_remote_index()
        tree.commit(message="first", rev_id='revid1')
        local_index = index.open_index_branch(tree.branch)
        local_index.index_revisions(tree.branch, ['revid1'])
        self.assertEqual([(u'first',)],
            list(remote_index.suggest([(u'f',)])))

    def test_search(self):
        """A search over the remote index returns a RevisionHit."""
        tree, branch, remote_index = self.make_remote_index()
        # NOTE(review): the original comment describes a double space in the
        # commit message as a cheap smoke test for the tokeniser, but the
        # message shown here contains a single space - confirm upstream.
        revid = tree.commit('first post')
        remote_index.index_revisions(branch, [revid])
        results = list(remote_index.search([('post',)]))
        self.assertEqual(1, len(results))
        self.assertIsInstance(results[0], index.RevisionHit)
        self.assertEqual((revid,), results[0].revision_key)

Subscribers

People subscribed via source and target branches

to all changes: