Web · Wiki · Activities · Blog · Lists · Chat · Meeting · Bugs · Git · Translate · Archive · People · Donate

Commit 5dc851866520362f31fb498fbe0d03166f68dae4

Reindex entry on new commit (metadata update)

When re-indexing entries, take the git commit id into account so that
we notice entries we already know about but where the metadata has
changed.

The first re-index run after upgrading to this version will need to
process all entries in order to add the commit_id value to the index.
This may take a long time. Apart from that one-off cost, this change
is backwards compatible.
  
542542 old_metadata = self._index.retrieve(object_id)['metadata']
543543 metadata['creation_time'] = old_metadata['creation_time']
544544
545 self._update_metadata_in_git(object_id, metadata)
546 self._index.store(object_id, metadata)
545 commit_id = self._update_metadata_in_git(object_id, metadata)
546 self._index.store(object_id, metadata, commit_id)
547547 self._invoke_callbacks('change_metadata', object_id, metadata)
548548
549549 def delete(self, object_id):
653653
654654 # TODO: check metadata for validity first (index?)
655655 self._log_store((tree_id, child_id))
656 self._store_entry_in_git(tree_id, child_id, parent_id, path, metadata)
657 self._index.store((tree_id, child_id), metadata)
656 commit_id = self._store_entry_in_git(tree_id, child_id, parent_id,
657 path, metadata)
658 self._index.store((tree_id, child_id), metadata, commit_id)
658659 self._invoke_callbacks('save', tree_id, child_id, parent_id, metadata)
659660
660661 if delete_after and path:
734734 Log the last object after finishing the rebuild.
735735 """
736736 last_object_id = None
737 for object_id in self._get_object_ids_from_git():
737 for object_id, commit_id in self._get_object_ids_from_git():
738738 last_object_id = object_id
739 logging.debug('reindex(): checking entry %r', object_id)
740 if self._index.contains(object_id):
739 logging.debug('reindex(): checking entry %r (commit %r)',
740 object_id, commit_id)
741 if self._index.contains(object_id, commit_id):
741742 continue
742743
743 logging.debug('reindex(): adding entry %r from git', object_id)
744 logging.debug('reindex(): (re-)adding entry %r from git',
745 object_id)
744746 metadata = self._get_metadata_from_git(object_id)
745 self._index.store(object_id, metadata)
747 self._index.store(object_id, metadata, commit_id)
746748
747749 if last_object_id:
748750 self._log_store(last_object_id)
804804 input=commit_message).strip()
805805 self._git_call('update-ref', [_format_ref(tree_id, version_id),
806806 commit_hash])
807 return commit_hash
807808
808809 def _write_tree(self, path):
809810 if not path:
827827 commit_hash = self._git_call('commit-tree', ['-p', ref, tree_hash],
828828 input=commit_message).strip()
829829 self._git_call('update-ref', [ref, commit_hash])
830 return commit_hash
830831
831832 def _get_tree_hash(self, object_id):
832833 args = ['commit', _format_ref(*object_id)]
835835 return self._git_call('cat-file', args).split('\n', 1)[0].split(' ')[1]
836836
837837 def _get_object_ids_from_git(self):
838 args = ['--sort=committerdate', '--format=%(refname)',
838 args = ['--sort=committerdate', '--format=%(refname) %(objectname)',
839839 'refs/gdatastore/*/*']
840 return [tuple(line.rsplit('/', 2)[1:])
841 for line in self._git_call('for-each-ref', args).split()]
840 lines = self._git_call('for-each-ref', args).strip().split('\n')
841 ref_commits = [line.split(' ') for line in lines]
842 return [(tuple(ref.rsplit('/', 2)[1:]), commit_id)
843 for ref, commit_id in ref_commits]
842844
843845 def _get_metadata_from_git(self, object_id):
844846 args = ['commit', _format_ref(*object_id)]
  
4242_VALUE_MTIME = 2
4343_VALUE_SIZE = 3
4444_VALUE_CTIME = 4
45_VALUE_COMMIT_ID = 5
4546_STANDARD_VALUES = {
4647 'creation_time': {'number': _VALUE_CTIME, 'type': float},
4748 'filesize': {'number': _VALUE_SIZE, 'type': int},
241241 self._database.close()
242242 self._database = None
243243
244 def contains(self, object_id):
244 def contains(self, object_id, commit_id=None):
245245 postings = self._database.postlist(_object_id_term(object_id))
246246 try:
247 _ = postings.next()
247 doc_id = postings.next().docid
248248 except StopIteration:
249249 return False
250 return True
250 if not commit_id:
251 return True
252 document = self._database.get_document(doc_id)
253 return document.get_value(_VALUE_COMMIT_ID) == commit_id
251254
252255 def delete(self, object_id):
253256 writable_db = self._get_writable_db()
335335 # global_doc_id = (local_doc_id - 1) * num_databases + db_index + 1
336336 ds_index = (doc_id - 1) % len(self._data_stores)
337337 return {'metadata': deserialise_metadata(document.get_data()),
338 'data_store': self._data_stores[ds_index]}
338 'data_store': self._data_stores[ds_index],
339 'commit_id': document.get_value(_VALUE_COMMIT_ID)}
339340
340 def store(self, object_id, properties):
341 def store(self, object_id, properties, commit_id):
341342 logging.debug('store(%r, %r)', object_id, properties)
342343 assert (properties['tree_id'], properties['version_id']) == object_id
343344 id_term = _object_id_term(object_id)
349349 term_generator = TermGenerator()
350350 term_generator.index_document(document, properties)
351351 assert (document.get_value(_VALUE_TREE_ID), document.get_value(_VALUE_VERSION_ID)) == object_id
352 document.add_value(_VALUE_COMMIT_ID, commit_id)
352353 writable_db = self._get_writable_db()
353354 writable_db.replace_document(id_term, document)
354355 writable_db.commit()