Marre de se faire chier avec des lubies de geeks…

[Plinn.git] / catalog.py
diff --git a/catalog.py b/catalog.py

index fbe323f..69bb701 100644 (file)
--- a/catalog.py
+++ b/catalog.py
@@ -5,101 +5,350 @@ from Products.CMFCore.interfaces import IIndexableObject
  from Products.CMFCore.CatalogTool import CatalogTool as BaseCatalogTool
  from Products.CMFCore.CatalogTool import IndexableObjectWrapper
  from Products.PageTemplates.PageTemplateFile import PageTemplateFile
  from Products.CMFCore.CatalogTool import CatalogTool as BaseCatalogTool
  from Products.CMFCore.CatalogTool import IndexableObjectWrapper
  from Products.PageTemplates.PageTemplateFile import PageTemplateFile
-from Products.CMFCore.permissions import ModifyPortalContent
+from Products.CMFCore.permissions import ModifyPortalContent, ManagePortal
  from zope.component import queryMultiAdapter
  from Products.ZCatalog.Catalog import Catalog
  import transaction
  from solr import *
  
  from zope.component import queryMultiAdapter
  from Products.ZCatalog.Catalog import Catalog
  import transaction
  from solr import *
  
+# imports for Catalog class
+from Products.PluginIndexes.interfaces import ILimitedResultIndex
+from Products.ZCatalog.Lazy import LazyMap, LazyCat, LazyValues
+from BTrees.IIBTree import intersection, IISet
+from BTrees.IIBTree import weightedIntersection
+import warnings
+
+_VOLATILE_SOLR_NAME = '_v_solrConnection'
+
  class SolrTransactionHook :
      ''' commit solr couplé sur le commit de la ZODB '''
  class SolrTransactionHook :
      ''' commit solr couplé sur le commit de la ZODB '''
-    def __init__(self, connection) :
-        self.connection = connection
+    def __init__(self, context, con) :
+        self.context = context
+        self.con = con
      
      def __call__(self, status) :
          if status :
      
      def __call__(self, status) :
          if status :
-            self.connection.commit()
-            self.connection.close()
+            self.con.commit()
+            self.con.close()
          else :
          else :
-            self.connection.close()
+            self.con.close()
+        try :
+            delattr(self.context, _VOLATILE_SOLR_NAME)
+        except AttributeError :
+            pass
  
  class CatalogTool(BaseCatalogTool) :
  
  class CatalogTool(BaseCatalogTool) :
-    meta_type = 'Legivoc Catalog'
+    meta_type = 'Plinn Catalog'
      security = ClassSecurityInfo()
      manage_options = (BaseCatalogTool.manage_options[:5] +
                        ({'label' : 'Solr', 'action' : 'manage_solr'},) +
                        BaseCatalogTool.manage_options[5:])
      security = ClassSecurityInfo()
      manage_options = (BaseCatalogTool.manage_options[:5] +
                        ({'label' : 'Solr', 'action' : 'manage_solr'},) +
                        BaseCatalogTool.manage_options[5:])
-    manage_solr = PageTemplateFile('www/manage_solr', globals())
+    manage_solr = PageTemplateFile('www/manage_solr.pt', globals(), __name__='manage_solr')
+    
      
      
      def __init__(self, idxs=[]) :
          super(CatalogTool, self).__init__()
      
      
      def __init__(self, idxs=[]) :
          super(CatalogTool, self).__init__()
-        self._catalog = DelegatedCatalog()
+        self._catalog = DelegatedCatalog(self)
          self.solr_url = 'http://localhost:8983/solr'
          self.delegatedIndexes = ('Title', 'Description', 'SearchableText')
      
          self.solr_url = 'http://localhost:8983/solr'
          self.delegatedIndexes = ('Title', 'Description', 'SearchableText')
      
+    security.declarePublic('getDelegatedIndexes')
+    def getDelegatedIndexes(self) :
+        """ read the method name """
+        return self.delegatedIndexes
+    
+    security.declareProtected(ManagePortal, 'setDelegatedIndexes')
+    def setDelegatedIndexes(self, indexes, REQUEST=None) :
+        """setDelegatedIndexes documentation"""
+        self.delegatedIndexes = tuple([i.strip() for i in indexes if i.strip()])
+        if REQUEST :
+            REQUEST.RESPONSE.redirect(self.absolute_url() + '/manage_solr?manage_tabs_message=Saved changes.')
+    
+    def _getSolrConnection(self) :
+        if not hasattr(self, _VOLATILE_SOLR_NAME) :
+            con = SolrConnection(self.solr_url)
+            setattr(self, _VOLATILE_SOLR_NAME, con)
+            txn = transaction.get()
+            txn.addAfterCommitHook(SolrTransactionHook(self, con))
+        return getattr(self, _VOLATILE_SOLR_NAME)
+    
      security.declarePrivate('solrAdd')
      security.declarePrivate('solrAdd')
-    def solrAdd(self, object, idxs=[], uid=None) :
-        if IIndexableObject.providedBy(object):
-            w = object
-        else:
-            w = queryMultiAdapter( (object, self), IIndexableObject )
-            if w is None:
-                # BBB
-                w = IndexableObjectWrapper(object, self)
-        
-        uid = uid if uid else self.__url(object)
-        idxs = idxs if idxs !=[] else self.delegatedIndexes
+    def solrAdd(self, w, uid, idxs) :
+        idxs = idxs if idxs else self.delegatedIndexes
+        # Filter out delegated indexes
+        idxs = [i for i in idxs if i in self.delegatedIndexes]
          data = {'id' : uid}
          for name in idxs :
              attr = getattr(w, name, '')
              data[name] = attr() if callable(attr) else attr
          data = {'id' : uid}
          for name in idxs :
              attr = getattr(w, name, '')
              data[name] = attr() if callable(attr) else attr
-        c = SolrConnection(self.solr_url)
+        c = self._getSolrConnection()
          c.add(**data)
          c.add(**data)
-        txn = transaction.get()
-        txn.addAfterCommitHook(SolrTransactionHook(c))
-    
      
      # PortalCatalog api overloads
      
      # PortalCatalog api overloads
-    security.declareProtected(ModifyPortalContent, 'indexObject')
-    def indexObject(self, object) :
-        """ Add to catalog and send to Solr """
-        super(CatalogTool, self).indexObject(object)
-        self.solrAdd(object)
-
-    security.declarePrivate('reindexObject')
-    def reindexObject(self, object, idxs=[], update_metadata=1, uid=None):
-        super(CatalogTool, self).reindexObject(object,
-                                               idxs=idxs,
-                                               update_metadata=update_metadata,
-                                               uid=uid)
-        if idxs != []:
+    def catalog_object(self, obj, uid=None, idxs=None, update_metadata=1,
+                       pghandler=None):
+        # Wraps the object with workflow and accessibility
+        # information just before cataloging.
+        if IIndexableObject.providedBy(obj):
+            w = obj
+        else:
+            w = queryMultiAdapter( (obj, self), IIndexableObject )
+            if w is None:
+                # BBB
+                w = IndexableObjectWrapper(obj, self)
+        
+        idxs_ = idxs
+        if idxs:
              # Filter out invalid indexes.
              valid_indexes = self._catalog.indexes.keys()
              # Filter out invalid indexes.
              valid_indexes = self._catalog.indexes.keys()
-            idxs = [i for i in idxs if i in valid_indexes and i in self.delegatedIndexes]
-        else :
-            idxs = self.delegatedIndexes
+            idxs_ = [i for i in idxs if i in valid_indexes]
+        
+        super(CatalogTool, self).catalog_object(w, uid, idxs_, update_metadata, pghandler)
+        self.solrAdd(w, uid, idxs)
+    
+    security.declarePrivate('reindexObject')
+    def reindexObject(self, object, idxs=[], update_metadata=1, uid=None):
+        """Update catalog after object data has changed.
+
+        The optional idxs argument is a list of specific indexes
+        to update (all of them by default).
+
+        The update_metadata flag controls whether the object's
+        metadata record is updated as well.
+
+        If a non-None uid is passed, it will be used as the catalog uid
+        for the object instead of its physical path.
+        """
+        if uid is None:
+            uid = self.__url(object)
  
  
-        if idxs :
-            self.solrAdd(object, idxs=idxs, uid=uid)
+        self.catalog_object(object, uid, idxs, update_metadata)
      
      security.declarePrivate('unindexObject')
      def unindexObject(self, object):
          """Remove from catalog.
          """
          super(CatalogTool, self).unindexObject(object)
      
      security.declarePrivate('unindexObject')
      def unindexObject(self, object):
          """Remove from catalog.
          """
          super(CatalogTool, self).unindexObject(object)
-        c = SolrConnection(self.solr_url)
+        c = self._getSolrConnection()
          url = self.__url(object)
          c.delete(id=url)
          url = self.__url(object)
          c.delete(id=url)
-        txn = transaction.get()
-        txn.addAfterCommitHook(SolrTransactionHook(c))
          
  InitializeClass(CatalogTool)
  
  
  class DelegatedCatalog(Catalog) :
      '''C'est ici qu'on délègue effectivement à Solr '''
          
  InitializeClass(CatalogTool)
  
  
  class DelegatedCatalog(Catalog) :
      '''C'est ici qu'on délègue effectivement à Solr '''
+    
+    def __init__(self, zcat, brains=None) :
+        Catalog.__init__(self, brains=brains)
+        self.zcat = zcat
+    
+    def delegateSearch(self, query, plan) :
+        '''
+        retours faux : 
+        None signifie : pas de délégation, il faut continuer à interroger les autres index.
+        IISet() vide : pas de résultat lors de la délégation, on peut arrêter la recherche.
+        '''
+        indexes = set(query.keys()).intersection(set(self.zcat.delegatedIndexes))
+        if not indexes :
+            return None
+        delegatedQuery = {}
+        for i in indexes :
+            delegatedQuery[i] = query.pop(i)
+            try : plan.remove(i)
+            except ValueError : pass
+        c = SolrConnection(self.zcat.solr_url)
+        q =' AND '.join(['%s:"%s"' % item for item in delegatedQuery.items()])
+        resp = c.query(q, fields='id', rows=len(self))
+        c.close()
+        return IISet(filter(None, [self.uids.get(r['id']) for r in resp.results])) 
+    
      def search(self, query, sort_index=None, reverse=0, limit=None, merge=1):
      def search(self, query, sort_index=None, reverse=0, limit=None, merge=1):
-        return Catalog.search(self, query,
-                              sort_index=sort_index,
-                              reverse=reverse, limit=limit, merge=merge)
-\ No newline at end of file
+        """Iterate through the indexes, applying the query to each one. If
+        merge is true then return a lazy result set (sorted if appropriate)
+        otherwise return the raw (possibly scored) results for later merging.
+        Limit is used in conjuntion with sorting or scored results to inform
+        the catalog how many results you are really interested in. The catalog
+        can then use optimizations to save time and memory. The number of
+        results is not guaranteed to fall within the limit however, you should
+        still slice or batch the results as usual."""
+
+        rs = None # resultset
+
+        # Indexes fulfill a fairly large contract here. We hand each
+        # index the query mapping we are given (which may be composed
+        # of some combination of web request, kw mappings or plain old dicts)
+        # and the index decides what to do with it. If the index finds work
+        # for itself in the query, it returns the results and a tuple of
+        # the attributes that were used. If the index finds nothing for it
+        # to do then it returns None.
+
+        # Canonicalize the request into a sensible query before passing it on
+        query = self.make_query(query)
+
+        cr = self.getCatalogPlan(query)
+        cr.start()
+
+        plan = cr.plan()
+        if not plan:
+            plan = self._sorted_search_indexes(query)
+        
+        # délégation
+        rs = self.delegateSearch(query, plan)
+        if rs is not None and not rs :
+            return LazyCat([])
+
+        indexes = self.indexes.keys()
+        for i in plan:
+            if i not in indexes:
+                # We can have bogus keys or the plan can contain index names
+                # that have been removed in the meantime
+                continue
+
+            index = self.getIndex(i)
+            _apply_index = getattr(index, "_apply_index", None)
+            if _apply_index is None:
+                continue
+
+            cr.start_split(i)
+            limit_result = ILimitedResultIndex.providedBy(index)
+            if limit_result:
+                r = _apply_index(query, rs)
+            else:
+                r = _apply_index(query)
+
+            if r is not None:
+                r, u = r
+                # Short circuit if empty result
+                # BBB: We can remove the "r is not None" check in Zope 2.14
+                # once we don't need to support the "return everything" case
+                # anymore
+                if r is not None and not r:
+                    cr.stop_split(i, result=None, limit=limit_result)
+                    return LazyCat([])
+
+                # provide detailed info about the pure intersection time
+                intersect_id = i + '#intersection'
+                cr.start_split(intersect_id)
+                # weightedIntersection preserves the values from any mappings
+                # we get, as some indexes don't return simple sets
+                if hasattr(rs, 'items') or hasattr(r, 'items'):
+                    _, rs = weightedIntersection(rs, r)
+                else:
+                    rs = intersection(rs, r)
+
+                cr.stop_split(intersect_id)
+
+                # consider the time it takes to intersect the index result with
+                # the total resultset to be part of the index time
+                cr.stop_split(i, result=r, limit=limit_result)
+                if not rs:
+                    break
+            else:
+                cr.stop_split(i, result=None, limit=limit_result)
+
+        # Try to deduce the sort limit from batching arguments
+        b_start = int(query.get('b_start', 0))
+        b_size = query.get('b_size', None)
+        if b_size is not None:
+            b_size = int(b_size)
+
+        if b_size is not None:
+            limit = b_start + b_size
+        elif limit and b_size is None:
+            b_size = limit
+
+        if rs is None:
+            # None of the indexes found anything to do with the query
+            # We take this to mean that the query was empty (an empty filter)
+            # and so we return everything in the catalog
+            warnings.warn('Your query %s produced no query restriction. '
+                          'Currently the entire catalog content is returned. '
+                          'In Zope 2.14 this will result in an empty LazyCat '
+                          'to be returned.' % repr(cr.make_key(query)),
+                          DeprecationWarning, stacklevel=3)
+
+            rlen = len(self)
+            if sort_index is None:
+                sequence, slen = self._limit_sequence(self.data.items(), rlen,
+                    b_start, b_size)
+                result = LazyMap(self.instantiate, sequence, slen,
+                    actual_result_count=rlen)
+            else:
+                cr.start_split('sort_on')
+                result = self.sortResults(
+                    self.data, sort_index, reverse, limit, merge,
+                        actual_result_count=rlen, b_start=b_start,
+                        b_size=b_size)
+                cr.stop_split('sort_on', None)
+        elif rs:
+            # We got some results from the indexes.
+            # Sort and convert to sequences.
+            # XXX: The check for 'values' is really stupid since we call
+            # items() and *not* values()
+            rlen = len(rs)
+            if sort_index is None and hasattr(rs, 'items'):
+                # having a 'items' means we have a data structure with
+                # scores.  Build a new result set, sort it by score, reverse
+                # it, compute the normalized score, and Lazify it.
+
+                if not merge:
+                    # Don't bother to sort here, return a list of
+                    # three tuples to be passed later to mergeResults
+                    # note that data_record_normalized_score_ cannot be
+                    # calculated and will always be 1 in this case
+                    getitem = self.__getitem__
+                    result = [(score, (1, score, rid), getitem)
+                            for rid, score in rs.items()]
+                else:
+                    cr.start_split('sort_on')
+
+                    rs = rs.byValue(0) # sort it by score
+                    max = float(rs[0][0])
+
+                    # Here we define our getter function inline so that
+                    # we can conveniently store the max value as a default arg
+                    # and make the normalized score computation lazy
+                    def getScoredResult(item, max=max, self=self):
+                        """
+                        Returns instances of self._v_brains, or whatever is
+                        passed into self.useBrains.
+                        """
+                        score, key = item
+                        r=self._v_result_class(self.data[key])\
+                              .__of__(aq_parent(self))
+                        r.data_record_id_ = key
+                        r.data_record_score_ = score
+                        r.data_record_normalized_score_ = int(100. * score / max)
+                        return r
+
+                    sequence, slen = self._limit_sequence(rs, rlen, b_start,
+                        b_size)
+                    result = LazyMap(getScoredResult, sequence, slen,
+                        actual_result_count=rlen)
+                    cr.stop_split('sort_on', None)
+
+            elif sort_index is None and not hasattr(rs, 'values'):
+                # no scores
+                if hasattr(rs, 'keys'):
+                    rs = rs.keys()
+                sequence, slen = self._limit_sequence(rs, rlen, b_start,
+                    b_size)
+                result = LazyMap(self.__getitem__, sequence, slen,
+                    actual_result_count=rlen)
+            else:
+                # sort.  If there are scores, then this block is not
+                # reached, therefore 'sort-on' does not happen in the
+                # context of a text index query.  This should probably
+                # sort by relevance first, then the 'sort-on' attribute.
+                cr.start_split('sort_on')
+                result = self.sortResults(rs, sort_index, reverse, limit,
+                    merge, actual_result_count=rlen, b_start=b_start,
+                    b_size=b_size)
+                cr.stop_split('sort_on', None)
+        else:
+            # Empty result set
+            result = LazyCat([])
+        cr.stop()
+        return result