[opencms-dev] Lucene-search: stop words aren't displayed in search resultlist

Christian Steinert christian_steinert at web.de
Sun May 28 15:56:25 CEST 2006


Dear List,

I ran into a strange problem when I started working with the search 
function.
The search itself works fine, but the preview snippets in the 
result listing do not contain stop words like "and" and "or", which of 
course makes them very hard to read.

Has anyone experienced something similar? Or is there anyone for whom it 
just worked?
I understand that Lucene will not *search* for these words, but 
is there a way to get them *displayed* in the preview anyway?

Any ideas or working code/config combinations are appreciated.

thanks a lot.
christian





My search config is rather trivial; in particular, I did not change any 
analyzers or indexers. Anyway, I am posting my search config 
and search code here (though neither is heavily modified from its 
standard version).


search code ====
<%@ page buffer="none" import="org.opencms.main.*, org.opencms.search.*, org.opencms.file.*, org.opencms.jsp.*, java.util.*" %><%

    // JSP action element for this request. The rest of the page uses it for
    // cms.getCmsObject(), cms.link() and cms.getRequestContext().
    // (The previously fetched search manager was never used and has been dropped;
    // the short class name resolves through the org.opencms.jsp.* import above.)
    CmsJspActionElement cms = new CmsJspActionElement(pageContext, request, response);
%>

<%--
  Search bean for this request. The body of jsp:useBean is evaluated only when
  the bean is newly created in the given scope.
--%>
<jsp:useBean id="search" scope="request" class="org.opencms.search.CmsSearch">
    <%--
      Disabled alternative: take the page size and pager width from the request
      parameters "matchesperpage" / "displaypages" instead of the fixed values below.
      This must be a JSP comment: inside an HTML comment the actions would still be
      executed and the comment markers would be sent to the browser.

      <jsp:setProperty name="search" property="matchesPerPage" param="matchesperpage"/>
      <jsp:setProperty name="search" property="displayPages" param="displaypages"/>
    --%>
    <%-- Fixed defaults. --%>
    <jsp:setProperty name="search" property="matchesPerPage" value="10"/>
    <jsp:setProperty name="search" property="displayPages" value="10"/>
    <%-- Copies every request parameter whose name matches a bean property;
         because it comes last, such parameters override the defaults above. --%>
    <jsp:setProperty name="search" property="*"/>
    <%
        // Bind the bean to the current OpenCms context, then set the fields to search.
        search.init(cms.getCmsObject());
        search.setField(new String[] {"title", "keywords", "description", "content"});
    %>
</jsp:useBean>

<%!
    /**
     * Escapes the HTML special characters in s so the value can be written
     * safely into element content or a quoted attribute value.
     * Returns an empty string for null.
     */
    private static String escapeHtml(String s) {
        if (s == null) {
            return "";
        }
        StringBuffer out = new StringBuffer(s.length() + 16);
        for (int k = 0; k < s.length(); k++) {
            char c = s.charAt(k);
            switch (c) {
                case '<':  out.append("&lt;");   break;
                case '>':  out.append("&gt;");   break;
                case '&':  out.append("&amp;");  break;
                case '"':  out.append("&quot;"); break;
                case '\'': out.append("&#39;");  break;
                default:   out.append(c);
            }
        }
        return out.toString();
    }
%>
<html>
<head>
<title>Search result</title>
</head>

<body>
<h1>Search result</h1>

<%
    // Page currently shown; defaults to 1 when the parameter is missing,
    // malformed or not positive.
    int pageno = 1;
    String srchPageParam = request.getParameter("searchPage");
    if (srchPageParam != null) {
        try {
            pageno = Integer.parseInt(srchPageParam.trim());
        } catch (NumberFormatException e) {
            pageno = 1;
        }
        if (pageno < 1) {
            pageno = 1;
        }
    }

    int itemsPerPage = search.getMatchesPerPage();
    List result = search.getSearchResult();
    // The result list may be null (that case is handled below), so its size
    // must not be read unconditionally.
    int resultSize = (result == null) ? 0 : result.size();
    int firstResultNr = ((pageno - 1) * itemsPerPage) + 1;
    int lastResultNr = firstResultNr + resultSize - 1;
    int totalResultCount = search.getSearchResultCount();

    String fields = search.getFields();
    if (fields == null) {
        fields = request.getParameter("fields");
    }
    // Query-string suffix appended to every pager link. The value is URL-encoded
    // here; the complete link is HTML-escaped where it is written. Nothing is
    // appended when no field list is known.
    String fieldsParam = "";
    if (fields != null) {
        fieldsParam = "&fields=" + java.net.URLEncoder.encode(fields, "UTF-8");
    }

    if (result == null && search.getLastException() != null) {
%>
<h3>Error</h3>
<%= escapeHtml(search.getLastException().toString()) %>
<%
    } else if (totalResultCount == 0) {
%>
<p>There are no documents matching your query <strong><%= escapeHtml(search.getQuery()) %></strong>.</p>
<p>Suggestions:</p>
<ul>
    <li>Check for possible spelling errors in your search,</li>
    <li>Try searching for different or less specific terms.</li>
</ul>
<%
    } else {
%>
<p>Showing results <%= firstResultNr %> to <%= lastResultNr %> of <%= totalResultCount %> for <strong><%= escapeHtml(search.getQuery()) %></strong></p>
<%
        for (int i = 0; i < resultSize; i++) {
            CmsSearchResult entry = (CmsSearchResult) result.get(i);
            String entryLink = cms.link(cms.getRequestContext().removeSiteRoot(entry.getPath()));
%>
<p><a href="<%= escapeHtml(entryLink) %>"><%= escapeHtml(entry.getTitle()) %></a><br />
    <%--
      Also available on each entry:
        entry.getKeywords()
        entry.getDescription()
        entry.getDateLastModified()
    --%>
    <%-- NOTE(review): the excerpt is written unescaped because it presumably
         contains highlighting markup produced by the configured highlighter;
         confirm before changing this. --%>
    <%= entry.getExcerpt() %>
</p>
<%
        }
    }
%>
<p><%
    // Pager: previous link, numbered page links, next link.
    if (search.getPreviousUrl() != null) {
        %><a href="<%= escapeHtml(cms.link(search.getPreviousUrl()) + fieldsParam) %>">&lt;&lt; Previous</a> <%
    }
    Map pageLinks = search.getPageLinks();
    Iterator linkIterator = pageLinks.entrySet().iterator();
    while (linkIterator.hasNext()) {
        Map.Entry link = (Map.Entry) linkIterator.next();
        int pageNumber = ((Integer) link.getKey()).intValue();
        String pageLink = cms.link((String) link.getValue());
        if (pageNumber != pageno) {
            %><a href="<%= escapeHtml(pageLink + fieldsParam) %>"><%= pageNumber %></a> <%
        } else {
            %><span class="currentpage"><%= pageNumber %></span> <%
        }
    }
    if (search.getNextUrl() != null) {
        %><a href="<%= escapeHtml(cms.link(search.getNextUrl()) + fieldsParam) %>">Next &gt;&gt;</a><%
    }
%></p>
</body>
</html>


search-config.xml ====
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE opencms SYSTEM "http://www.opencms.org/dtd/6.0/opencms-search.dtd">

<opencms>
  <search>
    <!-- General search settings. -->
    <cache>8</cache>
    <directory>index</directory>
    <timeout>60000</timeout>
    <excerpt>1024</excerpt>
    <highlighter>org.opencms.search.documents.CmsTermHighlighterHtml</highlighter>

    <!-- Document types: each entry names a document class together with the
         mime types and resource types it is registered for. -->
    <documenttypes>
      <documenttype>
        <name>generic</name>
        <class>org.opencms.search.documents.CmsDocumentGeneric</class>
        <mimetypes/>
        <resourcetypes>
          <resourcetype>*</resourcetype>
        </resourcetypes>
      </documenttype>
      <documenttype>
        <name>html</name>
        <class>org.opencms.search.documents.CmsDocumentHtml</class>
        <mimetypes>
          <mimetype>text/html</mimetype>
        </mimetypes>
        <resourcetypes>
          <resourcetype>plain</resourcetype>
        </resourcetypes>
      </documenttype>
      <documenttype>
        <name>image</name>
        <class>org.opencms.search.documents.CmsDocumentGeneric</class>
        <mimetypes/>
        <resourcetypes>
          <resourcetype>image</resourcetype>
        </resourcetypes>
      </documenttype>
      <documenttype>
        <name>jsp</name>
        <class>org.opencms.search.documents.CmsDocumentPlainText</class>
        <mimetypes/>
        <resourcetypes>
          <resourcetype>jsp</resourcetype>
        </resourcetypes>
      </documenttype>
      <documenttype>
        <name>msexcel</name>
        <class>org.opencms.search.documents.CmsDocumentMsExcel</class>
        <mimetypes>
          <mimetype>application/vnd.ms-excel</mimetype>
        </mimetypes>
        <resourcetypes>
          <resourcetype>binary</resourcetype>
          <resourcetype>plain</resourcetype>
        </resourcetypes>
      </documenttype>
      <documenttype>
        <name>mspowerpoint</name>
        <class>org.opencms.search.documents.CmsDocumentMsPowerPoint</class>
        <mimetypes>
          <mimetype>application/vnd.ms-powerpoint</mimetype>
        </mimetypes>
        <resourcetypes>
          <resourcetype>binary</resourcetype>
          <resourcetype>plain</resourcetype>
        </resourcetypes>
      </documenttype>
      <documenttype>
        <name>msword</name>
        <class>org.opencms.search.documents.CmsDocumentMsWord</class>
        <mimetypes>
          <mimetype>application/msword</mimetype>
        </mimetypes>
        <resourcetypes>
          <resourcetype>binary</resourcetype>
          <resourcetype>plain</resourcetype>
        </resourcetypes>
      </documenttype>
      <documenttype>
        <name>pdf</name>
        <class>org.opencms.search.documents.CmsDocumentPdf</class>
        <mimetypes>
          <mimetype>application/pdf</mimetype>
        </mimetypes>
        <resourcetypes>
          <resourcetype>binary</resourcetype>
          <resourcetype>plain</resourcetype>
        </resourcetypes>
      </documenttype>
      <documenttype>
        <name>rtf</name>
        <class>org.opencms.search.documents.CmsDocumentRtf</class>
        <mimetypes>
          <mimetype>text/rtf</mimetype>
          <mimetype>application/rtf</mimetype>
        </mimetypes>
        <resourcetypes>
          <resourcetype>binary</resourcetype>
          <resourcetype>plain</resourcetype>
        </resourcetypes>
      </documenttype>
      <documenttype>
        <name>text</name>
        <class>org.opencms.search.documents.CmsDocumentPlainText</class>
        <mimetypes>
          <mimetype>text/html</mimetype>
          <mimetype>text/plain</mimetype>
        </mimetypes>
        <resourcetypes>
          <resourcetype>plain</resourcetype>
        </resourcetypes>
      </documenttype>
      <documenttype>
        <name>xmlcontent</name>
        <class>org.opencms.search.documents.CmsDocumentXmlContent</class>
        <mimetypes/>
        <resourcetypes>
          <resourcetype>*</resourcetype>
        </resourcetypes>
      </documenttype>
      <documenttype>
        <name>xmlpage</name>
        <class>org.opencms.search.documents.CmsDocumentXmlPage</class>
        <mimetypes>
          <mimetype>text/html</mimetype>
        </mimetypes>
        <resourcetypes>
          <resourcetype>xmlpage</resourcetype>
        </resourcetypes>
      </documenttype>

      <!-- Site-specific addition. -->
      <documenttype>
        <name>ba_audio</name>
        <class>org.opencms.search.documents.CmsDocumentPlainText</class>
        <mimetypes>
          <mimetype>text/plain</mimetype>
        </mimetypes>
        <resourcetypes>
          <resourcetype>ba_audio</resourcetype>
        </resourcetypes>
      </documenttype>
    </documenttypes>

    <!-- Analyzers: one Lucene analyzer class per locale.
         NOTE(review): Lucene's StandardAnalyzer and GermanAnalyzer drop stop words
         by default. If result excerpts are built from the analyzed tokens, that
         would explain stop words missing from the previews. Confirm against the
         OpenCms excerpt implementation. -->
    <analyzers>
      <analyzer>
        <class>org.apache.lucene.analysis.de.GermanAnalyzer</class>
        <locale>de</locale>
      </analyzer>
      <analyzer>
        <class>org.apache.lucene.analysis.standard.StandardAnalyzer</class>
        <locale>en</locale>
      </analyzer>
      <analyzer>
        <class>org.apache.lucene.analysis.snowball.SnowballAnalyzer</class>
        <stemmer>French</stemmer>
        <locale>fr</locale>
      </analyzer>
      <analyzer>
        <class>org.apache.lucene.analysis.snowball.SnowballAnalyzer</class>
        <stemmer>Italian</stemmer>
        <locale>it</locale>
      </analyzer>
    </analyzers>

    <!-- Indexes: each index names its project, its locale and the index
         sources (declared further below) it is built from. -->
    <indexes>
      <index>
        <name>de</name>
        <rebuild>auto</rebuild>
        <project>Online</project>
        <locale>de</locale>
        <sources>
          <source>de</source>
        </sources>
      </index>
      <index>
        <name>en</name>
        <rebuild>auto</rebuild>
        <project>Online</project>
        <locale>en</locale>
        <sources>
          <source>en</source>
        </sources>
      </index>
    </indexes>

    <!-- Index sources: the resource folders and the document type names
         listed for each source. -->
    <indexsources>
      <indexsource>
        <name>de</name>
        <indexer class="org.opencms.search.CmsVfsIndexer"/>
        <resources>
          <resource>/sites/default/de/about/</resource>
          <resource>/sites/default/de/archives/</resource>
        </resources>
        <documenttypes-indexed>
          <name>html</name>
          <name>image</name>
          <name>msexcel</name>
          <name>mspowerpoint</name>
          <name>msword</name>
          <name>pdf</name>
          <name>rtf</name>
          <name>xmlcontent</name>
          <name>xmlpage</name>
        </documenttypes-indexed>
      </indexsource>
      <indexsource>
        <name>en</name>
        <indexer class="org.opencms.search.CmsVfsIndexer"/>
        <resources>
          <resource>/sites/default/en/about/</resource>
          <resource>/sites/default/en/archives/</resource>
        </resources>
        <documenttypes-indexed>
          <name>xmlpage</name>
          <name>xmlcontent</name>
          <name>rtf</name>
          <name>pdf</name>
          <name>msword</name>
          <name>mspowerpoint</name>
          <name>msexcel</name>
          <name>image</name>
          <name>html</name>
          <!-- NOTE(review): no documenttype named "ba_article" is declared in this
               file, while "ba_audio" is declared but not listed by any index
               source. Confirm which name is intended. -->
          <name>ba_article</name>
        </documenttypes-indexed>
      </indexsource>
    </indexsources>
  </search>
</opencms>





More information about the opencms-dev mailing list