diff options
author | Andreas Gohr <andi@splitbrain.org> | 2005-09-04 00:02:29 +0200 |
---|---|---|
committer | Andreas Gohr <andi@splitbrain.org> | 2005-09-04 00:02:29 +0200 |
commit | 506fa8936561993b7f70aa507d0c39a44a6ebab9 (patch) | |
tree | d5df1af7e8c1de99397e68e2e7ba39e63ab25c0c | |
parent | b5a8175656b99ced69244b3ce593c00fb61ab825 (diff) | |
download | dokuwiki-506fa8936561993b7f70aa507d0c39a44a6ebab9.tar.gz dokuwiki-506fa8936561993b7f70aa507d0c39a44a6ebab9.zip |
the search now uses the index
darcs-hash:20050903220229-7ad00-5d95f905eaeb3f6b867aa3ee43c2a8bccc533c00.gz
-rw-r--r-- | conf/dokuwiki.php | 2 | ||||
-rw-r--r-- | inc/fulltext.php | 74 | ||||
-rw-r--r-- | inc/html.php | 24 | ||||
-rw-r--r-- | inc/search.php | 3 | ||||
-rw-r--r-- | lib/exe/ajax.php | 13 |
5 files changed, 92 insertions, 24 deletions
diff --git a/conf/dokuwiki.php b/conf/dokuwiki.php
index a264e8717..3da4ca3cb 100644
--- a/conf/dokuwiki.php
+++ b/conf/dokuwiki.php
@@ -84,7 +84,7 @@ $conf['proxy']['host'] = '';
 $conf['proxy']['port'] = '';
 $conf['proxy']['user'] = '';
 $conf['proxy']['pass'] = '';
-$conf['proxy']['ssl'] = '';
+$conf['proxy']['ssl'] = 0;
 
 /* Safemode Hack */
 $conf['safemodehack'] = 0; //read http://wiki.splitbrain.org/wiki:safemodehack !
diff --git a/inc/fulltext.php b/inc/fulltext.php
index 8549a67c1..6c4e148a2 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -14,15 +14,16 @@
  * The fulltext search
  *
  * Returns a list of matching documents for the given query
+ *
  */
-function ft_pageSearch($query){
+function ft_pageSearch($query,&$poswords){
     $q = ft_queryParser($query);
 
+    // use this for higlighting later:
+    $poswords = join(' ',$q['and']);
+
     // lookup all words found in the query
     $words = array_merge($q['and'],$q['not']);
-    foreach($q['phrases'] as $phrase){
-        $words = array_merge($words,$phrase['words']);
-    }
     if(!count($words)) return array();
     $result = idx_lookup($words);
 
@@ -36,8 +37,7 @@ function ft_pageSearch($query){
         $not = array_merge($not,array_keys($result[$w]));
     }
 
-
-    // combine and words
+    // combine and-words
     if(count($q['and']) > 1){
         $docs = ft_resultCombine($q['and']);
     }else{
@@ -52,7 +52,6 @@ function ft_pageSearch($query){
 
     if(!count($docs)) return array();
 
-
     // handle phrases
     if(count($q['phrases'])){
         //build a regexp
@@ -63,7 +62,7 @@ function ft_pageSearch($query){
         // check the source of all documents for the exact phrases
         foreach(array_keys($docs) as $id){
             $text = utf8_strtolower(rawWiki($id));
-            if(!preg_match_all('/'.$regex.'/usi',$text)){
+            if(!preg_match('/'.$regex.'/usi',$text)){
                 unset($docs[$id]); // no hit - remove
             }
         }
@@ -78,6 +77,63 @@ function ft_pageSearch($query){
 }
 
 /**
+ * Quicksearch for pagenames
+ *
+ * By default it only matches the pagename and ignores the
+ * namespace. This can be changed with the second parameter
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ */
+function ft_pageLookup($id,$pageonly=true){
+    global $conf;
+    $id = preg_quote($id,'/');
+    $pages = file($conf['cachedir'].'/page.idx');
+    $pages = array_values(preg_grep('/'.$id.'/',$pages));
+
+    $cnt = count($pages);
+    for($i=0; $i<$cnt; $i++){
+        if($pageonly){
+            if(!preg_match('/'.$id.'/',noNS($pages[$i]))){
+                unset($pages[$i]);
+                continue;
+            }
+        }
+        if(!@file_exists(wikiFN($pages[$i]))){
+            unset($pages[$i]);
+            continue;
+        }
+    }
+    sort($pages);
+    return $pages;
+}
+
+/**
+ * Creates a snippet extract
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ */
+function ft_snippet($id,$poswords){
+    $poswords = preg_quote($poswords,'#');
+    $re = '('.str_replace(' ','|',$poswords).')';
+    $text = rawWiki($id);
+    //FIXME caseinsensitive matching doesn't work with UTF-8!?
+    preg_match_all('#(.{0,50})'.$re.'(.{0,50})#iu',$text,$matches,PREG_SET_ORDER);
+
+    $cnt = 0;
+    $snippet = '';
+    foreach($matches as $match){
+        $snippet .= '...'.htmlspecialchars($match[1]);
+        $snippet .= '<span class="search_hit">';
+        $snippet .= htmlspecialchars($match[2]);
+        $snippet .= '</span>';
+        $snippet .= htmlspecialchars($match[3]).'... ';
+        if($cnt++ == 2) break;
+    }
+
+    return $snippet;
+}
+
+/**
  * Combine found documents and sum up their scores
  *
  * This function is used to combine searched words with a logical
@@ -144,4 +200,4 @@ function ft_queryParser($query){
 
     return $q;
 }
-
+//Setup VIM: ex: et ts=4 enc=utf-8 :
diff --git a/inc/html.php b/inc/html.php
index dcd11feb1..b73eebf8c 100644
--- a/inc/html.php
+++ b/inc/html.php
@@ -295,6 +295,7 @@ function html_hilight($html,$query){
  */
 function html_search(){
   require_once(DOKU_INC.'inc/search.php');
+  require_once(DOKU_INC.'inc/fulltext.php');
   global $conf;
   global $QUERY;
   global $ID;
@@ -312,14 +313,14 @@ function html_search(){
 
   //do quick pagesearch
   $data = array();
-  search($data,$conf['datadir'],'search_pagename',array(query => cleanID($QUERY)));
+  $data = ft_pageLookup(cleanID($QUERY));
   if(count($data)){
     sort($data);
     print '<div class="search_quickresult">';
     print '<b>'.$lang[quickhits].':</b><br />';
-    foreach($data as $row){
+    foreach($data as $id){
       print '<div class="search_quickhits">';
-      print html_wikilink(':'.$row['id'],$conf['useheading']?NULL:$row['id']);
+      print html_wikilink(':'.$id,$conf['useheading']?NULL:$id);
       print '</div> ';
     }
     //clear float (see http://www.complexspiral.com/publications/containing-floats/)
@@ -329,16 +330,19 @@ function html_search(){
   flush();
 
   //do fulltext search
-  $data = array();
-  search($data,$conf['datadir'],'search_fulltext',array(query => utf8_strtolower($QUERY)));
+  $data = ft_pageSearch($QUERY,$poswords);
   if(count($data)){
-    usort($data,'sort_search_fulltext');
-    foreach($data as $row){
+    $num = 1;
+    foreach($data as $id => $cnt){
       print '<div class="search_result">';
-      print html_wikilink(':'.$row['id'],$conf['useheading']?NULL:$row['id'],$row['poswords']);
-      print ': <span class="search_cnt">'.$row['count'].' '.$lang['hits'].'</span><br />';
-      print '<div class="search_snippet">'.$row['snippet'].'</div>';
+      print html_wikilink(':'.$id,$conf['useheading']?NULL:$id,$poswords);
+      print ': <span class="search_cnt">'.$cnt.' '.$lang['hits'].'</span><br />';
+      if($num < 15){ // create snippets for the first number of matches only #FIXME add to conf ?
+        print '<div class="search_snippet">'.ft_snippet($id,$poswords).'</div>';
+      }
       print '</div>';
+      flush();
+      $num++;
     }
   }else{
     print '<div class="nothing">'.$lang['nothingfound'].'</div>';
diff --git a/inc/search.php b/inc/search.php
index 3604db15e..ea20c4f3b 100644
--- a/inc/search.php
+++ b/inc/search.php
@@ -283,6 +283,7 @@ function search_backlinks(&$data,$base,$file,$type,$lvl,$opts){
  * $opts['query'] is the search query
  *
  * @author Andreas Gohr <andi@splitbrain.org>
+ * @deprecated - fulltext indexer is used instead
  */
 function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
   //we do nothing with directories
@@ -383,6 +384,8 @@ function search_reference(&$data,$base,$file,$type,$lvl,$opts){
  *
  * @author Andreas Gohr <andi@splitbrain.org>
  * @author Matthias Grimm <matthiasgrimm@users.sourceforge.net>
+ *
+ * @deprecated - fulltext indexer is used instead
  */
 function search_regex(&$data,$base,$file,$reg,$words){
 
diff --git a/lib/exe/ajax.php b/lib/exe/ajax.php
index 5b0c1633e..28d064fc9 100644
--- a/lib/exe/ajax.php
+++ b/lib/exe/ajax.php
@@ -42,17 +42,22 @@ function ajax_qsearch(){
   $query = cleanID($_POST['q']);
   if(empty($query)) return;
 
-  $nsdir = str_replace(':','/',getNS($query));
-  require_once(DOKU_INC.'inc/search.php');
   require_once(DOKU_INC.'inc/html.php');
+  require_once(DOKU_INC.'inc/fulltext.php');
 
   $data = array();
-  search($data,$conf['datadir'],'search_qsearch',array(query => $query),$nsdir);
+  $data = ft_pageLookup($query);
 
   if(!count($data)) return;
 
   print '<b>'.$lang['quickhits'].'</b>';
-  print html_buildlist($data,'qsearch','html_list_index');
+  print '<ul>';
+  foreach($data as $id){
+    print '<li>';
+    print html_wikilink(':'.$id,$conf['useheading']?NULL:$id);
+    print '</li>';
+  }
+  print '</ul>';
 }
 
 //Setup VIM: ex: et ts=2 enc=utf-8 :