about summary refs log tree commit diff stats homepage
path: root/inc/fulltext.php
diff options
context:
space:
mode:
author Andreas Gohr <andi@splitbrain.org> 2023-02-14 17:58:50 +0100
committer Andreas Gohr <andi@splitbrain.org> 2023-02-14 17:58:50 +0100
commit 7fb26b8e77126eb9b9f64f546b1fc9658bee2f3b (patch)
tree 15696cb3704f7321bf1ba9b63d7cab7f6f5677d7 /inc/fulltext.php
parent 4dc66d1dae1e5946475fd432b08a95bb4844429a (diff)
download dokuwiki-7fb26b8e77126eb9b9f64f546b1fc9658bee2f3b.tar.gz
dokuwiki-7fb26b8e77126eb9b9f64f546b1fc9658bee2f3b.zip
Properly quick search in titles with UTF-8 chars.
Fixes #3808
Diffstat (limited to 'inc/fulltext.php')
-rw-r--r-- inc/fulltext.php 31
1 files changed, 21 insertions, 10 deletions
diff --git a/inc/fulltext.php b/inc/fulltext.php
index 7c28a5962..d2e579544 100644
--- a/inc/fulltext.php
+++ b/inc/fulltext.php
@@ -7,6 +7,8 @@
*/
use dokuwiki\Extension\Event;
+use dokuwiki\Utf8\Clean;
+use dokuwiki\Utf8\PhpString;
use dokuwiki\Utf8\Sort;
/**
@@ -100,7 +102,7 @@ function _ft_pageSearch(&$data) {
);
$evt = new Event('FULLTEXT_PHRASE_MATCH',$evdata);
if ($evt->advise_before() && $evt->result !== true) {
- $text = \dokuwiki\Utf8\PhpString::strtolower($evdata['text']);
+ $text = PhpString::strtolower($evdata['text']);
if (strpos($text, $phrase) !== false) {
$evt->result = true;
}
@@ -365,7 +367,16 @@ function _ft_filterResultsByTime(array $results, $after, $before) {
* @return bool
*/
function _ft_pageLookupTitleCompare($search, $title) {
- return stripos($title, $search) !== false;
+ if (Clean::isASCII($search)) {
+ $pos = stripos($title, $search);
+ } else {
+ $pos = PhpString::strpos(
+ PhpString::strtolower($title),
+ PhpString::strtolower($search)
+ );
+ }
+
+ return $pos !== false;
}
/**
@@ -427,7 +438,7 @@ function ft_snippet($id,$highlight){
$match = array();
$snippets = array();
$utf8_offset = $offset = $end = 0;
- $len = \dokuwiki\Utf8\PhpString::strlen($text);
+ $len = PhpString::strlen($text);
// build a regexp from the phrases to highlight
$re1 = '(' .
@@ -457,8 +468,8 @@ function ft_snippet($id,$highlight){
list($str,$idx) = $match[0];
// convert $idx (a byte offset) into a utf8 character offset
- $utf8_idx = \dokuwiki\Utf8\PhpString::strlen(substr($text,0,$idx));
- $utf8_len = \dokuwiki\Utf8\PhpString::strlen($str);
+ $utf8_idx = PhpString::strlen(substr($text,0,$idx));
+ $utf8_len = PhpString::strlen($str);
// establish context, 100 bytes surrounding the match string
// first look to see if we can go 100 either side,
@@ -487,9 +498,9 @@ function ft_snippet($id,$highlight){
$end = $utf8_idx + $utf8_len + $post; // now set it to the end of this context
if ($append) {
- $snippets[count($snippets)-1] .= \dokuwiki\Utf8\PhpString::substr($text,$append,$end-$append);
+ $snippets[count($snippets)-1] .= PhpString::substr($text,$append,$end-$append);
} else {
- $snippets[] = \dokuwiki\Utf8\PhpString::substr($text,$start,$end-$start);
+ $snippets[] = PhpString::substr($text,$start,$end-$start);
}
// set $offset for next match attempt
@@ -498,8 +509,8 @@ function ft_snippet($id,$highlight){
// this prevents further matching of this snippet but for possible matches of length
// smaller than match length + context (at least 50 characters) this match is part of the context
$utf8_offset = $utf8_idx + $utf8_len;
- $offset = $idx + strlen(\dokuwiki\Utf8\PhpString::substr($text,$utf8_idx,$utf8_len));
- $offset = \dokuwiki\Utf8\Clean::correctIdx($text,$offset);
+ $offset = $idx + strlen(PhpString::substr($text,$utf8_idx,$utf8_len));
+ $offset = Clean::correctIdx($text,$offset);
}
$m = "\1";
@@ -687,7 +698,7 @@ function ft_queryParser($Indexer, $query){
*/
$parsed_query = '';
$parens_level = 0;
- $terms = preg_split('/(-?".*?")/u', \dokuwiki\Utf8\PhpString::strtolower($query),
+ $terms = preg_split('/(-?".*?")/u', PhpString::strtolower($query),
-1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
foreach ($terms as $term) {