1
0
Fork 0
mirror of https://github.com/YunoHost-Apps/piwigo_ynh.git synced 2024-09-03 20:06:03 +02:00
piwigo_ynh/sources/include/functions_search.inc.php

832 lines
23 KiB
PHP
Raw Normal View History

2014-07-06 13:21:10 +02:00
<?php
// +-----------------------------------------------------------------------+
// | Piwigo - a PHP based photo gallery |
// +-----------------------------------------------------------------------+
// | Copyright(C) 2008-2014 Piwigo Team http://piwigo.org |
// | Copyright(C) 2003-2008 PhpWebGallery Team http://phpwebgallery.net |
// | Copyright(C) 2002-2003 Pierrick LE GALL http://le-gall.net/pierrick |
// +-----------------------------------------------------------------------+
// | This program is free software; you can redistribute it and/or modify |
// | it under the terms of the GNU General Public License as published by |
// | the Free Software Foundation |
// | |
// | This program is distributed in the hope that it will be useful, but |
// | WITHOUT ANY WARRANTY; without even the implied warranty of |
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
// | General Public License for more details. |
// | |
// | You should have received a copy of the GNU General Public License |
// | along with this program; if not, write to the Free Software |
// | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, |
// | USA. |
// +-----------------------------------------------------------------------+
/**
* @package functions\search
*/
/**
 * Returns the search rules stored as a serialized array in the "search"
 * table. Each set of search rules is identified by a numeric id.
 *
 * The script is terminated with die() when $search_id is not numeric.
 *
 * @param int $search_id
 * @return array|false the unserialized rules; false when the fetch yields no
 *   row for $search_id or when the stored value cannot be unserialized
 */
function get_search_array($search_id)
{
  // $search_id is concatenated into the SQL below, so refuse anything that
  // is not a number before building the query
  if (!is_numeric($search_id))
  {
    die('Search id must be an integer');
  }

  $query = '
SELECT rules
FROM '.SEARCH_TABLE.'
WHERE id = '.$search_id.'
;';
  $row = pwg_db_fetch_row(pwg_query($query));

  // no row fetched (unknown id): answer explicitly instead of destructuring
  // a non-array and handing null to unserialize()
  if (!is_array($row) or !isset($row[0]))
  {
    return false;
  }

  // NOTE(review): unserialize() is applied to a value read from the
  // database; this is only safe as long as that column can not be written
  // with attacker-controlled content - confirm against the code that
  // stores the rules.
  return unserialize($row[0]);
}
/**
 * Returns the SQL clause for a search.
 * Transforms the array returned by get_search_array() into an SQL WHERE
 * sub-clause.
 *
 * Keys of $search read here:
 *  - 'mode': operator used to join the top level clauses
 *  - 'fields'['file'|'name'|'comment'|'author']: 'words' (list) and 'mode'
 *  - 'fields'['allwords']: 'words' (list) and 'mode' (forced to AND unless
 *    it is exactly 'AND' or 'OR')
 *  - 'fields'['date_available'|'date_creation']: 'date'
 *  - 'fields'['<datefield>-after'|'<datefield>-before']: 'date' and 'inc'
 *  - 'fields'['cat']: 'words' (list of category ids) and 'sub_inc'
 *
 * NOTE(review): words, dates and category ids are concatenated into the SQL
 * as they are; presumably they were escaped before the rules were stored -
 * confirm against the code that builds the rules.
 *
 * @param array $search
 * @return string empty string when no field produced a clause
 */
function get_sql_search_clause($search)
{
  // SQL where clauses are stored in $clauses array during query
  // construction
  $clauses = array();

  foreach (array('file','name','comment','author') as $textfield)
  {
    if (isset($search['fields'][$textfield]))
    {
      $local_clauses = array();
      foreach ($search['fields'][$textfield]['words'] as $word)
      {
        $local_clauses[] = $textfield." LIKE '%".$word."%'";
      }

      // adds brackets around where clauses
      $local_clauses = prepend_append_array_items($local_clauses, '(', ')');

      $clauses[] = implode(
        ' '.$search['fields'][$textfield]['mode'].' ',
        $local_clauses
        );
    }
  }

  if (isset($search['fields']['allwords']))
  {
    $fields = array('file', 'name', 'comment', 'author');

    // in the OR mode, request must be :
    // ((field1 LIKE '%word1%' OR field2 LIKE '%word1%')
    // OR (field1 LIKE '%word2%' OR field2 LIKE '%word2%'))
    //
    // in the AND mode :
    // ((field1 LIKE '%word1%' OR field2 LIKE '%word1%')
    // AND (field1 LIKE '%word2%' OR field2 LIKE '%word2%'))
    $word_clauses = array();
    foreach ($search['fields']['allwords']['words'] as $word)
    {
      $field_clauses = array();
      foreach ($fields as $field)
      {
        $field_clauses[] = $field." LIKE '%".$word."%'";
      }

      // one bracketed group per word; the brackets are added here directly
      // because create_function() no longer exists in PHP 8
      $word_clauses[] = '('.implode("\n OR ", $field_clauses).')';
    }

    // make sure the "mode" is either OR or AND. The comparison is strict so
    // that non-string values (0, true, ...) can not slip through a loose
    // comparison with 'AND'
    $allwords_mode = $search['fields']['allwords']['mode'];
    if (!in_array($allwords_mode, array('AND', 'OR'), true))
    {
      $allwords_mode = 'AND';
    }

    $clauses[] = "\n ".
      implode(
        "\n ". $allwords_mode. "\n ",
        $word_clauses
        );
  }

  foreach (array('date_available', 'date_creation') as $datefield)
  {
    // exact date
    if (isset($search['fields'][$datefield]))
    {
      $clauses[] = $datefield." = '".$search['fields'][$datefield]['date']."'";
    }

    // date range: "after" gives > (or >=), "before" gives < (or <=); the
    // '=' is added when the 'inc' flag of the rule is true
    foreach (array('after','before') as $suffix)
    {
      $key = $datefield.'-'.$suffix;

      if (isset($search['fields'][$key]))
      {
        $clauses[] = $datefield.
          ($suffix == 'after' ? ' >' : ' <').
          ($search['fields'][$key]['inc'] ? '=' : '').
          " '".$search['fields'][$key]['date']."'";
      }
    }
  }

  if (isset($search['fields']['cat']))
  {
    if ($search['fields']['cat']['sub_inc'])
    {
      // searching all the categories id of sub-categories
      $cat_ids = get_subcat_ids($search['fields']['cat']['words']);
    }
    else
    {
      $cat_ids = $search['fields']['cat']['words'];
    }

    $local_clause = 'category_id IN ('.implode(',', $cat_ids).')';
    $clauses[] = $local_clause;
  }

  // adds brackets around where clauses
  $clauses = prepend_append_array_items($clauses, '(', ')');

  $search_clause = implode(
    "\n ".$search['mode'].' ',
    $clauses
    );

  return $search_clause;
}
/**
 * Returns the list of items corresponding to the advanced search array.
 *
 * Image ids matching the SQL clause built by get_sql_search_clause() are
 * combined with the image ids returned by get_image_ids_for_tags() according
 * to $search['mode'] ('AND' => intersection, 'OR' => union).
 *
 * @param array $search
 * @param string $images_where optional additional restriction on images table
 * @return array list of image ids, indexed from 0
 */
function get_regular_search_results($search, $images_where='')
{
  global $conf;

  $forbidden = get_sql_condition_FandF(
    array
      (
        'forbidden_categories' => 'category_id',
        'visible_categories' => 'category_id',
        'visible_images' => 'id'
      ),
    "\n AND"
    );

  $items = array();
  $tag_items = array();

  if (isset($search['fields']['tags']))
  {
    $tag_items = get_image_ids_for_tags(
      $search['fields']['tags']['words'],
      $search['fields']['tags']['mode']
      );
  }

  $search_clause = get_sql_search_clause($search);

  if (!empty($search_clause))
  {
    $query = '
SELECT DISTINCT(id)
FROM '.IMAGES_TABLE.' i
INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
WHERE '.$search_clause;
    if (!empty($images_where))
    {
      $query .= "\n AND ".$images_where;
    }
    $query .= $forbidden.'
'.$conf['order_by'];
    $items = array_from_query($query, 'id');
  }

  // NOTE(review): when a tags rule exists but matches no image, this block
  // is skipped and the SQL results are returned unchanged, even in AND
  // mode - confirm this is intended.
  if ( !empty($tag_items) )
  {
    switch ($search['mode'])
    {
      case 'AND':
        if (empty($search_clause))
        {
          // tags are the only criterion
          $items = $tag_items;
        }
        else
        {
          $items = array_values( array_intersect($items, $tag_items) );
        }
        break;
      case 'OR':
        // array_unique() keeps the keys of the first occurrences, which
        // leaves gaps; re-index so that both modes return a plain list
        $items = array_values(
          array_unique(
            array_merge(
              $items,
              $tag_items
              )
            )
          );
        break;
    }
  }

  return $items;
}
/**
 * Tells whether a char is part of a word: a digit, an ASCII letter or any
 * byte above 127 (extended ASCII / multi-byte sequences).
 *
 * @param char $ch
 * @return bool
 */
function is_word_char($ch)
{
  // digit
  if ($ch>='0' && $ch<='9')
  {
    return true;
  }

  // lower case letter
  if ($ch>='a' && $ch<='z')
  {
    return true;
  }

  // upper case letter
  if ($ch>='A' && $ch<='Z')
  {
    return true;
  }

  // anything outside the 7-bit ASCII range
  return ord($ch)>127;
}
/**
 * Tells whether a char is one of the special tokens that may precede a
 * word: [{<=*+
 *
 * @param char $ch
 * @return bool
 */
function is_odd_wbreak_begin($ch)
{
  $position = strpos('[{<=*+', $ch);

  return $position !== false;
}
/**
 * Tells whether a char is one of the special tokens that may follow a
 * word: ]}>=*+
 *
 * @param char $ch
 * @return bool
 */
function is_odd_wbreak_end($ch)
{
  $position = strpos(']}>=*+', $ch);

  return $position !== false;
}
// Bit flags describing a quick search token (see analyse_qsearch()).
// the token was enclosed in double quotes
define('QST_QUOTED', 1 << 0);
// the token was prefixed with '-'
define('QST_NOT', 1 << 1);
// the token was prefixed with '*'
define('QST_WILDCARD_BEGIN', 1 << 2);
// the token was followed by '*'
define('QST_WILDCARD_END', 1 << 3);
// mask covering both wildcard flags
define('QST_WILDCARD', QST_WILDCARD_BEGIN | QST_WILDCARD_END);
/**
 * Analyzes and splits the quick/query search query $q into tokens.
 * q='john bill' => 2 tokens 'john' 'bill'
 * Special characters for MySql full text search (+,<,>,~) are dropped when
 * they start a token; a leading '-' sets QST_NOT and a leading '*' sets
 * QST_WILDCARD_BEGIN in the token modifiers. A trailing '*' sets
 * QST_WILDCARD_END.
 * The query can contain a phrase: 'Pierre "New York"' will return 'Pierre'
 * and 'New York' (the second one flagged QST_QUOTED).
 *
 * Both output arrays are rebuilt from scratch and share the same indexes.
 * Empty tokens are never returned.
 *
 * @param string $q
 * @param array &$qtokens
 * @param array &$qtoken_modifiers
 */
function analyse_qsearch($q, &$qtokens, &$qtoken_modifiers)
{
  $q = stripslashes($q);
  $q_len = strlen($q);

  $tokens = array();
  $token_modifiers = array();
  $crt_token = "";
  $crt_token_modifier = 0;

  for ($i=0; $i<$q_len; $i++)
  {
    $ch = $q[$i];
    if ( ($crt_token_modifier&QST_QUOTED)==0)
    {
      if ($ch=='"')
      {
        // opening quote: flush the token being built, if any
        if (strlen($crt_token))
        {
          $tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier;
          $crt_token = ""; $crt_token_modifier = 0;
        }
        $crt_token_modifier |= QST_QUOTED;
      }
      elseif ( strcspn($ch, '*+-><~')==0 )
      { //special full text modifier
        if (strlen($crt_token))
        {
          // inside a word the char is kept as is (ex: jean-pierre, bil*)
          $crt_token .= $ch;
        }
        else
        {
          // at the start of a word only * and - are meaningful, the other
          // modifiers are dropped
          if ( $ch=='*' )
            $crt_token_modifier |= QST_WILDCARD_BEGIN;
          if ( $ch=='-' )
            $crt_token_modifier |= QST_NOT;
        }
      }
      elseif (preg_match('/[\s,.;!\?]+/', $ch))
      { // white space or punctuation: end of the current token
        if (strlen($crt_token))
        {
          $tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier;
          $crt_token = "";
        }
        $crt_token_modifier = 0;
      }
      else
      {
        $crt_token .= $ch;
      }
    }
    else // qualified with quotes
    {
      if ($ch=='"')
      {
        // closing quote, possibly followed by a wildcard: "new york"*
        if ($i+1 < $q_len && $q[$i+1]=='*')
        {
          $crt_token_modifier |= QST_WILDCARD_END;
          $i++;
        }
        $tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier;
        $crt_token = ""; $crt_token_modifier = 0;
      }
      else
        $crt_token .= $ch;
    }
  }

  // last token (also covers a quote that was never closed)
  if (strlen($crt_token))
  {
    $tokens[] = $crt_token;
    $token_modifiers[] = $crt_token_modifier;
  }

  $qtokens = array();
  $qtoken_modifiers = array();
  for ($i=0; $i<count($tokens); $i++)
  {
    if ( !($token_modifiers[$i] & QST_QUOTED) )
    {
      // a trailing * on an unquoted token becomes the end wildcard flag
      if ( substr($tokens[$i], -1)=='*' )
      {
        $tokens[$i] = rtrim($tokens[$i], '*');
        $token_modifiers[$i] |= QST_WILDCARD_END;
      }
    }
    if ( strlen($tokens[$i])==0)
      continue;
    $qtokens[] = $tokens[$i];
    $qtoken_modifiers[] = $token_modifiers[$i];
  }
}
/**
 * Builds the LIKE SQL clause matching the tokens of a quick search query
 * against one field, for example (file LIKE '%john%' OR file LIKE '%bill%').
 *
 * Tokens flagged QST_NOT and tokens that are empty once their surrounding
 * '%' are removed are left out. Quotes, backslashes and the LIKE wildcards
 * % and _ are escaped.
 *
 * @param array $tokens
 * @param array $token_modifiers
 * @param string $field
 * @return string|null null when no token produced a condition
 */
function get_qsearch_like_clause($tokens, $token_modifiers, $field)
{
  $like_conditions = array();

  foreach ($tokens as $idx => $raw_token)
  {
    $needle = trim($raw_token, '%');

    // negated or empty tokens do not take part in the LIKE search
    if ( ($token_modifiers[$idx]&QST_NOT) || strlen($needle)==0 )
    {
      continue;
    }

    $needle = addslashes($needle);
    // escape LIKE specials %_
    $needle = str_replace( array('%','_'), array('\\%','\\_'), $needle);

    $like_conditions[] = $field." LIKE '%".$needle."%'";
  }

  if (count($like_conditions) == 0)
  {
    return null;
  }

  return '('.implode(' OR ', $like_conditions).')';
}
/**
 * Returns tags corresponding to the quick search query that has been split
 * into tokens.
 *
 * Every tag linked to at least one image is compared, after
 * transliteration, with every token and gets a score per token (0 = no
 * match). The three output parameters are rebuilt from scratch:
 *  - $token_tag_ids: for each token index, the ids of the matching tags
 *    sorted by decreasing score (always empty for QST_NOT tokens)
 *  - $not_tag_ids: ids of the tags excluded by QST_NOT tokens
 *  - $all_tags: the matching tag rows (tags table columns + 'counter'),
 *    without the excluded ones, sorted with tag_alpha_compare and with the
 *    'render_tag_name' event applied on 'name'
 *
 * @param array $tokens
 * @param array $token_modifiers
 * @param array &$token_tag_ids
 * @param array &$not_tag_ids
 * @param array &$all_tags
 */
function get_qsearch_tags($tokens, $token_modifiers, &$token_tag_ids, &$not_tag_ids, &$all_tags)
{
  $token_tag_ids = array_fill(0, count($tokens), array() );
  $not_tag_ids = $all_tags = array();

  // same shape as $token_tag_ids: one (empty) list per token
  $token_tag_scores = $token_tag_ids;
  $transliterated_tokens = array();
  foreach ($tokens as $token)
  {
    $transliterated_tokens[] = transliterate($token);
  }

  $query = '
SELECT t.*, COUNT(image_id) AS counter
FROM '.TAGS_TABLE.' t
INNER JOIN '.IMAGE_TAG_TABLE.' ON id=tag_id
GROUP BY id';
  $result = pwg_query($query);
  while ($tag = pwg_db_fetch_assoc($result))
  {
    $transliterated_tag = transliterate($tag['name']);

    // find how this tag matches query tokens
    for ($i=0; $i<count($tokens); $i++)
    {
      $transliterated_token = $transliterated_tokens[$i];

      // $match keeps the best score over all the occurrences of the token
      // inside the tag name
      $match = false;
      $pos = 0;
      while ( ($pos = strpos($transliterated_tag, $transliterated_token, $pos)) !== false)
      {
        if ( ($token_modifiers[$i]&QST_WILDCARD)==QST_WILDCARD )
        {// wildcard in this token
          $match = 1;
          break;
        }
        $token_len = strlen($transliterated_token);

        // search begin of word
        $wbegin_len=0; $wbegin_char=' ';
        while ($pos-$wbegin_len > 0)
        {
          if (! is_word_char($transliterated_tag[$pos-$wbegin_len-1]) )
          {
            $wbegin_char = $transliterated_tag[$pos-$wbegin_len-1];
            break;
          }
          $wbegin_len++;
        }

        // search end of word
        $wend_len=0; $wend_char=' ';
        while ($pos+$token_len+$wend_len < strlen($transliterated_tag))
        {
          if (! is_word_char($transliterated_tag[$pos+$token_len+$wend_len]) )
          {
            $wend_char = $transliterated_tag[$pos+$token_len+$wend_len];
            break;
          }
          $wend_len++;
        }

        // NOTE(review): a token carrying only one of the two wildcard
        // flags takes neither the branch above nor the one below, so its
        // score stays 0 and it never matches a tag - confirm this is
        // intended.
        $this_score = 0;
        if ( ($token_modifiers[$i]&QST_WILDCARD)==0 )
        {// no wildcard begin or end
          if ($token_len <= 2)
          {// search for 1 or 2 characters must match exactly to avoid retrieving too much data
            if ($wbegin_len==0 && $wend_len==0 && !is_odd_wbreak_begin($wbegin_char) && !is_odd_wbreak_end($wend_char) )
              $this_score = 1;
          }
          elseif ($token_len == 3)
          {
            // 3 characters must start a word; the score decreases with the
            // number of word characters following the token
            if ($wbegin_len==0)
              $this_score = $token_len / ($token_len + $wend_len);
          }
          else
          {
            // longer tokens may match inside a word; characters before the
            // token cost slightly more than characters after it
            $this_score = $token_len / ($token_len + 1.1 * $wbegin_len + 0.9 * $wend_len);
          }
        }

        if ($this_score>0)
          $match = max($match, $this_score );
        $pos++;
      }

      if ($match)
      {
        $tag_id = (int)$tag['id'];
        $all_tags[$tag_id] = $tag;
        $token_tag_ids[$i][] = $tag_id;
        $token_tag_scores[$i][] = $match;
      }
    }
  }

  // process not tags
  for ($i=0; $i<count($tokens); $i++)
  {
    if ( ! ($token_modifiers[$i]&QST_NOT) )
      continue;

    // best scores first, $token_tag_ids[$i] is reordered the same way
    array_multisort($token_tag_scores[$i], SORT_DESC|SORT_NUMERIC, $token_tag_ids[$i]);

    // only the tags sharing the best score are excluded, and only if that
    // score is at least 0.8
    for ($j=0; $j<count($token_tag_scores[$i]); $j++)
    {
      if ($token_tag_scores[$i][$j] < 0.8)
        break;
      if ($j>0 && $token_tag_scores[$i][$j] < $token_tag_scores[$i][0])
        break;
      $tag_id = $token_tag_ids[$i][$j];
      if ( isset($all_tags[$tag_id]) )
      {
        unset($all_tags[$tag_id]);
        $not_tag_ids[] = $tag_id;
      }
    }
    $token_tag_ids[$i] = array();
  }

  // process regular tags
  for ($i=0; $i<count($tokens); $i++)
  {
    if ( $token_modifiers[$i]&QST_NOT )
      continue;

    array_multisort($token_tag_scores[$i], SORT_DESC|SORT_NUMERIC, $token_tag_ids[$i]);

    // running total of the images attached to the tags kept so far
    $counter = 0;
    for ($j=0; $j<count($token_tag_scores[$i]); $j++)
    {
      $tag_id = $token_tag_ids[$i][$j];
      if ( ! isset($all_tags[$tag_id]) )
      {
        // tag removed by a "not" token: drop it and stay on the same index
        array_splice($token_tag_ids[$i], $j, 1);
        array_splice($token_tag_scores[$i], $j, 1);
        $j--;
        continue;
      }

      $counter += $all_tags[$tag_id]['counter'];
      if ($counter > 200 && $j>0 && $token_tag_scores[$i][0] > $token_tag_scores[$i][$j] )
      {// "many" images in previous tags and starting from this tag is less relevant
        array_splice($token_tag_ids[$i], $j);
        array_splice($token_tag_scores[$i], $j);
        break;
      }
    }
  }

  // usort() re-indexes $all_tags from 0: it is no longer keyed by tag id
  usort($all_tags, 'tag_alpha_compare');
  foreach ( $all_tags as &$tag )
  {
    $tag['name'] = trigger_event('render_tag_name', $tag['name'], $tag);
  }
  // break the reference left on the last element by the foreach above
  unset($tag);
}
/**
 * Returns the search results corresponding to a quick/query search.
 * A quick/query search returns many items (search is not strict), but results
 * are sorted by relevance unless $super_order_by is true. Returns:
 * array (
 *  'items' => array of matching images
 *  'qs' => array(
 *    'q' => the query, with slashes stripped
 *    'matching_tags' => array of matching tags
 *    'matching_cats' => array of matching categories
 *    'matching_cats_no_images' =>array(99) - matching categories without images
 *    )
 *  )
 * The 'matching_*' keys are only present once the corresponding step has
 * found something to store (or, for 'matching_tags', has been run).
 *
 * NOTE(review): $q is concatenated as is into the MATCH ... AGAINST
 * conditions; presumably it reaches this function already escaped (slashes
 * are stripped from it for display and tokenizing) - confirm against the
 * callers.
 *
 * @param string $q
 * @param bool $super_order_by
 * @param string $images_where optional additional restriction on images table
 * @return array
 */
function get_quick_search_results($q, $super_order_by, $images_where='')
{
  global $user, $conf, $template;

  $search_results =
    array(
      'items' => array(),
      'qs' => array('q'=>stripslashes($q)),
    );
  $q = trim($q);
  analyse_qsearch($q, $tokens, $token_modifiers);
  if (count($tokens)==0)
  {
    return $search_results;
  }

  // pieces of the HTML comment appended to the page footer at the end
  $debug = array();
  $debug[] = '<!--'.count($tokens).' tokens';

  $q_like_field = '@@__db_field__@@'; //something never in a search
  $q_like_clause = get_qsearch_like_clause($tokens, $token_modifiers, $q_like_field );

  // Step 1 - first we find matches in #images table ===========================
  $where_clauses='MATCH(i.name, i.comment) AGAINST( \''.$q.'\' IN BOOLEAN MODE)';
  if (!empty($q_like_clause))
  {
    $where_clauses .= '
OR '. str_replace($q_like_field, 'CONVERT(file, CHAR)', $q_like_clause);
    $where_clauses = '('.$where_clauses.')';
  }
  $where_clauses = array($where_clauses);
  if (!empty($images_where))
  {
    $where_clauses[]='('.$images_where.')';
  }
  $where_clauses[] = get_sql_condition_FandF
    (
      array( 'visible_images' => 'i.id' ), null, true
    );
  $query = '
SELECT i.id,
MATCH(i.name, i.comment) AGAINST( \''.$q.'\' IN BOOLEAN MODE) AS weight
FROM '.IMAGES_TABLE.' i
WHERE '.implode("\n AND ", $where_clauses);

  // image id => weight
  $by_weights=array();
  $result = pwg_query($query);
  while ($row = pwg_db_fetch_assoc($result))
  { // weight is important when sorting images by relevance
    if ($row['weight'])
    {
      $by_weights[(int)$row['id']] = 2*$row['weight'];
    }
    else
    {//full text does not match but file name match
      $by_weights[(int)$row['id']] = 2;
    }
  }
  $debug[] = count($by_weights).' fulltext';
  if (!empty($by_weights))
  {
    $debug[] = 'ft score min:'.min($by_weights).' max:'.max($by_weights);
  }

  // Step 2 - get the tags and the images for tags
  get_qsearch_tags($tokens, $token_modifiers, $token_tag_ids, $not_tag_ids, $search_results['qs']['matching_tags']);
  $debug[] = count($search_results['qs']['matching_tags']).' tags';

  for ($i=0; $i<count($token_tag_ids); $i++)
  {
    $tag_ids = $token_tag_ids[$i];
    $debug[] = count($tag_ids).' unique tags';

    if (!empty($tag_ids))
    {
      $tag_photo_count=0;
      $query = '
SELECT image_id FROM '.IMAGE_TAG_TABLE.'
WHERE tag_id IN ('.implode(',',$tag_ids).')
GROUP BY image_id';
      $result = pwg_query($query);
      while ($row = pwg_db_fetch_assoc($result))
      { // weight is important when sorting images by relevance
        $image_id=(int)$row['image_id'];
        // each token with a matching tag adds 1 to the weight of the image
        if (isset($by_weights[$image_id]))
        {
          $by_weights[$image_id] += 1;
        }
        else
        {
          $by_weights[$image_id] = 1;
        }
        $tag_photo_count++;
      }
      $debug[] = $tag_photo_count.' photos for tag';
      $debug[] = count($by_weights).' photos after';
    }
  }

  // Step 3 - search categories corresponding to the query $q ==================
  $query = '
SELECT id, name, permalink, nb_images
FROM '.CATEGORIES_TABLE.'
INNER JOIN '.USER_CACHE_CATEGORIES_TABLE.' ON id=cat_id
WHERE user_id='.$user['id'].'
AND MATCH(name, comment) AGAINST( \''.$q.'\' IN BOOLEAN MODE)'.
  get_sql_condition_FandF (
    array( 'visible_categories' => 'cat_id' ), "\n AND"
    );
  $result = pwg_query($query);
  while ($row = pwg_db_fetch_assoc($result))
  { // weight is important when sorting images by relevance
    if ($row['nb_images']==0)
    {
      $search_results['qs']['matching_cats_no_images'][] = $row;
    }
    else
    {
      $search_results['qs']['matching_cats'][$row['id']] = $row;
    }
  }
  // 'matching_cats' only exists when at least one album matched, and
  // count() no longer accepts null in PHP 8
  $matching_cats_count = isset($search_results['qs']['matching_cats'])
    ? count($search_results['qs']['matching_cats'])
    : 0;
  $debug[] = $matching_cats_count.' albums with images';

  if ( empty($by_weights) and empty($search_results['qs']['matching_cats']) )
  {
    return $search_results;
  }

  if (!empty($not_tag_ids))
  {
    // images linked to an excluded tag are removed from the results
    $query = '
SELECT image_id FROM '.IMAGE_TAG_TABLE.'
WHERE tag_id IN ('.implode(',',$not_tag_ids).')
GROUP BY image_id';
    $result = pwg_query($query);
    while ($row = pwg_db_fetch_row($result))
    {
      $id = $row[0];
      unset($by_weights[$id]);
    }
    $debug[] = count($by_weights).' after not tags';

    // the exclusion may have removed every image: without any matching
    // album either, step 4 would build an invalid "WHERE ()" condition
    if ( empty($by_weights) and empty($search_results['qs']['matching_cats']) )
    {
      return $search_results;
    }
  }

  // Step 4 - now we have $by_weights ( array image id => weight ) that need
  // permission checks and/or matching categories to get images from
  $where_clauses = array();
  if ( !empty($by_weights) )
  {
    $where_clauses[]='i.id IN ('
      . implode(',', array_keys($by_weights)) . ')';
  }
  if ( !empty($search_results['qs']['matching_cats']) )
  {
    $where_clauses[]='category_id IN ('.
      implode(',',array_keys($search_results['qs']['matching_cats'])).')';
  }
  $where_clauses = array( '('.implode("\n OR ",$where_clauses).')' );
  if (!empty($images_where))
  {
    $where_clauses[]='('.$images_where.')';
  }
  $where_clauses[] = get_sql_condition_FandF(
    array
      (
        'forbidden_categories' => 'category_id',
        'visible_categories' => 'category_id',
        'visible_images' => 'i.id'
      ),
    null,true
    );

  $query = '
SELECT DISTINCT(id)
FROM '.IMAGES_TABLE.' i
INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
WHERE '.implode("\n AND ", $where_clauses)."\n".
  $conf['order_by'];

  $allowed_images = array_from_query( $query, 'id');

  $debug[] = count($allowed_images).' final photo count -->';
  $template->append('footer_elements', implode(', ', $debug) );

  if ( $super_order_by or empty($by_weights) )
  {
    // keep the order given by $conf['order_by']
    $search_results['items'] = $allowed_images;
    return $search_results;
  }

  // sort by relevance: image id => position in the SQL order, then the
  // position is turned into a small penalty (less than 0.2) subtracted from
  // the weight, so that the SQL order decides between close weights
  $allowed_images = array_flip( $allowed_images );
  $divisor = 5.0 * count($allowed_images);
  foreach ($allowed_images as $id=> &$rank )
  {
    // images only found through a matching album get the weight 1
    $weight = isset($by_weights[$id]) ? $by_weights[$id] : 1;
    $weight -= $rank/$divisor;
    $rank = $weight;
  }
  unset($rank);

  arsort($allowed_images, SORT_NUMERIC);
  $search_results['items'] = array_keys($allowed_images);
  return $search_results;
}
/**
 * Returns an array of 'items' corresponding to the search id.
 * The stored rules describe a quick search when they hold a 'q' entry,
 * a regular search otherwise.
 *
 * @param int $search_id
 * @param bool $super_order_by only used by the quick search
 * @param string $images_where optional additional restriction on images table
 * @return array
 */
function get_search_results($search_id, $super_order_by, $images_where='')
{
  $search = get_search_array($search_id);

  if ( isset($search['q']) )
  {
    return get_quick_search_results($search['q'], $super_order_by, $images_where);
  }

  return array(
    'items' => get_regular_search_results($search, $images_where),
    );
}
?>