<?php
// +-----------------------------------------------------------------------+
// | Piwigo - a PHP based photo gallery                                    |
// +-----------------------------------------------------------------------+
// | Copyright(C) 2008-2014 Piwigo Team                  http://piwigo.org |
// | Copyright(C) 2003-2008 PhpWebGallery Team    http://phpwebgallery.net |
// | Copyright(C) 2002-2003 Pierrick LE GALL   http://le-gall.net/pierrick |
// +-----------------------------------------------------------------------+
// | This program is free software; you can redistribute it and/or modify  |
// | it under the terms of the GNU General Public License as published by  |
// | the Free Software Foundation                                          |
// |                                                                       |
// | This program is distributed in the hope that it will be useful, but   |
// | WITHOUT ANY WARRANTY; without even the implied warranty of            |
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU      |
// | General Public License for more details.                              |
// |                                                                       |
// | You should have received a copy of the GNU General Public License     |
// | along with this program; if not, write to the Free Software           |
// | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, |
// | USA.                                                                  |
// +-----------------------------------------------------------------------+

/**
 * @package functions\search
 */


/**
 * Loads one set of search rules from the "search" table.
 *
 * Rules are stored serialized; each set is identified by a numeric id.
 * Execution stops (die) when $search_id is not numeric.
 *
 * @param int $search_id
 * @return array the unserialized rules
 */
function get_search_array($search_id)
{
  $id_is_numeric = is_numeric($search_id);
  if (!$id_is_numeric)
  {
    die('Search id must be an integer');
  }

  $sql = '
SELECT rules
  FROM '.SEARCH_TABLE.'
  WHERE id = '.$search_id.'
;';

  // only the first column of the first row is of interest
  $row = pwg_db_fetch_row(pwg_query($sql));
  list($stored_rules) = $row;

  return unserialize($stored_rules);
}

/**
 * Returns the SQL clause for a search.
 * Transforms the array returned by get_search_array() into SQL sub-query.
 *
 * Handled rules (all optional, read from $search['fields']):
 *  - file, name, comment, author: one LIKE per word, joined with the rule mode
 *  - allwords: each word is searched in the four text fields above
 *  - date_available, date_creation (+ "-after" / "-before" variants)
 *  - cat: category ids, optionally extended to their sub-categories
 * The resulting clauses are bracketed and joined with $search['mode'].
 *
 * NOTE(review): words, dates, category ids and the per-field modes are
 * interpolated as-is in the SQL; presumably they are escaped/validated by
 * whoever built the rules - verify against caller.
 *
 * @param array $search
 * @return string empty string when no rule produced a clause
 */
function get_sql_search_clause($search)
{
  // SQL where clauses are stored in $clauses array during query
  // construction
  $clauses = array();

  foreach (array('file','name','comment','author') as $textfield)
  {
    if (isset($search['fields'][$textfield]))
    {
      $local_clauses = array();
      foreach ($search['fields'][$textfield]['words'] as $word)
      {
        $local_clauses[] = $textfield." LIKE '%".$word."%'";
      }

      // adds brackets around where clauses
      $local_clauses = prepend_append_array_items($local_clauses, '(', ')');

      $clauses[] = implode(
        ' '.$search['fields'][$textfield]['mode'].' ',
        $local_clauses
        );
    }
  }

  if (isset($search['fields']['allwords']))
  {
    $fields = array('file', 'name', 'comment', 'author');
    // in the OR mode, request must be :
    // ((field1 LIKE '%word1%' OR field2 LIKE '%word1%')
    // OR (field1 LIKE '%word2%' OR field2 LIKE '%word2%'))
    //
    // in the AND mode :
    // ((field1 LIKE '%word1%' OR field2 LIKE '%word1%')
    // AND (field1 LIKE '%word2%' OR field2 LIKE '%word2%'))
    $word_clauses = array();
    foreach ($search['fields']['allwords']['words'] as $word)
    {
      $field_clauses = array();
      foreach ($fields as $field)
      {
        $field_clauses[] = $field." LIKE '%".$word."%'";
      }
      // brackets are added right here instead of going through
      // create_function(), which no longer exists in PHP 8
      $word_clauses[] = '('.implode(
        "\n          OR ",
        $field_clauses
        ).')';
    }

    // make sure the "mode" is either OR or AND
    $allwords_mode = $search['fields']['allwords']['mode'];
    if (!in_array($allwords_mode, array('AND', 'OR'), true))
    {
      $allwords_mode = 'AND';
    }

    $clauses[] = "\n         ".
      implode(
        "\n         ". $allwords_mode. "\n         ",
        $word_clauses
        );
  }

  foreach (array('date_available', 'date_creation') as $datefield)
  {
    if (isset($search['fields'][$datefield]))
    {
      $clauses[] = $datefield." = '".$search['fields'][$datefield]['date']."'";
    }

    foreach (array('after','before') as $suffix)
    {
      $key = $datefield.'-'.$suffix;

      if (isset($search['fields'][$key]))
      {
        // "inc" tells whether the boundary date itself is included
        $clauses[] = $datefield.
          ($suffix == 'after'             ? ' >' : ' <').
          ($search['fields'][$key]['inc'] ? '='  : '').
          " '".$search['fields'][$key]['date']."'";
      }
    }
  }

  if (isset($search['fields']['cat']))
  {
    if ($search['fields']['cat']['sub_inc'])
    {
      // searching all the categories id of sub-categories
      $cat_ids = get_subcat_ids($search['fields']['cat']['words']);
    }
    else
    {
      $cat_ids = $search['fields']['cat']['words'];
    }

    $clauses[] = 'category_id IN ('.implode(',', $cat_ids).')';
  }

  // adds brackets around where clauses
  $clauses = prepend_append_array_items($clauses, '(', ')');

  return implode(
    "\n    ".$search['mode'].' ',
    $clauses
    );
}

/**
 * Returns the list of items corresponding to the advanced search array.
 *
 * The image ids matching the SQL rules (see get_sql_search_clause()) are
 * combined with the image ids matching the optional "tags" rule according
 * to $search['mode']: intersection for AND, union for OR.
 *
 * @param array $search
 * @param string $images_where optional additional restriction on images table
 * @return array list of image ids
 */
function get_regular_search_results($search, $images_where='')
{
  global $conf;
  $forbidden = get_sql_condition_FandF(
        array
          (
            'forbidden_categories' => 'category_id',
            'visible_categories' => 'category_id',
            'visible_images' => 'id'
          ),
        "\n  AND"
    );

  $items = array();
  $tag_items = array();
  $has_tag_rule = isset($search['fields']['tags']);

  if ($has_tag_rule)
  {
    $tag_items = get_image_ids_for_tags(
      $search['fields']['tags']['words'],
      $search['fields']['tags']['mode']
      );
  }

  $search_clause = get_sql_search_clause($search);

  if (!empty($search_clause))
  {
    $query = '
SELECT DISTINCT(id)
  FROM '.IMAGES_TABLE.' i
    INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
  WHERE '.$search_clause;
    if (!empty($images_where))
    {
      $query .= "\n  AND ".$images_where;
    }
    $query .= $forbidden.'
  '.$conf['order_by'];
    $items = array_from_query($query, 'id');
  }

  // The tag rule must be applied as soon as it exists, even when it matched
  // no image: in AND mode "no image has these tags" means "no result", not
  // "ignore the tags".
  // NOTE(review): assumes get_image_ids_for_tags() returns an array (possibly
  // empty) - confirm against its definition.
  if ($has_tag_rule)
  {
    switch ($search['mode'])
    {
      case 'AND':
        if (empty($search_clause))
        {
          $items = $tag_items;
        }
        else
        {
          $items = array_values( array_intersect($items, $tag_items) );
        }
        break;
      case 'OR':
        // array_unique() keeps the original keys: reindex so that the result
        // is a plain list, like in the AND branch
        $items = array_values(
          array_unique(
            array_merge(
              $items,
              $tag_items
              )
            )
          );
        break;
    }
  }

  return $items;
}

/**
 * Tells whether a char belongs to a word: a figure, an ASCII letter or any
 * byte of the extended ASCII table (>127).
 *
 * @param char $ch
 * @return bool
 */
function is_word_char($ch)
{
  if ($ch >= '0' && $ch <= '9')
  { // figure
    return true;
  }

  if ($ch >= 'a' && $ch <= 'z')
  { // lowercase letter
    return true;
  }

  if ($ch >= 'A' && $ch <= 'Z')
  { // uppercase letter
    return true;
  }

  // extended ASCII
  return ord($ch) > 127;
}

/**
 * Tells whether a char is one of the special tokens that can start a
 * word: [{<=*+
 *
 * @param char $ch
 * @return bool
 */
function is_odd_wbreak_begin($ch)
{
  $position = strpos('[{<=*+', $ch);

  return $position !== false;
}

/**
 * Tells whether a char is one of the special tokens that can end a
 * word: ]}>=*+
 *
 * @param char $ch
 * @return bool
 */
function is_odd_wbreak_end($ch)
{
  $position = strpos(']}>=*+', $ch);

  return $position !== false;
}


// Bit flags describing how a quick search token was written in the query.
// They are set by analyse_qsearch() and can be combined.
define('QST_QUOTED',         0x01); // token was enclosed in double quotes
define('QST_NOT',            0x02); // token was prefixed with '-'
define('QST_WILDCARD_BEGIN', 0x04); // token was prefixed with '*'
define('QST_WILDCARD_END',   0x08); // token (or quoted phrase) was followed by '*'
// mask made of both wildcard bits
define('QST_WILDCARD', QST_WILDCARD_BEGIN|QST_WILDCARD_END);

/**
 * Analyzes and splits the quick/query search query $q into tokens.
 * q='john bill' => 2 tokens 'john' 'bill'
 * The query can contain a phrase: 'Pierre "New York"' will return 'Pierre'
 * and 'New York' (the case of the tokens is left untouched).
 *
 * Tokens are separated by white spaces and by the characters , . ; ! ?
 * For each token a bit field of QST_* flags is returned at the same index:
 *  - QST_QUOTED: the token was written between double quotes
 *  - QST_NOT: the token was prefixed with '-'
 *  - QST_WILDCARD_BEGIN: the token was prefixed with '*'
 *  - QST_WILDCARD_END: the token was followed by '*'
 * The other full text operators (+ < > ~) are dropped when they start a
 * token and kept as regular characters when they appear inside a token.
 * Empty tokens are never returned.
 *
 * @param string $q
 * @param array &$qtokens
 * @param array &$qtoken_modifiers
 */
function analyse_qsearch($q, &$qtokens, &$qtoken_modifiers)
{
  $q = stripslashes($q);
  $q_len = strlen($q);
  $tokens = array();
  $token_modifiers = array();
  $crt_token = "";
  $crt_token_modifier = 0;

  for ($i=0; $i<$q_len; $i++)
  {
    $ch = $q[$i];
    if ( ($crt_token_modifier&QST_QUOTED)==0)
    {
        if ($ch=='"')
        { // opening quote: flush the pending token and start a phrase
          if (strlen($crt_token))
          {
            $tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier;
            $crt_token = ""; $crt_token_modifier = 0;
          }
          $crt_token_modifier |= QST_QUOTED;
        }
        elseif ( strcspn($ch, '*+-><~')==0 )
        { //special full text modifier
          if (strlen($crt_token))
          { // in the middle of a token it is a regular character
            $crt_token .= $ch;
          }
          else
          { // before a token only '*' and '-' are meaningful
            if ( $ch=='*' )
              $crt_token_modifier |= QST_WILDCARD_BEGIN;
            if ( $ch=='-' )
              $crt_token_modifier |= QST_NOT;
          }
        }
        elseif (preg_match('/[\s,.;!\?]+/', $ch))
        { // white space
          if (strlen($crt_token))
          {
            $tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier;
            $crt_token = "";
          }
          // modifiers never survive a separator
          $crt_token_modifier = 0;
        }
        else
        {
          $crt_token .= $ch;
        }
    }
    else // qualified with quotes
    {
      if ($ch=='"')
      { // closing quote, possibly followed by a wildcard
        if ($i+1 < $q_len && $q[$i+1]=='*')
        {
          $crt_token_modifier |= QST_WILDCARD_END;
          $i++;
        }
        $tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier;
        $crt_token = ""; $crt_token_modifier = 0;
      }
      else
        $crt_token .= $ch;
    }
  }

  // last token (also reached when a quote was never closed)
  if (strlen($crt_token))
  {
    $tokens[] = $crt_token;
    $token_modifiers[] = $crt_token_modifier;
  }

  $qtokens = array();
  $qtoken_modifiers = array();
  foreach ($tokens as $i => $token)
  {
    $modifier = $token_modifiers[$i];

    // trailing '*' of an unquoted token is a wildcard, not a character
    if ( !($modifier & QST_QUOTED) && substr($token, -1)=='*' )
    {
      $token = rtrim($token, '*');
      $modifier |= QST_WILDCARD_END;
    }

    if ( strlen($token)==0)
      continue;

    $qtokens[] = $token;
    $qtoken_modifiers[] = $modifier;
  }
}

/**
 * Builds the LIKE SQL clause matching the tokens of a quick search query
 * against one field, for example (file LIKE '%john%' OR file LIKE '%bill%').
 *
 * Tokens flagged QST_NOT and tokens that are empty once their surrounding
 * '%' are removed do not take part in the clause.
 *
 * @param array $tokens
 * @param array $token_modifiers
 * @param string $field
 * @return string|null null when no token produced a condition
 */
function get_qsearch_like_clause($tokens, $token_modifiers, $field)
{
  $conditions = array();
  $nb_tokens = count($tokens);

  for ($idx = 0; $idx < $nb_tokens; $idx++)
  {
    $word = trim($tokens[$idx], '%');
    $is_excluded = ($token_modifiers[$idx] & QST_NOT) != 0;

    if ($is_excluded || $word === '')
    {
      continue;
    }

    // quotes first, then the LIKE specials % and _
    $escaped = str_replace(
      array('%', '_'),
      array('\\%', '\\_'),
      addslashes($word)
      );

    $conditions[] = $field." LIKE '%".$escaped."%'";
  }

  if (count($conditions) == 0)
  {
    return null;
  }

  return '('.implode(' OR ', $conditions).')';
}

/**
 * Returns tags corresponding to the quick search query that has been split into tokens.
 *
 * Every tag having at least one image is compared (after transliteration)
 * with every token and gets a relevance score per token. On return:
 *  - $token_tag_ids[$i] holds the ids of the tags retained for token $i,
 *    best score first (always empty for QST_NOT tokens)
 *  - $not_tag_ids holds the ids of the tags excluded by QST_NOT tokens
 *  - $all_tags holds the rows of the matching tags that were not excluded,
 *    sorted with tag_alpha_compare and with their name passed through the
 *    'render_tag_name' event
 *
 * @param array $tokens
 * @param array $token_modifiers
 * @param array &$token_tag_ids
 * @param array &$not_tag_ids
 * @param array &$all_tags
 */
function get_qsearch_tags($tokens, $token_modifiers, &$token_tag_ids, &$not_tag_ids, &$all_tags)
{
  $nb_tokens = count($tokens);

  $token_tag_ids = array_fill(0, $nb_tokens, array() );
  $not_tag_ids = $all_tags = array();

  $token_tag_scores = $token_tag_ids;
  $transliterated_tokens = array();
  foreach ($tokens as $token)
  {
    $transliterated_tokens[] = transliterate($token);
  }

  $query = '
SELECT t.*, COUNT(image_id) AS counter
  FROM '.TAGS_TABLE.' t
    INNER JOIN '.IMAGE_TAG_TABLE.' ON id=tag_id
  GROUP BY id';
  $result = pwg_query($query);
  while ($tag = pwg_db_fetch_assoc($result))
  {
    $transliterated_tag = transliterate($tag['name']);
    $tag_len = strlen($transliterated_tag);

    // find how this tag matches query tokens
    for ($i=0; $i<$nb_tokens; $i++)
    {
      $transliterated_token = $transliterated_tokens[$i];

      // an empty needle makes strpos() warn (PHP < 8) or match at every
      // offset and finally throw (PHP 8): such a token cannot match a tag
      if ((string)$transliterated_token === '')
      {
        continue;
      }
      $token_len = strlen($transliterated_token);

      $match = 0;
      $pos = 0;
      while ( ($pos = strpos($transliterated_tag, $transliterated_token, $pos)) !== false)
      {
        if ( ($token_modifiers[$i]&QST_WILDCARD)==QST_WILDCARD )
        {// wildcard in this token
          $match = 1;
          break;
        }

        // search begin of word
        $wbegin_len=0; $wbegin_char=' ';
        while ($pos-$wbegin_len > 0)
        {
          if (! is_word_char($transliterated_tag[$pos-$wbegin_len-1]) )
          {
            $wbegin_char = $transliterated_tag[$pos-$wbegin_len-1];
            break;
          }
          $wbegin_len++;
        }

        // search end of word
        $wend_len=0; $wend_char=' ';
        while ($pos+$token_len+$wend_len < $tag_len)
        {
          if (! is_word_char($transliterated_tag[$pos+$token_len+$wend_len]) )
          {
            $wend_char = $transliterated_tag[$pos+$token_len+$wend_len];
            break;
          }
          $wend_len++;
        }

        // the score decreases with the number of word characters
        // surrounding the occurrence of the token
        $this_score = 0;
        if ( ($token_modifiers[$i]&QST_WILDCARD)==0 )
        {// no wildcard begin or end
          if ($token_len <= 2)
          {// search for 1 or 2 characters must match exactly to avoid retrieving too much data
            if ($wbegin_len==0 && $wend_len==0 && !is_odd_wbreak_begin($wbegin_char) && !is_odd_wbreak_end($wend_char) )
              $this_score = 1;
          }
          elseif ($token_len == 3)
          {
            if ($wbegin_len==0)
              $this_score = $token_len / ($token_len + $wend_len);
          }
          else
          {
            $this_score = $token_len / ($token_len + 1.1 * $wbegin_len + 0.9 * $wend_len);
          }
        }

        if ($this_score>0)
          $match = max($match, $this_score );
        $pos++;
      }

      if ($match)
      {
        $tag_id = (int)$tag['id'];
        $all_tags[$tag_id] = $tag;
        $token_tag_ids[$i][] = $tag_id;
        $token_tag_scores[$i][] = $match;
      }
    }
  }

  // process not tags
  for ($i=0; $i<$nb_tokens; $i++)
  {
    if ( ! ($token_modifiers[$i]&QST_NOT) )
      continue;

    // sort order and sort type are two distinct arguments of array_multisort
    array_multisort($token_tag_scores[$i], SORT_DESC, SORT_NUMERIC, $token_tag_ids[$i]);

    // only the best matching tags (score >= 0.8 and equal to the best
    // score) are excluded
    for ($j=0; $j<count($token_tag_scores[$i]); $j++)
    {
      if ($token_tag_scores[$i][$j] < 0.8)
        break;
      if ($j>0 && $token_tag_scores[$i][$j] < $token_tag_scores[$i][0])
        break;
      $tag_id = $token_tag_ids[$i][$j];
      if ( isset($all_tags[$tag_id]) )
      {
        unset($all_tags[$tag_id]);
        $not_tag_ids[] = $tag_id;
      }
    }
    $token_tag_ids[$i] = array();
  }

  // process regular tags
  for ($i=0; $i<$nb_tokens; $i++)
  {
    if ( $token_modifiers[$i]&QST_NOT )
      continue;

    array_multisort($token_tag_scores[$i], SORT_DESC, SORT_NUMERIC, $token_tag_ids[$i]);

    $counter = 0;
    // count() is evaluated at each iteration on purpose: the arrays shrink
    for ($j=0; $j<count($token_tag_scores[$i]); $j++)
    {
      $tag_id = $token_tag_ids[$i][$j];
      if ( ! isset($all_tags[$tag_id]) )
      {// tag excluded by a "not" token
        array_splice($token_tag_ids[$i], $j, 1);
        array_splice($token_tag_scores[$i], $j, 1);
        $j--;
        continue;
      }

      $counter += $all_tags[$tag_id]['counter'];
      if ($counter > 200 && $j>0 && $token_tag_scores[$i][0] > $token_tag_scores[$i][$j] )
      {// "many" images in previous tags and starting from this tag is less relevant
        array_splice($token_tag_ids[$i], $j);
        array_splice($token_tag_scores[$i], $j);
        break;
      }
    }
  }

  usort($all_tags, 'tag_alpha_compare');
  foreach ( $all_tags as &$tag )
  {
    $tag['name'] = trigger_event('render_tag_name', $tag['name'], $tag);
  }
  // break the reference so that $all_tags cannot be altered through $tag
  unset($tag);
}

/**
 * Returns the search results corresponding to a quick/query search.
 * A quick/query search returns many items (search is not strict), but results
 * are sorted by relevance unless $super_order_by is true. Returns:
 *  array (
 *    'items' => array of matching images
 *    'qs'    => array(
 *      'q' => the query, slashes stripped
 *      'matching_tags' => array of matching tags
 *      'matching_cats' => array of matching categories
 *      'matching_cats_no_images' =>array(99) - matching categories without images
 *      )
 *    )
 * 'matching_tags' is only set when the query contains at least one token;
 * 'matching_cats' and 'matching_cats_no_images' are only set when at least
 * one such category was found.
 *
 * NOTE(review): $q is embedded verbatim in the full text SQL conditions;
 * presumably it is already slash-escaped by the caller (slashes are stripped
 * before it is displayed or tokenized) - verify against caller.
 *
 * @param string $q
 * @param bool $super_order_by
 * @param string $images_where optional additional restriction on images table
 * @return array
 */
function get_quick_search_results($q, $super_order_by, $images_where='')
{
  global $user, $conf, $template;

  $search_results =
    array(
      'items' => array(),
      'qs' => array('q'=>stripslashes($q)),
    );
  $q = trim($q);
  analyse_qsearch($q, $tokens, $token_modifiers);
  if (count($tokens)==0)
  {
    return $search_results;
  }
  // pieces of an HTML comment appended to the page footer
  $debug = array();
  $debug[] = '<!--'.count($tokens).' tokens';

  $q_like_field = '@@__db_field__@@'; //something never in a search
  $q_like_clause = get_qsearch_like_clause($tokens, $token_modifiers, $q_like_field );

  // Step 1 - first we find matches in #images table ===========================
  $match_against = 'MATCH(i.name, i.comment) AGAINST( \''.$q.'\' IN BOOLEAN MODE)';
  $where_clauses = $match_against;
  if (!empty($q_like_clause))
  {
    $where_clauses .= '
    OR '. str_replace($q_like_field, 'CONVERT(file, CHAR)', $q_like_clause);
    $where_clauses = '('.$where_clauses.')';
  }
  $where_clauses = array($where_clauses);
  if (!empty($images_where))
  {
    $where_clauses[]='('.$images_where.')';
  }
  $where_clauses[] = get_sql_condition_FandF
      (
        array( 'visible_images' => 'i.id' ), null, true
      );
  $query = '
SELECT i.id,
    '.$match_against.' AS weight
  FROM '.IMAGES_TABLE.' i
  WHERE '.implode("\n AND ", $where_clauses);

  $by_weights=array();
  $result = pwg_query($query);
  while ($row = pwg_db_fetch_assoc($result))
  { // weight is important when sorting images by relevance
    if ($row['weight'])
    {
      $by_weights[(int)$row['id']] =  2*$row['weight'];
    }
    else
    {//full text does not match but file name match
      $by_weights[(int)$row['id']] =  2;
    }
  }
  $debug[] = count($by_weights).' fulltext';
  if (!empty($by_weights))
  {
    $debug[] = 'ft score min:'.min($by_weights).' max:'.max($by_weights);
  }


  // Step 2 - get the tags and the images for tags
  get_qsearch_tags($tokens, $token_modifiers, $token_tag_ids, $not_tag_ids, $search_results['qs']['matching_tags']);
  $debug[] = count($search_results['qs']['matching_tags']).' tags';

  foreach ($token_tag_ids as $tag_ids)
  {
    $debug[] = count($tag_ids).' unique tags';

    if (!empty($tag_ids))
    {
      $tag_photo_count=0;
      $query = '
SELECT image_id FROM '.IMAGE_TAG_TABLE.'
  WHERE tag_id IN ('.implode(',',$tag_ids).')
  GROUP BY image_id';
      $result = pwg_query($query);
      while ($row = pwg_db_fetch_assoc($result))
      { // weight is important when sorting images by relevance
        $image_id=(int)$row['image_id'];
        // an image found through a tag only starts with a weight of 0
        $by_weights[$image_id] =
          (isset($by_weights[$image_id]) ? $by_weights[$image_id] : 0) + 1;
        $tag_photo_count++;
      }
      $debug[] = $tag_photo_count.' photos for tag';
      $debug[] = count($by_weights).' photos after';
    }
  }

  // Step 3 - search categories corresponding to the query $q ==================
  $query = '
SELECT id, name, permalink, nb_images
  FROM '.CATEGORIES_TABLE.'
    INNER JOIN '.USER_CACHE_CATEGORIES_TABLE.' ON id=cat_id
  WHERE user_id='.$user['id'].'
    AND MATCH(name, comment) AGAINST( \''.$q.'\' IN BOOLEAN MODE)'.
  get_sql_condition_FandF (
      array( 'visible_categories' => 'cat_id' ), "\n    AND"
    );
  $result = pwg_query($query);
  while ($row = pwg_db_fetch_assoc($result))
  { // weight is important when sorting images by relevance
    if ($row['nb_images']==0)
    {
      $search_results['qs']['matching_cats_no_images'][] = $row;
    }
    else
    {
      $search_results['qs']['matching_cats'][$row['id']] = $row;
    }
  }
  // the key only exists when an album matched: count(null) is an error
  // since PHP 8
  $nb_matching_cats = isset($search_results['qs']['matching_cats'])
    ? count($search_results['qs']['matching_cats'])
    : 0;
  $debug[] = $nb_matching_cats.' albums with images';

  if ( empty($by_weights) and empty($search_results['qs']['matching_cats']) )
  {
    return $search_results;
  }

  if (!empty($not_tag_ids))
  {
    $query = '
SELECT image_id FROM '.IMAGE_TAG_TABLE.'
  WHERE tag_id IN ('.implode(',',$not_tag_ids).')
  GROUP BY image_id';
    $result = pwg_query($query);
    while ($row = pwg_db_fetch_row($result))
    {
      $id = $row[0];
      unset($by_weights[$id]);
    }
    $debug[] = count($by_weights).' after not tags';

    // the excluded tags may have removed every image: without this check
    // step 4 would build an empty, invalid "WHERE ()" condition
    if ( empty($by_weights) and empty($search_results['qs']['matching_cats']) )
    {
      return $search_results;
    }
  }

  // Step 4 - now we have $by_weights ( array image id => weight ) that need
  // permission checks and/or matching categories to get images from
  $where_clauses = array();
  if ( !empty($by_weights) )
  {
    $where_clauses[]='i.id IN ('
      . implode(',', array_keys($by_weights)) . ')';
  }
  if ( !empty($search_results['qs']['matching_cats']) )
  {
    $where_clauses[]='category_id IN ('.
      implode(',',array_keys($search_results['qs']['matching_cats'])).')';
  }
  $where_clauses = array( '('.implode("\n    OR ",$where_clauses).')' );
  if (!empty($images_where))
  {
    $where_clauses[]='('.$images_where.')';
  }
  $where_clauses[] = get_sql_condition_FandF(
      array
        (
          'forbidden_categories' => 'category_id',
          'visible_categories' => 'category_id',
          'visible_images' => 'i.id'
        ),
      null,true
    );

  $query = '
SELECT DISTINCT(id)
  FROM '.IMAGES_TABLE.' i
    INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
  WHERE '.implode("\n AND ", $where_clauses)."\n".
  $conf['order_by'];

  $allowed_images = array_from_query( $query, 'id');

  $debug[] = count($allowed_images).' final photo count -->';
  $template->append('footer_elements', implode(', ', $debug) );

  if ( $super_order_by or empty($by_weights) )
  {
    $search_results['items'] = $allowed_images;
    return $search_results;
  }

  // image id => rank in the default order; the rank is then turned into a
  // weight slightly lowered by the position, so that images having the same
  // weight keep their default order
  $allowed_images = array_flip( $allowed_images );
  $divisor = 5.0 * count($allowed_images);
  foreach ($allowed_images as $id=> &$rank )
  {
    // images only found through a matching category get a weight of 1
    $weight = isset($by_weights[$id]) ? $by_weights[$id] : 1;
    $weight -= $rank/$divisor;
    $rank = $weight;
  }
  unset($rank);

  arsort($allowed_images, SORT_NUMERIC);
  $search_results['items'] = array_keys($allowed_images);
  return $search_results;
}

/**
 * Returns an array of 'items' corresponding to the search id.
 * Rules holding a 'q' entry are run as a quick search, any other rules as a
 * regular search.
 *
 * @param int $search_id
 * @param bool $super_order_by only used by the quick search
 * @param string $images_where optional additional restriction on images table
 * @return array
 */
function get_search_results($search_id, $super_order_by, $images_where='')
{
  $rules = get_search_array($search_id);

  if ( isset($rules['q']) )
  {
    return get_quick_search_results($rules['q'], $super_order_by, $images_where);
  }

  return array(
    'items' => get_regular_search_results($rules, $images_where),
    );
}

?>