diff --git a/conf/settings.yml b/conf/settings.yml
index 3ddf222..c7f659e 100644
--- a/conf/settings.yml
+++ b/conf/settings.yml
@@ -3,13 +3,13 @@ general:
 
 search:
     safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict
-    autocomplete : "duckduckgo" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "wikipedia" - leave blank to turn it off by default
+    autocomplete : "" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "wikipedia" - leave blank to turn it off by default
 
 server:
     port : 8888
     bind_address : "127.0.0.1" # address to listen on
     secret_key : "ultrasecretkey" # change this!
-    base_url : ynhbaseurl # Set custom base_url. Possible values: False or "https://your.custom.host/location/"
+    base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/"
     image_proxy : False # Proxying image results through searx
 
 ui:
@@ -274,6 +274,11 @@ engines:
     engine : yahoo
     shortcut : yh
 
+  - name : yandex
+    engine : yandex
+    shortcut : yn
+    disabled : True
+
   - name : yahoo news
     engine : yahoo_news
     shortcut : yhn
@@ -311,7 +316,7 @@ engines:
 locales:
     en : English
     de : Deutsch
-    he : Hebrew
+    he : עברית
     hu : Magyar
     fr : Français
     es : Español
diff --git a/sources/AUTHORS.rst b/sources/AUTHORS.rst
index 3e719fe..632e7f0 100644
--- a/sources/AUTHORS.rst
+++ b/sources/AUTHORS.rst
@@ -38,3 +38,6 @@ generally made searx better:
 - Niklas Haas
 - @underr
 - Emmanuel Benazera
+- @GreenLunar
+- Noemi Vanyi
+- Kang-min Liu
diff --git a/sources/CHANGELOG.rst b/sources/CHANGELOG.rst
index f2c192d..2e333f0 100644
--- a/sources/CHANGELOG.rst
+++ b/sources/CHANGELOG.rst
@@ -1,3 +1,23 @@
+0.8.1 2015.12.22
+================
+
+- More efficient result parsing
+- Rewritten google engine to prevent app crashes
+- Other engine fixes/tweaks
+
+  - Bing news
+  - Btdigg
+  - Gigablast
+  - Google images
+  - Startpage
+
+
+News
+~~~~
+
+New documentation page is available: https://asciimoo.github.io/searx
+
+
 0.8.0 2015.09.08
 ================
 
@@ -44,6 +64,7 @@ News
 
 @dalf joined the maintainer "team"
 
+
 0.7.0 2015.02.03
 ================
 
diff --git a/sources/Makefile b/sources/Makefile
index 0568555..5573e54 100644
--- a/sources/Makefile
+++ b/sources/Makefile
@@ -60,7 +60,7 @@ locales:
 	@pybabel compile -d searx/translations
 
 clean:
-	@rm -rf .installed.cfg .mr.developer.cfg bin parts develop-eggs \
-	searx.egg-info lib include .coverage coverage searx/static/themes/default/css/*.css
+	@rm -rf .installed.cfg .mr.developer.cfg bin parts develop-eggs eggs \
+	searx.egg-info lib include .coverage coverage
 
 .PHONY: all tests robot flake8 coverage production minimal styles locales clean
diff --git a/sources/README.rst b/sources/README.rst
index 669741e..cf1263c 100644
--- a/sources/README.rst
+++ b/sources/README.rst
@@ -96,7 +96,7 @@ remember 'untested code is broken code'.
 Runs robot (Selenium) tests, you must have ``firefox`` installed because
 this functional tests actually run the browser and perform operations on
 it. Also searx is executed with
-`settings\_robot `__.
+`settings\_robot `__.
 
 ``make flake8``
 '''''''''''''''
diff --git a/sources/base.cfg b/sources/base.cfg
index 4ed6683..6e46e9e 100644
--- a/sources/base.cfg
+++ b/sources/base.cfg
@@ -1,16 +1,10 @@
 [buildout]
 extends = versions.cfg
-versions = versions
 unzip = true
 newest = false
-extends = versions.cfg
-versions = versions
 prefer-final = true
 develop = .
-extensions = - buildout_versions - eggs = searx diff --git a/sources/bootstrap.py b/sources/bootstrap.py index d5e8be1..a459921 100644 --- a/sources/bootstrap.py +++ b/sources/bootstrap.py @@ -18,75 +18,17 @@ The script accepts buildout command-line options, so you can use the -c option to specify an alternate configuration file. """ -import os, shutil, sys, tempfile, urllib, urllib2, subprocess +import os +import shutil +import sys +import tempfile + from optparse import OptionParser -if sys.platform == 'win32': - def quote(c): - if ' ' in c: - return '"%s"' % c # work around spawn lamosity on windows - else: - return c -else: - quote = str +__version__ = '2015-07-01' +# See zc.buildout's changelog if this version is up to date. -# See zc.buildout.easy_install._has_broken_dash_S for motivation and comments. -stdout, stderr = subprocess.Popen( - [sys.executable, '-Sc', - 'try:\n' - ' import ConfigParser\n' - 'except ImportError:\n' - ' print 1\n' - 'else:\n' - ' print 0\n'], - stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() -has_broken_dash_S = bool(int(stdout.strip())) - -# In order to be more robust in the face of system Pythons, we want to -# run without site-packages loaded. This is somewhat tricky, in -# particular because Python 2.6's distutils imports site, so starting -# with the -S flag is not sufficient. However, we'll start with that: -if not has_broken_dash_S and 'site' in sys.modules: - # We will restart with python -S. - args = sys.argv[:] - args[0:0] = [sys.executable, '-S'] - args = map(quote, args) - os.execv(sys.executable, args) -# Now we are running with -S. We'll get the clean sys.path, import site -# because distutils will do it later, and then reset the path and clean -# out any namespace packages from site-packages that might have been -# loaded by .pth files. -clean_path = sys.path[:] -import site # imported because of its side effects -sys.path[:] = clean_path -for k, v in sys.modules.items(): - if k in ('setuptools', 'pkg_resources') or ( - hasattr(v, '__path__') and - len(v.__path__) == 1 and - not os.path.exists(os.path.join(v.__path__[0], '__init__.py'))): - # This is a namespace package. Remove it. - sys.modules.pop(k) - -is_jython = sys.platform.startswith('java') - -setuptools_source = 'http://peak.telecommunity.com/dist/ez_setup.py' -distribute_source = 'http://python-distribute.org/distribute_setup.py' -distribute_source = 'https://bitbucket.org/pypa/setuptools/raw/f657df1f1ed46596d236376649c99a470662b4ba/distribute_setup.py' - -# parsing arguments -def normalize_to_url(option, opt_str, value, parser): - if value: - if '://' not in value: # It doesn't smell like a URL. - value = 'file://%s' % ( - urllib.pathname2url( - os.path.abspath(os.path.expanduser(value))),) - if opt_str == '--download-base' and not value.endswith('/'): - # Download base needs a trailing slash to make the world happy. - value += '/' - else: - value = None - name = opt_str[2:].replace('-', '_') - setattr(parser.values, name, value) +tmpeggs = tempfile.mkdtemp(prefix='bootstrap-') usage = '''\ [DESIRED PYTHON FOR BUILDOUT] bootstrap.py [options] @@ -96,31 +38,14 @@ Bootstraps a buildout-based project. Simply run this script in a directory containing a buildout.cfg, using the Python that you want bin/buildout to use. -Note that by using --setup-source and --download-base to point to -local resources, you can keep this script from going over the network. +Note that by using --find-links to point to local resources, you can keep +this script from going over the network. 
''' parser = OptionParser(usage=usage) -parser.add_option("-v", "--version", dest="version", - help="use a specific zc.buildout version") -parser.add_option("-d", "--distribute", - action="store_true", dest="use_distribute", default=False, - help="Use Distribute rather than Setuptools.") -parser.add_option("--setup-source", action="callback", dest="setup_source", - callback=normalize_to_url, nargs=1, type="string", - help=("Specify a URL or file location for the setup file. " - "If you use Setuptools, this will default to " + - setuptools_source + "; if you use Distribute, this " - "will default to " + distribute_source + ".")) -parser.add_option("--download-base", action="callback", dest="download_base", - callback=normalize_to_url, nargs=1, type="string", - help=("Specify a URL or directory for downloading " - "zc.buildout and either Setuptools or Distribute. " - "Defaults to PyPI.")) -parser.add_option("--eggs", - help=("Specify a directory for storing eggs. Defaults to " - "a temporary directory that is deleted when the " - "bootstrap script completes.")) +parser.add_option("--version", + action="store_true", default=False, + help=("Return bootstrap.py version.")) parser.add_option("-t", "--accept-buildout-test-releases", dest='accept_buildout_test_releases', action="store_true", default=False, @@ -130,95 +55,117 @@ parser.add_option("-t", "--accept-buildout-test-releases", "extensions for you. If you use this flag, " "bootstrap and buildout will get the newest releases " "even if they are alphas or betas.")) -parser.add_option("-c", None, action="store", dest="config_file", - help=("Specify the path to the buildout configuration " - "file to be used.")) +parser.add_option("-c", "--config-file", + help=("Specify the path to the buildout configuration " + "file to be used.")) +parser.add_option("-f", "--find-links", + help=("Specify a URL to search for buildout releases")) +parser.add_option("--allow-site-packages", + action="store_true", default=False, + help=("Let bootstrap.py use existing site packages")) +parser.add_option("--buildout-version", + help="Use a specific zc.buildout version") +parser.add_option("--setuptools-version", + help="Use a specific setuptools version") +parser.add_option("--setuptools-to-dir", + help=("Allow for re-use of existing directory of " + "setuptools versions")) options, args = parser.parse_args() +if options.version: + print("bootstrap.py version %s" % __version__) + sys.exit(0) -if options.eggs: - eggs_dir = os.path.abspath(os.path.expanduser(options.eggs)) -else: - eggs_dir = tempfile.mkdtemp() -if options.setup_source is None: - if options.use_distribute: - options.setup_source = distribute_source - else: - options.setup_source = setuptools_source - -if options.accept_buildout_test_releases: - args.insert(0, 'buildout:accept-buildout-test-releases=true') +###################################################################### +# load/install setuptools try: - import pkg_resources - import setuptools # A flag. Sometimes pkg_resources is installed alone. 
- if not hasattr(pkg_resources, '_distribute'): - raise ImportError + from urllib.request import urlopen except ImportError: - ez_code = urllib2.urlopen( - options.setup_source).read().replace('\r\n', '\n') - ez = {} - exec ez_code in ez - setup_args = dict(to_dir=eggs_dir, download_delay=0) - if options.download_base: - setup_args['download_base'] = options.download_base - if options.use_distribute: - setup_args['no_fake'] = True - if sys.version_info[:2] == (2, 4): - setup_args['version'] = '0.6.32' - ez['use_setuptools'](**setup_args) - if 'pkg_resources' in sys.modules: - reload(sys.modules['pkg_resources']) - import pkg_resources - # This does not (always?) update the default working set. We will - # do it. - for path in sys.path: - if path not in pkg_resources.working_set.entries: - pkg_resources.working_set.add_entry(path) + from urllib2 import urlopen -cmd = [quote(sys.executable), - '-c', - quote('from setuptools.command.easy_install import main; main()'), - '-mqNxd', - quote(eggs_dir)] - -if not has_broken_dash_S: - cmd.insert(1, '-S') - -find_links = options.download_base -if not find_links: - find_links = os.environ.get('bootstrap-testing-find-links') -if not find_links and options.accept_buildout_test_releases: - find_links = 'http://downloads.buildout.org/' -if find_links: - cmd.extend(['-f', quote(find_links)]) - -if options.use_distribute: - setup_requirement = 'distribute' +ez = {} +if os.path.exists('ez_setup.py'): + exec(open('ez_setup.py').read(), ez) else: - setup_requirement = 'setuptools' + exec(urlopen('https://bootstrap.pypa.io/ez_setup.py').read(), ez) + +if not options.allow_site_packages: + # ez_setup imports site, which adds site packages + # this will remove them from the path to ensure that incompatible versions + # of setuptools are not in the path + import site + # inside a virtualenv, there is no 'getsitepackages'. + # We can't remove these reliably + if hasattr(site, 'getsitepackages'): + for sitepackage_path in site.getsitepackages(): + # Strip all site-packages directories from sys.path that + # are not sys.prefix; this is because on Windows + # sys.prefix is a site-package directory. + if sitepackage_path != sys.prefix: + sys.path[:] = [x for x in sys.path + if sitepackage_path not in x] + +setup_args = dict(to_dir=tmpeggs, download_delay=0) + +if options.setuptools_version is not None: + setup_args['version'] = options.setuptools_version +if options.setuptools_to_dir is not None: + setup_args['to_dir'] = options.setuptools_to_dir + +ez['use_setuptools'](**setup_args) +import setuptools +import pkg_resources + +# This does not (always?) update the default working set. We will +# do it. 
+for path in sys.path: + if path not in pkg_resources.working_set.entries: + pkg_resources.working_set.add_entry(path) + +###################################################################### +# Install buildout + ws = pkg_resources.working_set -setup_requirement_path = ws.find( - pkg_resources.Requirement.parse(setup_requirement)).location -env = dict( - os.environ, - PYTHONPATH=setup_requirement_path) + +setuptools_path = ws.find( + pkg_resources.Requirement.parse('setuptools')).location + +# Fix sys.path here as easy_install.pth added before PYTHONPATH +cmd = [sys.executable, '-c', + 'import sys; sys.path[0:0] = [%r]; ' % setuptools_path + + 'from setuptools.command.easy_install import main; main()', + '-mZqNxd', tmpeggs] + +find_links = os.environ.get( + 'bootstrap-testing-find-links', + options.find_links or + ('http://downloads.buildout.org/' + if options.accept_buildout_test_releases else None) + ) +if find_links: + cmd.extend(['-f', find_links]) requirement = 'zc.buildout' -version = options.version +version = options.buildout_version if version is None and not options.accept_buildout_test_releases: # Figure out the most recent final version of zc.buildout. import setuptools.package_index _final_parts = '*final-', '*final' def _final_version(parsed_version): - for part in parsed_version: - if (part[:1] == '*') and (part not in _final_parts): - return False - return True + try: + return not parsed_version.is_prerelease + except AttributeError: + # Older setuptools + for part in parsed_version: + if (part[:1] == '*') and (part not in _final_parts): + return False + return True + index = setuptools.package_index.PackageIndex( - search_path=[setup_requirement_path]) + search_path=[setuptools_path]) if find_links: index.add_find_links((find_links,)) req = pkg_resources.Requirement.parse(requirement) @@ -227,8 +174,6 @@ if version is None and not options.accept_buildout_test_releases: bestv = None for dist in index[req.project_name]: distv = dist.parsed_version - if distv >= pkg_resources.parse_version('2dev'): - continue if _final_version(distv): if bestv is None or distv > bestv: best = [dist] @@ -238,40 +183,28 @@ if version is None and not options.accept_buildout_test_releases: if best: best.sort() version = best[-1].version - if version: - requirement += '=='+version -else: - requirement += '<2dev' - + requirement = '=='.join((requirement, version)) cmd.append(requirement) -if is_jython: - import subprocess - exitcode = subprocess.Popen(cmd, env=env).wait() -else: # Windows prefers this, apparently; otherwise we would prefer subprocess - exitcode = os.spawnle(*([os.P_WAIT, sys.executable] + cmd + [env])) -if exitcode != 0: - sys.stdout.flush() - sys.stderr.flush() - print ("An error occurred when trying to install zc.buildout. 
" - "Look above this message for any errors that " - "were output by easy_install.") - sys.exit(exitcode) +import subprocess +if subprocess.call(cmd) != 0: + raise Exception( + "Failed to execute command:\n%s" % repr(cmd)[1:-1]) -ws.add_entry(eggs_dir) +###################################################################### +# Import and run buildout + +ws.add_entry(tmpeggs) ws.require(requirement) import zc.buildout.buildout -# If there isn't already a command in the args, add bootstrap if not [a for a in args if '=' not in a]: args.append('bootstrap') - -# if -c was provided, we push it back into args for buildout's main function +# if -c was provided, we push it back into args for buildout' main function if options.config_file is not None: args[0:0] = ['-c', options.config_file] zc.buildout.buildout.main(args) -if not options.eggs: # clean up temporary egg directory - shutil.rmtree(eggs_dir) +shutil.rmtree(tmpeggs) diff --git a/sources/searx/engines/bing_news.py b/sources/searx/engines/bing_news.py index 943bf88..a2397c4 100644 --- a/sources/searx/engines/bing_news.py +++ b/sources/searx/engines/bing_news.py @@ -68,7 +68,7 @@ def request(query, params): def response(resp): results = [] - rss = etree.fromstring(resp.text) + rss = etree.fromstring(resp.content) ns = rss.nsmap diff --git a/sources/searx/engines/btdigg.py b/sources/searx/engines/btdigg.py index bde8661..192ed6e 100644 --- a/sources/searx/engines/btdigg.py +++ b/sources/searx/engines/btdigg.py @@ -38,7 +38,7 @@ def request(query, params): def response(resp): results = [] - dom = html.fromstring(resp.text) + dom = html.fromstring(resp.content) search_res = dom.xpath('//div[@id="search_res"]/table/tr') diff --git a/sources/searx/engines/gigablast.py b/sources/searx/engines/gigablast.py index b852de9..3fef102 100644 --- a/sources/searx/engines/gigablast.py +++ b/sources/searx/engines/gigablast.py @@ -1,8 +1,8 @@ """ Gigablast (Web) - @website http://gigablast.com - @provide-api yes (http://gigablast.com/api.html) + @website https://gigablast.com + @provide-api yes (https://gigablast.com/api.html) @using-api yes @results XML @@ -13,6 +13,8 @@ from urllib import urlencode from cgi import escape from lxml import etree +from random import randint +from time import time # engine dependent config categories = ['general'] @@ -20,8 +22,8 @@ paging = True number_of_results = 5 # search-url, invalid HTTPS certificate -base_url = 'http://gigablast.com/' -search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0' +base_url = 'https://gigablast.com/' +search_string = 'search?{query}&n={number_of_results}&s={offset}&format=xml&qh=0&rxiyd={rxiyd}&rand={rand}' # specific xpath variables results_xpath = '//response//result' @@ -37,7 +39,9 @@ def request(query, params): search_path = search_string.format( query=urlencode({'q': query}), offset=offset, - number_of_results=number_of_results) + number_of_results=number_of_results, + rxiyd=randint(10000, 10000000), + rand=int(time())) params['url'] = base_url + search_path diff --git a/sources/searx/engines/google.py b/sources/searx/engines/google.py index 0e78a9e..e822603 100644 --- a/sources/searx/engines/google.py +++ b/sources/searx/engines/google.py @@ -9,11 +9,14 @@ # @parse url, title, content, suggestion import re +from cgi import escape from urllib import urlencode from urlparse import urlparse, parse_qsl -from lxml import html -from searx.poolrequests import get +from lxml import html, etree from searx.engines.xpath import extract_text, extract_url +from searx.search import 
logger + +logger = logger.getChild('google engine') # engine dependent config @@ -87,7 +90,7 @@ url_map = 'https://www.openstreetmap.org/'\ search_path = '/search' search_url = ('https://{hostname}' + search_path + - '?{query}&start={offset}&gbv=1') + '?{query}&start={offset}&gbv=1&gws_rd=cr') # other URLs map_hostname_start = 'maps.google.' @@ -125,27 +128,6 @@ image_img_src_xpath = './img/@src' property_address = "Address" property_phone = "Phone number" -# cookies -pref_cookie = '' -nid_cookie = {} - - -# see https://support.google.com/websearch/answer/873?hl=en -def get_google_pref_cookie(): - global pref_cookie - if pref_cookie == '': - resp = get('https://www.google.com/ncr', allow_redirects=False) - pref_cookie = resp.cookies["PREF"] - return pref_cookie - - -def get_google_nid_cookie(google_hostname): - global nid_cookie - if google_hostname not in nid_cookie: - resp = get('https://' + google_hostname) - nid_cookie[google_hostname] = resp.cookies.get("NID", None) - return nid_cookie[google_hostname] - # remove google-specific tracking-url def parse_url(url_string, google_hostname): @@ -167,7 +149,7 @@ def parse_url(url_string, google_hostname): def extract_text_from_dom(result, xpath): r = result.xpath(xpath) if len(r) > 0: - return extract_text(r[0]) + return escape(extract_text(r[0])) return None @@ -197,9 +179,6 @@ def request(query, params): params['headers']['Accept-Language'] = language params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' - if google_hostname == default_hostname: - params['cookies']['PREF'] = get_google_pref_cookie() - params['cookies']['NID'] = get_google_nid_cookie(google_hostname) params['google_hostname'] = google_hostname @@ -224,8 +203,8 @@ def response(resp): # parse results for result in dom.xpath(results_xpath): - title = extract_text(result.xpath(title_xpath)[0]) try: + title = extract_text(result.xpath(title_xpath)[0]) url = parse_url(extract_url(result.xpath(url_xpath), google_url), google_hostname) parsed_url = urlparse(url, google_hostname) @@ -268,12 +247,13 @@ def response(resp): 'content': content }) except: + logger.debug('result parse error in:\n%s', etree.tostring(result, pretty_print=True)) continue # parse suggestion for suggestion in dom.xpath(suggestion_xpath): # append suggestion - results.append({'suggestion': extract_text(suggestion)}) + results.append({'suggestion': escape(extract_text(suggestion))}) # return results return results diff --git a/sources/searx/engines/google_images.py b/sources/searx/engines/google_images.py index 85963a1..9d51428 100644 --- a/sources/searx/engines/google_images.py +++ b/sources/searx/engines/google_images.py @@ -2,41 +2,42 @@ Google (Images) @website https://www.google.com - @provide-api yes (https://developers.google.com/web-search/docs/), - deprecated! 
+    @provide-api yes (https://developers.google.com/custom-search/)
 
-    @using-api   yes
-    @results     JSON
-    @stable      yes (but deprecated)
+    @using-api   no
+    @results     HTML chunks with JSON inside
+    @stable      no
     @parse       url, title, img_src
 """
 
-from urllib import urlencode, unquote
+from urllib import urlencode
+from urlparse import parse_qs
 from json import loads
+from lxml import html
 
 # engine dependent config
 categories = ['images']
 paging = True
 safesearch = True
 
-# search-url
-url = 'https://ajax.googleapis.com/'
-search_url = url + 'ajax/services/search/images?v=1.0&start={offset}&rsz=large&safe={safesearch}&filter=off&{query}'
+search_url = 'https://www.google.com/search'\
+    '?{query}'\
+    '&tbm=isch'\
+    '&ijn=1'\
+    '&start={offset}'
 
 
 # do search-request
 def request(query, params):
-    offset = (params['pageno'] - 1) * 8
-
-    if params['safesearch'] == 0:
-        safesearch = 'off'
-    else:
-        safesearch = 'on'
+    offset = (params['pageno'] - 1) * 100
 
     params['url'] = search_url.format(query=urlencode({'q': query}),
                                       offset=offset,
                                       safesearch=safesearch)
 
+    if safesearch and params['safesearch']:
+        params['url'] += '&' + urlencode({'safe': 'active'})
+
     return params
 
 
@@ -44,29 +45,26 @@ def response(resp):
     results = []
 
-    search_res = loads(resp.text)
-
-    # return empty array if there are no results
-    if not search_res.get('responseData', {}).get('results'):
-        return []
+    dom = html.fromstring(resp.text)
 
     # parse results
-    for result in search_res['responseData']['results']:
-        href = result['originalContextUrl']
-        title = result['title']
-        if 'url' not in result:
-            continue
-        thumbnail_src = result['tbUrl']
+    for result in dom.xpath('//div[@data-ved]'):
+        data_url = result.xpath('./a/@href')[0]
+        data_query = {k: v[0] for k, v in parse_qs(data_url.split('?', 1)[1]).iteritems()}
+
+        metadata = loads(result.xpath('./div[@class="rg_meta"]/text()')[0])
+
+        thumbnail_src = metadata['tu']
 
         # http to https
         thumbnail_src = thumbnail_src.replace("http://", "https://")
 
         # append result
-        results.append({'url': href,
-                        'title': title,
-                        'content': result['content'],
-                        'thumbnail_src': thumbnail_src,
-                        'img_src': unquote(result['url']),
+        results.append({'url': data_query['imgrefurl'],
+                        'title': metadata['pt'],
+                        'content': metadata['s'],
+                        'thumbnail_src': metadata['tu'],
+                        'img_src': data_query['imgurl'],
                         'template': 'images.html'})
 
     # return results
diff --git a/sources/searx/engines/startpage.py b/sources/searx/engines/startpage.py
index 7d58f7f..a91cafa 100644
--- a/sources/searx/engines/startpage.py
+++ b/sources/searx/engines/startpage.py
@@ -12,6 +12,8 @@
 from lxml import html
 from cgi import escape
+from dateutil import parser
+from datetime import datetime, timedelta
 import re
 from searx.engines.xpath import extract_text
 
@@ -79,15 +81,44 @@ def response(resp):
 
         title = escape(extract_text(link))
 
-        if result.xpath('./p[@class="desc"]'):
-            content = escape(extract_text(result.xpath('./p[@class="desc"]')))
+        if result.xpath('./p[@class="desc clk"]'):
+            content = escape(extract_text(result.xpath('./p[@class="desc clk"]')))
         else:
             content = ''
 
-        # append result
-        results.append({'url': url,
-                        'title': title,
-                        'content': content})
+        published_date = None
+
+        # check if search result starts with something like: "2 Sep 2014 ... "
+        if re.match("^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content):
+            date_pos = content.find('...')+4
+            date_string = content[0:date_pos-5]
+            published_date = parser.parse(date_string, dayfirst=True)
+
+            # fix content string
+            content = content[date_pos:]
+
+        # check if search result starts with something like: "5 days ago ... "
+        elif re.match("^[0-9]+ days? ago \.\.\. ", content):
+            date_pos = content.find('...')+4
+            date_string = content[0:date_pos-5]
+
+            # calculate datetime
+            published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group()))
+
+            # fix content string
+            content = content[date_pos:]
+
+        if published_date:
+            # append result
+            results.append({'url': url,
+                            'title': title,
+                            'content': content,
+                            'publishedDate': published_date})
+        else:
+            # append result
+            results.append({'url': url,
+                            'title': title,
+                            'content': content})
 
     # return results
     return results
diff --git a/sources/searx/engines/yandex.py b/sources/searx/engines/yandex.py
new file mode 100644
index 0000000..edc6ad5
--- /dev/null
+++ b/sources/searx/engines/yandex.py
@@ -0,0 +1,62 @@
+"""
+ Yahoo (Web)
+
+ @website     https://yandex.ru/
+ @provide-api ?
+ @using-api   no
+ @results     HTML (using search portal)
+ @stable      no (HTML can change)
+ @parse       url, title, content
+"""
+
+from urllib import urlencode
+from lxml import html
+from searx.search import logger
+
+logger = logger.getChild('yandex engine')
+
+# engine dependent config
+categories = ['general']
+paging = True
+language_support = True  # TODO
+
+default_tld = 'com'
+language_map = {'ru': 'ru',
+                'ua': 'uk',
+                'tr': 'com.tr'}
+
+# search-url
+base_url = 'https://yandex.{tld}/'
+search_url = 'search/?{query}&p={page}'
+
+results_xpath = '//div[@class="serp-item serp-item_plain_yes clearfix i-bem"]'
+url_xpath = './/h2/a/@href'
+title_xpath = './/h2/a//text()'
+content_xpath = './/div[@class="serp-item__text"]//text()'
+
+
+def request(query, params):
+    lang = params['language'].split('_')[0]
+    host = base_url.format(tld=language_map.get(lang) or default_tld)
+    params['url'] = host + search_url.format(page=params['pageno']-1,
+                                             query=urlencode({'text': query}))
+    return params
+
+
+# get response from search-request
+def response(resp):
+    dom = html.fromstring(resp.text)
+    results = []
+
+    for result in dom.xpath(results_xpath):
+        try:
+            res = {'url': result.xpath(url_xpath)[0],
+                   'title': ''.join(result.xpath(title_xpath)),
+                   'content': ''.join(result.xpath(content_xpath))}
+        except:
+            logger.exception('yandex parse crash')
+            continue
+
+        results.append(res)
+
+    return results
diff --git a/sources/searx/plugins/self_info.py b/sources/searx/plugins/self_info.py
index 5ca9945..dc6b7cd 100644
--- a/sources/searx/plugins/self_info.py
+++ b/sources/searx/plugins/self_info.py
@@ -35,10 +35,10 @@ def post_search(request, ctx):
             ip = x_forwarded_for[0]
         else:
             ip = request.remote_addr
-        ctx['search'].answers.clear()
-        ctx['search'].answers.add(ip)
+        ctx['search'].result_container.answers.clear()
+        ctx['search'].result_container.answers.add(ip)
     elif p.match(ctx['search'].query):
         ua = request.user_agent
-        ctx['search'].answers.clear()
-        ctx['search'].answers.add(ua)
+        ctx['search'].result_container.answers.clear()
+        ctx['search'].result_container.answers.add(ua)
     return True
diff --git a/sources/searx/poolrequests.py b/sources/searx/poolrequests.py
index c44bdc7..4761f6a 100644
--- a/sources/searx/poolrequests.py
+++ b/sources/searx/poolrequests.py
@@ -1,5 +1,7 @@
 import requests
+
 from itertools import cycle
+from threading import RLock
 
 from searx import settings
 
@@ -55,9 +57,10 @@ class
SessionSinglePool(requests.Session): super(SessionSinglePool, self).__init__() # reuse the same adapters - self.adapters.clear() - self.mount('https://', next(https_adapters)) - self.mount('http://', next(http_adapters)) + with RLock(): + self.adapters.clear() + self.mount('https://', next(https_adapters)) + self.mount('http://', next(http_adapters)) def close(self): """Call super, but clear adapters since there are managed globaly""" @@ -67,7 +70,6 @@ class SessionSinglePool(requests.Session): def request(method, url, **kwargs): """same as requests/requests/api.py request(...) except it use SessionSinglePool and force proxies""" - global settings session = SessionSinglePool() kwargs['proxies'] = settings['outgoing'].get('proxies', None) response = session.request(method=method, url=url, **kwargs) diff --git a/sources/searx/results.py b/sources/searx/results.py new file mode 100644 index 0000000..bc656f2 --- /dev/null +++ b/sources/searx/results.py @@ -0,0 +1,239 @@ +import re +from collections import defaultdict +from operator import itemgetter +from threading import RLock +from urlparse import urlparse, unquote +from searx.engines import engines + +CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile('[,;:!?\./\\\\ ()-_]', re.M | re.U) +WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) + + +# return the meaningful length of the content for a result +def result_content_len(content): + if isinstance(content, basestring): + return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content)) + else: + return 0 + + +def compare_urls(url_a, url_b): + if url_a.netloc != url_b.netloc or url_a.query != url_b.query: + return False + + # remove / from the end of the url if required + path_a = url_a.path[:-1]\ + if url_a.path.endswith('/')\ + else url_a.path + path_b = url_b.path[:-1]\ + if url_b.path.endswith('/')\ + else url_b.path + + return unquote(path_a) == unquote(path_b) + + +def merge_two_infoboxes(infobox1, infobox2): + if 'urls' in infobox2: + urls1 = infobox1.get('urls', None) + if urls1 is None: + urls1 = [] + infobox1.set('urls', urls1) + + urlSet = set() + for url in infobox1.get('urls', []): + urlSet.add(url.get('url', None)) + + for url in infobox2.get('urls', []): + if url.get('url', None) not in urlSet: + urls1.append(url) + + if 'attributes' in infobox2: + attributes1 = infobox1.get('attributes', None) + if attributes1 is None: + attributes1 = [] + infobox1.set('attributes', attributes1) + + attributeSet = set() + for attribute in infobox1.get('attributes', []): + if attribute.get('label', None) not in attributeSet: + attributeSet.add(attribute.get('label', None)) + + for attribute in infobox2.get('attributes', []): + attributes1.append(attribute) + + if 'content' in infobox2: + content1 = infobox1.get('content', None) + content2 = infobox2.get('content', '') + if content1 is not None: + if result_content_len(content2) > result_content_len(content1): + infobox1['content'] = content2 + else: + infobox1.set('content', content2) + + +def result_score(result): + weight = 1.0 + + for result_engine in result['engines']: + if hasattr(engines[result_engine], 'weight'): + weight *= float(engines[result_engine].weight) + + occurences = len(result['positions']) + + return sum((occurences * weight) / position for position in result['positions']) + + +class ResultContainer(object): + """docstring for ResultContainer""" + def __init__(self): + super(ResultContainer, self).__init__() + self.results = defaultdict(list) + self._merged_results = [] + self.infoboxes = [] + self._infobox_ids = {} + 
self.suggestions = set() + self.answers = set() + + def extend(self, engine_name, results): + for result in list(results): + if 'suggestion' in result: + self.suggestions.add(result['suggestion']) + results.remove(result) + elif 'answer' in result: + self.answers.add(result['answer']) + results.remove(result) + elif 'infobox' in result: + self._merge_infobox(result) + results.remove(result) + + with RLock(): + engines[engine_name].stats['search_count'] += 1 + engines[engine_name].stats['result_count'] += len(results) + + if not results: + return + + self.results[engine_name].extend(results) + + for i, result in enumerate(results): + position = i + 1 + self._merge_result(result, position) + + def _merge_infobox(self, infobox): + add_infobox = True + infobox_id = infobox.get('id', None) + if infobox_id is not None: + existingIndex = self._infobox_ids.get(infobox_id, None) + if existingIndex is not None: + merge_two_infoboxes(self.infoboxes[existingIndex], infobox) + add_infobox = False + + if add_infobox: + self.infoboxes.append(infobox) + self._infobox_ids[infobox_id] = len(self.infoboxes) - 1 + + def _merge_result(self, result, position): + result['parsed_url'] = urlparse(result['url']) + + # if the result has no scheme, use http as default + if not result['parsed_url'].scheme: + result['parsed_url'] = result['parsed_url']._replace(scheme="http") + + result['host'] = result['parsed_url'].netloc + + if result['host'].startswith('www.'): + result['host'] = result['host'].replace('www.', '', 1) + + result['engines'] = [result['engine']] + + # strip multiple spaces and cariage returns from content + if result.get('content'): + result['content'] = WHITESPACE_REGEX.sub(' ', result['content']) + + # check for duplicates + duplicated = False + for merged_result in self._merged_results: + if compare_urls(result['parsed_url'], merged_result['parsed_url'])\ + and result.get('template') == merged_result.get('template'): + duplicated = merged_result + break + + # merge duplicates together + if duplicated: + # using content with more text + if result_content_len(result.get('content', '')) >\ + result_content_len(duplicated.get('content', '')): + duplicated['content'] = result['content'] + + # add the new position + duplicated['positions'].append(position) + + # add engine to list of result-engines + duplicated['engines'].append(result['engine']) + + # using https if possible + if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https': + duplicated['url'] = result['parsed_url'].geturl() + duplicated['parsed_url'] = result['parsed_url'] + + # if there is no duplicate found, append result + else: + result['positions'] = [position] + with RLock(): + self._merged_results.append(result) + + def get_ordered_results(self): + for result in self._merged_results: + score = result_score(result) + result['score'] = score + with RLock(): + for result_engine in result['engines']: + engines[result_engine].stats['score_count'] += score + + results = sorted(self._merged_results, key=itemgetter('score'), reverse=True) + + # pass 2 : group results by category and template + gresults = [] + categoryPositions = {} + + for i, res in enumerate(results): + # FIXME : handle more than one category per engine + category = engines[res['engine']].categories[0] + ':' + ''\ + if 'template' not in res\ + else res['template'] + + current = None if category not in categoryPositions\ + else categoryPositions[category] + + # group with previous results using the same category + # if the group can accept more 
result and is not too far + # from the current position + if current is not None and (current['count'] > 0)\ + and (len(gresults) - current['index'] < 20): + # group with the previous results using + # the same category with this one + index = current['index'] + gresults.insert(index, res) + + # update every index after the current one + # (including the current one) + for k in categoryPositions: + v = categoryPositions[k]['index'] + if v >= index: + categoryPositions[k]['index'] = v + 1 + + # update this category + current['count'] -= 1 + + else: + # same category + gresults.append(res) + + # update categoryIndex + categoryPositions[category] = {'index': len(gresults), 'count': 8} + + # return gresults + return gresults + + def results_length(self): + return len(self._merged_results) diff --git a/sources/searx/search.py b/sources/searx/search.py index 8991712..655b780 100644 --- a/sources/searx/search.py +++ b/sources/searx/search.py @@ -16,13 +16,8 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. ''' import threading -import re import searx.poolrequests as requests_lib -from itertools import izip_longest, chain -from operator import itemgetter -from Queue import Queue from time import time -from urlparse import urlparse, unquote from searx import settings from searx.engines import ( categories, engines @@ -30,6 +25,7 @@ from searx.engines import ( from searx.languages import language_codes from searx.utils import gen_useragent, get_blocked_engines from searx.query import Query +from searx.results import ResultContainer from searx import logger logger = logger.getChild('search') @@ -42,7 +38,8 @@ def search_request_wrapper(fn, url, engine_name, **kwargs): return fn(url, **kwargs) except: # increase errors stats - engines[engine_name].stats['errors'] += 1 + with threading.RLock(): + engines[engine_name].stats['errors'] += 1 # print engine name and specific error message logger.exception('engine crash: {0}'.format(engine_name)) @@ -84,7 +81,7 @@ def default_request_params(): # create a callback wrapper for the search engine results -def make_callback(engine_name, results_queue, callback, params): +def make_callback(engine_name, callback, params, result_container): # creating a callback wrapper for the search engine results def process_callback(response, **kwargs): @@ -96,12 +93,17 @@ def make_callback(engine_name, results_queue, callback, params): response.search_params = params - timeout_overhead = 0.2 # seconds search_duration = time() - params['started'] + # update stats with current page-load-time + with threading.RLock(): + engines[engine_name].stats['page_load_time'] += search_duration + + timeout_overhead = 0.2 # seconds timeout_limit = engines[engine_name].timeout + timeout_overhead + if search_duration > timeout_limit: - engines[engine_name].stats['page_load_time'] += timeout_limit - engines[engine_name].stats['errors'] += 1 + with threading.RLock(): + engines[engine_name].stats['errors'] += 1 return # callback @@ -111,211 +113,11 @@ def make_callback(engine_name, results_queue, callback, params): for result in search_results: result['engine'] = engine_name - results_queue.put_nowait((engine_name, search_results)) - - # update stats with current page-load-time - engines[engine_name].stats['page_load_time'] += search_duration + result_container.extend(engine_name, search_results) return process_callback -# return the meaningful length of the content for a result -def content_result_len(content): - if isinstance(content, basestring): - content = 
re.sub('[,;:!?\./\\\\ ()-_]', '', content) - return len(content) - else: - return 0 - - -# score results and remove duplications -def score_results(results): - # calculate scoring parameters - flat_res = filter( - None, chain.from_iterable(izip_longest(*results.values()))) - flat_len = len(flat_res) - engines_len = len(results) - - results = [] - - # pass 1: deduplication + scoring - for i, res in enumerate(flat_res): - - res['parsed_url'] = urlparse(res['url']) - - res['host'] = res['parsed_url'].netloc - - if res['host'].startswith('www.'): - res['host'] = res['host'].replace('www.', '', 1) - - res['engines'] = [res['engine']] - - weight = 1.0 - - # strip multiple spaces and cariage returns from content - if res.get('content'): - res['content'] = re.sub(' +', ' ', - res['content'].strip().replace('\n', '')) - - # get weight of this engine if possible - if hasattr(engines[res['engine']], 'weight'): - weight = float(engines[res['engine']].weight) - - # calculate score for that engine - score = int((flat_len - i) / engines_len) * weight + 1 - - # check for duplicates - duplicated = False - for new_res in results: - # remove / from the end of the url if required - p1 = res['parsed_url'].path[:-1]\ - if res['parsed_url'].path.endswith('/')\ - else res['parsed_url'].path - p2 = new_res['parsed_url'].path[:-1]\ - if new_res['parsed_url'].path.endswith('/')\ - else new_res['parsed_url'].path - - # check if that result is a duplicate - if res['host'] == new_res['host'] and\ - unquote(p1) == unquote(p2) and\ - res['parsed_url'].query == new_res['parsed_url'].query and\ - res.get('template') == new_res.get('template'): - duplicated = new_res - break - - # merge duplicates together - if duplicated: - # using content with more text - if content_result_len(res.get('content', '')) >\ - content_result_len(duplicated.get('content', '')): - duplicated['content'] = res['content'] - - # increase result-score - duplicated['score'] += score - - # add engine to list of result-engines - duplicated['engines'].append(res['engine']) - - # using https if possible - if duplicated['parsed_url'].scheme == 'https': - continue - elif res['parsed_url'].scheme == 'https': - duplicated['url'] = res['parsed_url'].geturl() - duplicated['parsed_url'] = res['parsed_url'] - - # if there is no duplicate found, append result - else: - res['score'] = score - # if the result has no scheme, use http as default - if res['parsed_url'].scheme == '': - res['parsed_url'] = res['parsed_url']._replace(scheme="http") - - results.append(res) - - results = sorted(results, key=itemgetter('score'), reverse=True) - - # pass 2 : group results by category and template - gresults = [] - categoryPositions = {} - - for i, res in enumerate(results): - # FIXME : handle more than one category per engine - category = engines[res['engine']].categories[0] + ':' + ''\ - if 'template' not in res\ - else res['template'] - - current = None if category not in categoryPositions\ - else categoryPositions[category] - - # group with previous results using the same category - # if the group can accept more result and is not too far - # from the current position - if current is not None and (current['count'] > 0)\ - and (len(gresults) - current['index'] < 20): - # group with the previous results using - # the same category with this one - index = current['index'] - gresults.insert(index, res) - - # update every index after the current one - # (including the current one) - for k in categoryPositions: - v = categoryPositions[k]['index'] - if v >= index: - 
categoryPositions[k]['index'] = v + 1 - - # update this category - current['count'] -= 1 - - else: - # same category - gresults.append(res) - - # update categoryIndex - categoryPositions[category] = {'index': len(gresults), 'count': 8} - - # return gresults - return gresults - - -def merge_two_infoboxes(infobox1, infobox2): - if 'urls' in infobox2: - urls1 = infobox1.get('urls', None) - if urls1 is None: - urls1 = [] - infobox1.set('urls', urls1) - - urlSet = set() - for url in infobox1.get('urls', []): - urlSet.add(url.get('url', None)) - - for url in infobox2.get('urls', []): - if url.get('url', None) not in urlSet: - urls1.append(url) - - if 'attributes' in infobox2: - attributes1 = infobox1.get('attributes', None) - if attributes1 is None: - attributes1 = [] - infobox1.set('attributes', attributes1) - - attributeSet = set() - for attribute in infobox1.get('attributes', []): - if attribute.get('label', None) not in attributeSet: - attributeSet.add(attribute.get('label', None)) - - for attribute in infobox2.get('attributes', []): - attributes1.append(attribute) - - if 'content' in infobox2: - content1 = infobox1.get('content', None) - content2 = infobox2.get('content', '') - if content1 is not None: - if content_result_len(content2) > content_result_len(content1): - infobox1['content'] = content2 - else: - infobox1.set('content', content2) - - -def merge_infoboxes(infoboxes): - results = [] - infoboxes_id = {} - for infobox in infoboxes: - add_infobox = True - infobox_id = infobox.get('id', None) - if infobox_id is not None: - existingIndex = infoboxes_id.get(infobox_id, None) - if existingIndex is not None: - merge_two_infoboxes(results[existingIndex], infobox) - add_infobox = False - - if add_infobox: - results.append(infobox) - infoboxes_id[infobox_id] = len(results) - 1 - - return results - - class Search(object): """Search information container""" @@ -333,10 +135,7 @@ class Search(object): # set blocked engines self.blocked_engines = get_blocked_engines(engines, request.cookies) - self.results = [] - self.suggestions = set() - self.answers = set() - self.infoboxes = [] + self.result_container = ResultContainer() self.request_data = {} # set specific language if set @@ -357,7 +156,7 @@ class Search(object): # set pagenumber pageno_param = self.request_data.get('pageno', '1') if not pageno_param.isdigit() or int(pageno_param) < 1: - raise Exception('wrong pagenumber') + pageno_param = 1 self.pageno = int(pageno_param) @@ -448,8 +247,6 @@ class Search(object): # init vars requests = [] - results_queue = Queue() - results = {} # increase number of searches number_of_searches += 1 @@ -503,9 +300,9 @@ class Search(object): # create a callback wrapper for the search engine results callback = make_callback( selected_engine['name'], - results_queue, engine.response, - request_params) + request_params, + self.result_container) # create dictionary which contain all # informations about the request @@ -538,42 +335,5 @@ class Search(object): # send all search-request threaded_requests(requests) - while not results_queue.empty(): - engine_name, engine_results = results_queue.get_nowait() - - # TODO type checks - [self.suggestions.add(x['suggestion']) - for x in list(engine_results) - if 'suggestion' in x - and engine_results.remove(x) is None] - - [self.answers.add(x['answer']) - for x in list(engine_results) - if 'answer' in x - and engine_results.remove(x) is None] - - self.infoboxes.extend(x for x in list(engine_results) - if 'infobox' in x - and engine_results.remove(x) is None) - - 
results[engine_name] = engine_results - - # update engine-specific stats - for engine_name, engine_results in results.items(): - engines[engine_name].stats['search_count'] += 1 - engines[engine_name].stats['result_count'] += len(engine_results) - - # score results and remove duplications - self.results = score_results(results) - - # merge infoboxes according to their ids - self.infoboxes = merge_infoboxes(self.infoboxes) - - # update engine stats, using calculated score - for result in self.results: - for res_engine in result['engines']: - engines[result['engine']]\ - .stats['score_count'] += result['score'] - # return results, suggestions, answers and infoboxes return self diff --git a/sources/searx/settings.yml b/sources/searx/settings.yml index ffc3502..c7f659e 100644 --- a/sources/searx/settings.yml +++ b/sources/searx/settings.yml @@ -274,6 +274,11 @@ engines: engine : yahoo shortcut : yh + - name : yandex + engine : yandex + shortcut : yn + disabled : True + - name : yahoo news engine : yahoo_news shortcut : yhn @@ -311,7 +316,7 @@ engines: locales: en : English de : Deutsch - he : Hebrew + he : עברית hu : Magyar fr : Français es : Español diff --git a/sources/searx/static/themes/oscar/css/oscar.min.css b/sources/searx/static/themes/oscar/css/oscar.min.css index 63f8b76..f7aba2b 100644 --- a/sources/searx/static/themes/oscar/css/oscar.min.css +++ b/sources/searx/static/themes/oscar/css/oscar.min.css @@ -1 +1,88 @@ -html{position:relative;min-height:100%}body{margin-bottom:80px}.footer{position:absolute;bottom:0;width:100%;height:60px}input[type=checkbox]:checked+.label_hide_if_checked,input[type=checkbox]:checked+.label_hide_if_not_checked+.label_hide_if_checked{display:none}input[type=checkbox]:not(:checked)+.label_hide_if_not_checked,input[type=checkbox]:not(:checked)+.label_hide_if_checked+.label_hide_if_not_checked{display:none}.result_header{margin-bottom:5px;margin-top:20px}.result_header .favicon{margin-bottom:-3px}.result_header a{vertical-align:bottom}.result_header a .highlight{font-weight:bold}.result-content{margin-top:5px;word-wrap:break-word}.result-content .highlight{font-weight:bold}.result-default{clear:both}.result-images{float:left !important}.img-thumbnail{margin:5px;max-height:128px;min-height:128px}.result-videos{clear:both}.result-torrents{clear:both}.result-map{clear:both}.result-code{clear:both}.suggestion_item{margin:2px 5px}.result_download{margin-right:5px}#pagination{margin-top:30px;padding-bottom:50px}.infobox .infobox_part{margin-bottom:20px;word-wrap:break-word}.infobox .infobox_part:last-child{margin-bottom:0}.search_categories{margin:10px 0;text-transform:capitalize}.cursor-text{cursor:text !important}.cursor-pointer{cursor:pointer !important}.highlight .hll{background-color:#ffc}.highlight{background:#f8f8f8}.highlight .c{color:#408080;font-style:italic}.highlight .err{border:1px solid #f00}.highlight .k{color:#008000;font-weight:bold}.highlight .o{color:#666}.highlight .cm{color:#408080;font-style:italic}.highlight .cp{color:#bc7a00}.highlight .c1{color:#408080;font-style:italic}.highlight .cs{color:#408080;font-style:italic}.highlight .gd{color:#a00000}.highlight .ge{font-style:italic}.highlight .gr{color:#f00}.highlight .gh{color:#000080;font-weight:bold}.highlight .gi{color:#00a000}.highlight .go{color:#888}.highlight .gp{color:#000080;font-weight:bold}.highlight .gs{font-weight:bold}.highlight .gu{color:#800080;font-weight:bold}.highlight .gt{color:#04d}.highlight .kc{color:#008000;font-weight:bold}.highlight 
.kd{color:#008000;font-weight:bold}.highlight .kn{color:#008000;font-weight:bold}.highlight .kp{color:#008000}.highlight .kr{color:#008000;font-weight:bold}.highlight .kt{color:#b00040}.highlight .m{color:#666}.highlight .s{color:#ba2121}.highlight .na{color:#7d9029}.highlight .nb{color:#008000}.highlight .nc{color:#00f;font-weight:bold}.highlight .no{color:#800}.highlight .nd{color:#a2f}.highlight .ni{color:#999;font-weight:bold}.highlight .ne{color:#d2413a;font-weight:bold}.highlight .nf{color:#00f}.highlight .nl{color:#a0a000}.highlight .nn{color:#00f;font-weight:bold}.highlight .nt{color:#008000;font-weight:bold}.highlight .nv{color:#19177c}.highlight .ow{color:#a2f;font-weight:bold}.highlight .w{color:#bbb}.highlight .mf{color:#666}.highlight .mh{color:#666}.highlight .mi{color:#666}.highlight .mo{color:#666}.highlight .sb{color:#ba2121}.highlight .sc{color:#ba2121}.highlight .sd{color:#ba2121;font-style:italic}.highlight .s2{color:#ba2121}.highlight .se{color:#b62;font-weight:bold}.highlight .sh{color:#ba2121}.highlight .si{color:#b68;font-weight:bold}.highlight .sx{color:#008000}.highlight .sr{color:#b68}.highlight .s1{color:#ba2121}.highlight .ss{color:#19177c}.highlight .bp{color:#008000}.highlight .vc{color:#19177c}.highlight .vg{color:#19177c}.highlight .vi{color:#19177c}.highlight .il{color:#666}.highlight .lineno{-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;cursor:default}.highlight .lineno::selection{background:transparent}.highlight .lineno::-moz-selection{background:transparent} \ No newline at end of file +html{position:relative;min-height:100%} +body{margin-bottom:80px} +.footer{position:absolute;bottom:0;width:100%;height:60px} +input[type=checkbox]:checked+.label_hide_if_checked,input[type=checkbox]:checked+.label_hide_if_not_checked+.label_hide_if_checked{display:none} +input[type=checkbox]:not(:checked)+.label_hide_if_not_checked,input[type=checkbox]:not(:checked)+.label_hide_if_checked+.label_hide_if_not_checked{display:none} +.result_header{margin-bottom:5px;margin-top:20px}.result_header .favicon{margin-bottom:-3px} +.result_header a{vertical-align:bottom}.result_header a .highlight{font-weight:bold} +.result-content{margin-top:5px;word-wrap:break-word}.result-content .highlight{font-weight:bold} +.result-default{clear:both} +.result-images{float:left !important} +.img-thumbnail{margin:5px;max-height:128px;min-height:128px} +.result-videos{clear:both} +.result-torrents{clear:both} +.result-map{clear:both} +.result-code{clear:both} +.suggestion_item{margin:2px 5px} +.result_download{margin-right:5px} +#pagination{margin-top:30px;padding-bottom:50px} +.label-default{color:#aaa;background:#fff} +.infobox .infobox_part{margin-bottom:20px;word-wrap:break-word} +.infobox .infobox_part:last-child{margin-bottom:0} +.search_categories{margin:10px 0;text-transform:capitalize} +.cursor-text{cursor:text !important} +.cursor-pointer{cursor:pointer !important} +.highlight .hll{background-color:#ffc} +.highlight{background:#f8f8f8} +.highlight .c{color:#408080;font-style:italic} +.highlight .err{border:1px solid #f00} +.highlight .k{color:#008000;font-weight:bold} +.highlight .o{color:#666} +.highlight .cm{color:#408080;font-style:italic} +.highlight .cp{color:#bc7a00} +.highlight .c1{color:#408080;font-style:italic} +.highlight .cs{color:#408080;font-style:italic} +.highlight .gd{color:#a00000} +.highlight .ge{font-style:italic} +.highlight .gr{color:#f00} +.highlight 
.gh{color:#000080;font-weight:bold} +.highlight .gi{color:#00a000} +.highlight .go{color:#888} +.highlight .gp{color:#000080;font-weight:bold} +.highlight .gs{font-weight:bold} +.highlight .gu{color:#800080;font-weight:bold} +.highlight .gt{color:#04d} +.highlight .kc{color:#008000;font-weight:bold} +.highlight .kd{color:#008000;font-weight:bold} +.highlight .kn{color:#008000;font-weight:bold} +.highlight .kp{color:#008000} +.highlight .kr{color:#008000;font-weight:bold} +.highlight .kt{color:#b00040} +.highlight .m{color:#666} +.highlight .s{color:#ba2121} +.highlight .na{color:#7d9029} +.highlight .nb{color:#008000} +.highlight .nc{color:#00f;font-weight:bold} +.highlight .no{color:#800} +.highlight .nd{color:#a2f} +.highlight .ni{color:#999;font-weight:bold} +.highlight .ne{color:#d2413a;font-weight:bold} +.highlight .nf{color:#00f} +.highlight .nl{color:#a0a000} +.highlight .nn{color:#00f;font-weight:bold} +.highlight .nt{color:#008000;font-weight:bold} +.highlight .nv{color:#19177c} +.highlight .ow{color:#a2f;font-weight:bold} +.highlight .w{color:#bbb} +.highlight .mf{color:#666} +.highlight .mh{color:#666} +.highlight .mi{color:#666} +.highlight .mo{color:#666} +.highlight .sb{color:#ba2121} +.highlight .sc{color:#ba2121} +.highlight .sd{color:#ba2121;font-style:italic} +.highlight .s2{color:#ba2121} +.highlight .se{color:#b62;font-weight:bold} +.highlight .sh{color:#ba2121} +.highlight .si{color:#b68;font-weight:bold} +.highlight .sx{color:#008000} +.highlight .sr{color:#b68} +.highlight .s1{color:#ba2121} +.highlight .ss{color:#19177c} +.highlight .bp{color:#008000} +.highlight .vc{color:#19177c} +.highlight .vg{color:#19177c} +.highlight .vi{color:#19177c} +.highlight .il{color:#666} +.highlight .lineno{-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;cursor:default}.highlight .lineno::selection{background:transparent;} +.highlight .lineno::-moz-selection{background:transparent;} diff --git a/sources/searx/static/themes/oscar/less/oscar/results.less b/sources/searx/static/themes/oscar/less/oscar/results.less index 6ca4b4f..b3d8700 100644 --- a/sources/searx/static/themes/oscar/less/oscar/results.less +++ b/sources/searx/static/themes/oscar/less/oscar/results.less @@ -76,3 +76,8 @@ margin-top: 30px; padding-bottom: 50px; } + +.label-default { + color: #AAA; + background: #FFF; +} diff --git a/sources/searx/templates/courgette/opensearch.xml b/sources/searx/templates/courgette/opensearch.xml index ff9eac5..b85c3a7 100644 --- a/sources/searx/templates/courgette/opensearch.xml +++ b/sources/searx/templates/courgette/opensearch.xml @@ -1,7 +1,7 @@ searx - Search searx + a privacy-respecting, hackable metasearch engine UTF-8 {{ host }}{{ url_for('static', filename='img/favicon.png') | replace("/", "", 1) }} searx metasearch diff --git a/sources/searx/templates/default/opensearch.xml b/sources/searx/templates/default/opensearch.xml index ff9eac5..b85c3a7 100644 --- a/sources/searx/templates/default/opensearch.xml +++ b/sources/searx/templates/default/opensearch.xml @@ -1,7 +1,7 @@ searx - Search searx + a privacy-respecting, hackable metasearch engine UTF-8 {{ host }}{{ url_for('static', filename='img/favicon.png') | replace("/", "", 1) }} searx metasearch diff --git a/sources/searx/templates/oscar/macros.html b/sources/searx/templates/oscar/macros.html index 5866c13..cf49ce8 100644 --- a/sources/searx/templates/oscar/macros.html +++ b/sources/searx/templates/oscar/macros.html @@ -25,7 +25,11 @@ {% 
macro result_footer(result) -%}
-    {{ result.engine }}
+
+    {% for engine in result.engines %}
+        {{ engine }}
+    {% endfor %}
+
     {{ result.pretty_url }}
 
{%- endmacro %} diff --git a/sources/searx/templates/oscar/opensearch.xml b/sources/searx/templates/oscar/opensearch.xml index ff9eac5..b85c3a7 100644 --- a/sources/searx/templates/oscar/opensearch.xml +++ b/sources/searx/templates/oscar/opensearch.xml @@ -1,7 +1,7 @@ searx - Search searx + a privacy-respecting, hackable metasearch engine UTF-8 {{ host }}{{ url_for('static', filename='img/favicon.png') | replace("/", "", 1) }} searx metasearch diff --git a/sources/searx/templates/pix-art/preferences.html b/sources/searx/templates/pix-art/preferences.html index 0caf31b..f59497e 100644 --- a/sources/searx/templates/pix-art/preferences.html +++ b/sources/searx/templates/pix-art/preferences.html @@ -53,8 +53,8 @@ {{ _('Engine name') }} {{ _('Allow') }} / {{ _('Block') }} - {% for (categ,search_engines) in categs %} - {% for search_engine in search_engines %} + {% for categ in all_categories %} + {% for search_engine in engines_by_category[categ] %} {% if not search_engine.private %} diff --git a/sources/searx/tests/engines/test_bing_news.py b/sources/searx/tests/engines/test_bing_news.py index c6c4026..a64d59b 100644 --- a/sources/searx/tests/engines/test_bing_news.py +++ b/sources/searx/tests/engines/test_bing_news.py @@ -28,10 +28,10 @@ class TestBingNewsEngine(SearxTestCase): self.assertRaises(AttributeError, bing_news.response, '') self.assertRaises(AttributeError, bing_news.response, '[]') - response = mock.Mock(text='') + response = mock.Mock(content='') self.assertEqual(bing_news.response(response), []) - response = mock.Mock(text='') + response = mock.Mock(content='') self.assertEqual(bing_news.response(response), []) html = """ @@ -66,7 +66,7 @@ class TestBingNewsEngine(SearxTestCase): """ # noqa - response = mock.Mock(text=html) + response = mock.Mock(content=html) results = bing_news.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) @@ -105,7 +105,7 @@ class TestBingNewsEngine(SearxTestCase): """ # noqa - response = mock.Mock(text=html) + response = mock.Mock(content=html) results = bing_news.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) @@ -128,11 +128,11 @@ class TestBingNewsEngine(SearxTestCase): """ # noqa - response = mock.Mock(text=html) + response = mock.Mock(content=html) results = bing_news.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 0) html = """gabarge""" - response = mock.Mock(text=html) + response = mock.Mock(content=html) self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response) diff --git a/sources/searx/tests/engines/test_btdigg.py b/sources/searx/tests/engines/test_btdigg.py index 4947b71..2721f4e 100644 --- a/sources/searx/tests/engines/test_btdigg.py +++ b/sources/searx/tests/engines/test_btdigg.py @@ -22,7 +22,7 @@ class TestBtdiggEngine(SearxTestCase): self.assertRaises(AttributeError, btdigg.response, '') self.assertRaises(AttributeError, btdigg.response, '[]') - response = mock.Mock(text='') + response = mock.Mock(content='') self.assertEqual(btdigg.response(response), []) html = """ @@ -82,7 +82,7 @@ class TestBtdiggEngine(SearxTestCase): """ - response = mock.Mock(text=html) + response = mock.Mock(content=html) results = btdigg.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) @@ -101,7 +101,7 @@ class TestBtdiggEngine(SearxTestCase): """ - response = mock.Mock(text=html) + response = mock.Mock(content=html) results = btdigg.response(response) 
self.assertEqual(type(results), list) self.assertEqual(len(results), 0) @@ -367,7 +367,7 @@ class TestBtdiggEngine(SearxTestCase): """ - response = mock.Mock(text=html) + response = mock.Mock(content=html) results = btdigg.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 5) diff --git a/sources/searx/tests/engines/test_google.py b/sources/searx/tests/engines/test_google.py index b706e51..04f12b8 100644 --- a/sources/searx/tests/engines/test_google.py +++ b/sources/searx/tests/engines/test_google.py @@ -9,7 +9,7 @@ from searx.testing import SearxTestCase class TestGoogleEngine(SearxTestCase): def mock_response(self, text): - response = mock.Mock(text=text, url='https://www.google.com/search?q=test&start=0&gbv=1') + response = mock.Mock(text=text, url='https://www.google.com/search?q=test&start=0&gbv=1&gws_rd=cr') response.search_params = mock.Mock() response.search_params.get = mock.Mock(return_value='www.google.com') return response @@ -23,16 +23,12 @@ class TestGoogleEngine(SearxTestCase): self.assertIn('url', params) self.assertIn(query, params['url']) self.assertIn('google.fr', params['url']) - self.assertNotIn('PREF', params['cookies']) - self.assertIn('NID', params['cookies']) self.assertIn('fr', params['headers']['Accept-Language']) dicto['language'] = 'all' params = google.request(query, dicto) self.assertIn('google.com', params['url']) self.assertIn('en', params['headers']['Accept-Language']) - self.assertIn('PREF', params['cookies']) - self.assertIn('NID', params['cookies']) def test_response(self): self.assertRaises(AttributeError, google.response, None) diff --git a/sources/searx/tests/engines/test_google_images.py b/sources/searx/tests/engines/test_google_images.py index 9bef692..876d0af 100644 --- a/sources/searx/tests/engines/test_google_images.py +++ b/sources/searx/tests/engines/test_google_images.py @@ -10,15 +10,15 @@ class TestGoogleImagesEngine(SearxTestCase): query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 + dicto['safesearch'] = 1 params = google_images.request(query, dicto) self.assertIn('url', params) self.assertIn(query, params['url']) - self.assertIn('googleapis.com', params['url']) - self.assertIn('safe=on', params['url']) + self.assertIn('safe=active', params['url']) dicto['safesearch'] = 0 params = google_images.request(query, dicto) - self.assertIn('safe=off', params['url']) + self.assertNotIn('safe', params['url']) def test_response(self): self.assertRaises(AttributeError, google_images.response, None) @@ -26,88 +26,33 @@ class TestGoogleImagesEngine(SearxTestCase): self.assertRaises(AttributeError, google_images.response, '') self.assertRaises(AttributeError, google_images.response, '[]') - response = mock.Mock(text='{}') + response = mock.Mock(text='
') self.assertEqual(google_images.response(response), []) - response = mock.Mock(text='{"data": []}') - self.assertEqual(google_images.response(response), []) - - json = """ - { - "responseData": { - "results": [ - { - "GsearchResultClass": "GimageSearch", - "width": "400", - "height": "400", - "imageId": "ANd9GcQbYb9FJuAbG_hT4i8FeC0O0x-P--EHdzgRIF9ao97nHLl7C2mREn6qTQ", - "tbWidth": "124", - "tbHeight": "124", - "unescapedUrl": "http://unescaped.url.jpg", - "url": "http://image.url.jpg", - "visibleUrl": "insolitebuzz.fr", - "title": "This is the title", - "titleNoFormatting": "Petit test sympa qui rend fou tout le monde ! A faire", - "originalContextUrl": "http://this.is.the.url", - "content": "test", - "contentNoFormatting": "test", - "tbUrl": "http://thumbnail.url" - } - ] - }, - "responseDetails": null, - "responseStatus": 200 - } - """ - response = mock.Mock(text=json) + html = """ +
+ +
+
+ Image result for south +
+
504 × 598 - clker.com +
+
+
+
+ {"id":"bQWQ9wz9loJmjM:","isu":"clker.com","ity":"png","md":"/search?tbs\u003dsbi:AMhZZit7u1mHyop9pQisu-5idR-8W_1Itvwc3afChmsjQYPx_1yYMzBvUZgtkcGoojqekKZ-6n_1rjX9ySH0OWA_1eO5OijFY6BBDw_1GApr6xxb1bXJcBcj-DiguMoXWW7cZSG7MRQbwnI5SoDZNXcv_1xGszy886I7NVb_1oRKSliTHtzqbXAxhvYreM","msu":"/search?q\u003dsouth\u0026biw\u003d1364\u0026bih\u003d235\u0026tbm\u003disch\u0026tbs\u003dsimg:CAQSEgltBZD3DP2WgiG-U42R4G0RFw","oh":598,"os":"13KB","ow":504,"pt":"South Arrow Clip Art at Clker.com - vector clip art online ...","rid":"vlONkeBtERfDuM","s":"Download this image as:","sc":1,"si":"/search?q\u003dsouth\u0026biw\u003d1364\u0026bih\u003d235\u0026tbm\u003disch\u0026tbs\u003dsimg:CAESEgltBZD3DP2WgiG-U42R4G0RFw","th":245,"tu":"https://thumbnail.url/","tw":206} +
+
+
+
+ """ # noqa + response = mock.Mock(text=html) results = google_images.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) - self.assertEqual(results[0]['title'], 'This is the title') - self.assertEqual(results[0]['url'], 'http://this.is.the.url') - self.assertEqual(results[0]['thumbnail_src'], 'https://thumbnail.url') - self.assertEqual(results[0]['img_src'], 'http://image.url.jpg') - self.assertEqual(results[0]['content'], 'test') - - json = """ - { - "responseData": { - "results": [ - { - "GsearchResultClass": "GimageSearch", - "width": "400", - "height": "400", - "imageId": "ANd9GcQbYb9FJuAbG_hT4i8FeC0O0x-P--EHdzgRIF9ao97nHLl7C2mREn6qTQ", - "tbWidth": "124", - "tbHeight": "124", - "unescapedUrl": "http://unescaped.url.jpg", - "visibleUrl": "insolitebuzz.fr", - "title": "This is the title", - "titleNoFormatting": "Petit test sympa qui rend fou tout le monde ! A faire", - "originalContextUrl": "http://this.is.the.url", - "content": "test", - "contentNoFormatting": "test", - "tbUrl": "http://thumbnail.url" - } - ] - }, - "responseDetails": null, - "responseStatus": 200 - } - """ - response = mock.Mock(text=json) - results = google_images.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) - - json = """ - { - "responseData": {}, - "responseDetails": null, - "responseStatus": 200 - } - """ - response = mock.Mock(text=json) - results = google_images.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) + self.assertEqual(results[0]['title'], u'South Arrow Clip Art at Clker.com - vector clip art online ...') + self.assertEqual(results[0]['url'], 'http://www.clker.com/clipart-south-arrow.html') + self.assertEqual(results[0]['thumbnail_src'], 'https://thumbnail.url/') + self.assertEqual(results[0]['img_src'], 'http://www.clker.com/cliparts/H/X/l/b/0/0/south-arrow-hi.png') + self.assertEqual(results[0]['content'], 'Download this image as:') diff --git a/sources/searx/tests/engines/test_startpage.py b/sources/searx/tests/engines/test_startpage.py index 07f13ee..9a1a09b 100644 --- a/sources/searx/tests/engines/test_startpage.py +++ b/sources/searx/tests/engines/test_startpage.py @@ -42,7 +42,7 @@ class TestStartpageEngine(SearxTestCase): -

+

This should be the content.

@@ -78,7 +78,7 @@ class TestStartpageEngine(SearxTestCase): -

+

This should be the content.

@@ -101,7 +101,7 @@ class TestStartpageEngine(SearxTestCase):

-

+

This should be the content.

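The rewritten test_google_images.py above asserts that the request URL contains safe=active when safesearch is enabled and no safe parameter at all when it is 0, and it no longer expects the old googleapis.com JSON endpoint. A minimal sketch of request building that would satisfy those assertions (hypothetical helper name, base URL and start parameter, not the engine's actual code):

# Hypothetical sketch matching the updated test assertions; helper name,
# base URL and 'start' parameter are illustrative only.
from urllib import urlencode  # Python 2 stdlib, as used elsewhere in searx 0.8.x


def build_google_images_url(query, pageno=1, safesearch=0):
    args = {'q': query, 'start': (pageno - 1) * 20}
    if safesearch:
        # test: assertIn('safe=active', params['url']) when safesearch is set
        args['safe'] = 'active'
    # test: assertNotIn('safe', params['url']) when safesearch == 0
    return 'https://www.google.com/search?tbm=isch&' + urlencode(args)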
diff --git a/sources/searx/tests/test_plugins.py b/sources/searx/tests/test_plugins.py index c517112..98d39ec 100644 --- a/sources/searx/tests/test_plugins.py +++ b/sources/searx/tests/test_plugins.py @@ -5,6 +5,12 @@ from searx import plugins from mock import Mock +def get_search_mock(query, **kwargs): + return {'search': Mock(query=query, + result_container=Mock(answers=set()), + **kwargs)} + + class PluginStoreTest(SearxTestCase): def test_PluginStore_init(self): @@ -46,23 +52,23 @@ class SelfIPTest(SearxTestCase): request = Mock(user_plugins=store.plugins, remote_addr='127.0.0.1') request.headers.getlist.return_value = [] - ctx = {'search': Mock(answers=set(), - query='ip')} + ctx = get_search_mock(query='ip') store.call('post_search', request, ctx) - self.assertTrue('127.0.0.1' in ctx['search'].answers) + self.assertTrue('127.0.0.1' in ctx['search'].result_container.answers) # User agent test request = Mock(user_plugins=store.plugins, user_agent='Mock') request.headers.getlist.return_value = [] - ctx = {'search': Mock(answers=set(), - query='user-agent')} + + ctx = get_search_mock(query='user-agent') store.call('post_search', request, ctx) - self.assertTrue('Mock' in ctx['search'].answers) - ctx = {'search': Mock(answers=set(), - query='user agent')} + self.assertTrue('Mock' in ctx['search'].result_container.answers) + + ctx = get_search_mock(query='user-agent') store.call('post_search', request, ctx) - self.assertTrue('Mock' in ctx['search'].answers) - ctx = {'search': Mock(answers=set(), - query='What is my User-Agent?')} + self.assertTrue('Mock' in ctx['search'].result_container.answers) + + ctx = get_search_mock(query='What is my User-Agent?') store.call('post_search', request, ctx) + self.assertTrue('Mock' in ctx['search'].result_container.answers) diff --git a/sources/searx/tests/test_results.py b/sources/searx/tests/test_results.py new file mode 100644 index 0000000..274b5b3 --- /dev/null +++ b/sources/searx/tests/test_results.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- + +from searx.results import ResultContainer +from searx.testing import SearxTestCase + + +def fake_result(url='https://aa.bb/cc?dd=ee#ff', + title='aaa', + content='bbb', + engine='wikipedia', **kwargs): + result = {'url': url, + 'title': title, + 'content': content, + 'engine': engine} + result.update(kwargs) + return result + + +# TODO +class ResultContainerTestCase(SearxTestCase): + + def test_empty(self): + c = ResultContainer() + self.assertEqual(c.get_ordered_results(), []) + + def test_one_result(self): + c = ResultContainer() + c.extend('wikipedia', [fake_result()]) + self.assertEqual(c.results_length(), 1) + + def test_one_suggestion(self): + c = ResultContainer() + c.extend('wikipedia', [fake_result(suggestion=True)]) + self.assertEqual(len(c.suggestions), 1) + self.assertEqual(c.results_length(), 0) + + def test_result_merge(self): + c = ResultContainer() + c.extend('wikipedia', [fake_result()]) + c.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')]) + self.assertEqual(c.results_length(), 2) diff --git a/sources/searx/tests/test_search.py b/sources/searx/tests/test_search.py index 89d0b62..af5fffd 100644 --- a/sources/searx/tests/test_search.py +++ b/sources/searx/tests/test_search.py @@ -1,25 +1,10 @@ # -*- coding: utf-8 -*- -from searx.search import score_results from searx.testing import SearxTestCase -def fake_result(url='https://aa.bb/cc?dd=ee#ff', - title='aaa', - content='bbb', - engine='wikipedia'): - return {'url': url, - 'title': title, - 'content': content, - 
'engine': engine}
+# TODO
+class SearchTestCase(SearxTestCase):
 
-
-class ScoreResultsTestCase(SearxTestCase):
-
-    def test_empty(self):
-        self.assertEqual(score_results(dict()), [])
-
-    def test_urlparse(self):
-        results = score_results(dict(a=[fake_result(url='https://aa.bb/cc?dd=ee#ff')]))
-        parsed_url = results[0]['parsed_url']
-        self.assertEqual(parsed_url.query, 'dd=ee')
+    def test_(self):
+        pass
diff --git a/sources/searx/tests/test_webapp.py b/sources/searx/tests/test_webapp.py
index 471ec2f..071c01d 100644
--- a/sources/searx/tests/test_webapp.py
+++ b/sources/searx/tests/test_webapp.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 
 import json
+from mock import Mock
 from urlparse import ParseResult
 from searx import webapp
 from searx.testing import SearxTestCase
@@ -33,7 +34,12 @@ class ViewsTestCase(SearxTestCase):
         ]
 
         def search_mock(search_self, *args):
-            search_self.results = self.test_results
+            search_self.result_container = Mock(get_ordered_results=lambda: self.test_results,
+                                                answers=set(),
+                                                suggestions=set(),
+                                                infoboxes=[],
+                                                results=self.test_results,
+                                                results_length=lambda: len(self.test_results))
 
         webapp.Search.search = search_mock
 
@@ -138,7 +144,7 @@ class ViewsTestCase(SearxTestCase):
     def test_opensearch_xml(self):
         result = self.app.get('/opensearch.xml')
         self.assertEqual(result.status_code, 200)
-        self.assertIn('Search searx', result.data)
+        self.assertIn('a privacy-respecting, hackable metasearch engine', result.data)
 
     def test_favicon(self):
         result = self.app.get('/favicon.ico')
diff --git a/sources/searx/version.py b/sources/searx/version.py
index ccf73ba..2153179 100644
--- a/sources/searx/version.py
+++ b/sources/searx/version.py
@@ -19,7 +19,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 # version of searx
 VERSION_MAJOR = 0
 VERSION_MINOR = 8
-VERSION_BUILD = 0
+VERSION_BUILD = 1
 
 VERSION_STRING = "{0}.{1}.{2}".format(VERSION_MAJOR,
                                       VERSION_MINOR,
diff --git a/sources/searx/webapp.py b/sources/searx/webapp.py
index 7f1621a..794b7ea 100644
--- a/sources/searx/webapp.py
+++ b/sources/searx/webapp.py
@@ -42,7 +42,7 @@ except:
 
 from datetime import datetime, timedelta
 from urllib import urlencode
-from urlparse import urlparse
+from urlparse import urlparse, urljoin
 from werkzeug.contrib.fixers import ProxyFix
 from flask import (
     Flask, request, render_template, url_for, Response, make_response,
@@ -383,7 +383,7 @@ def index():
 
     plugins.call('post_search', request, locals())
 
-    for result in search.results:
+    for result in search.result_container.get_ordered_results():
 
         plugins.call('on_result', request, locals())
         if not search.paging and engines[result['engine']].paging:
@@ -411,7 +411,7 @@ def index():
                     minutes = int((timedifference.seconds / 60) % 60)
                     hours = int(timedifference.seconds / 60 / 60)
                     if hours == 0:
-                        result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)  # noqa
+                        result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
                     else:
                         result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
             else:
@@ -419,17 +419,16 @@ def index():
 
     if search.request_data.get('format') == 'json':
         return Response(json.dumps({'query': search.query,
-                                    'results': search.results}),
+                                    'results': search.result_container.get_ordered_results()}),
                         mimetype='application/json')
     elif search.request_data.get('format') == 'csv':
         csv = UnicodeWriter(cStringIO.StringIO())
         keys = ('title', 'url', 'content', 'host', 'engine', 'score')
-        if search.results:
-            csv.writerow(keys)
-            for row in search.results:
-                row['host'] = row['parsed_url'].netloc
-                csv.writerow([row.get(key, '') for key in keys])
-            csv.stream.seek(0)
+        csv.writerow(keys)
+        for row in search.result_container.get_ordered_results():
+            row['host'] = row['parsed_url'].netloc
+            csv.writerow([row.get(key, '') for key in keys])
+        csv.stream.seek(0)
         response = Response(csv.stream.read(), mimetype='application/csv')
         cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search.query)
         response.headers.add('Content-Disposition', cont_disp)
@@ -437,24 +436,24 @@ def index():
     elif search.request_data.get('format') == 'rss':
         response_rss = render(
             'opensearch_response_rss.xml',
-            results=search.results,
+            results=search.result_container.get_ordered_results(),
             q=search.request_data['q'],
-            number_of_results=len(search.results),
+            number_of_results=search.result_container.results_length(),
             base_url=get_base_url()
         )
         return Response(response_rss, mimetype='text/xml')
 
     return render(
         'results.html',
-        results=search.results,
+        results=search.result_container.get_ordered_results(),
         q=search.request_data['q'],
         selected_categories=search.categories,
         paging=search.paging,
         pageno=search.pageno,
         base_url=get_base_url(),
-        suggestions=search.suggestions,
-        answers=search.answers,
-        infoboxes=search.infoboxes,
+        suggestions=search.result_container.suggestions,
+        answers=search.result_container.answers,
+        infoboxes=search.result_container.infoboxes,
         theme=get_current_theme_name(),
         favicons=global_favicons[themes.index(get_current_theme_name())]
     )
@@ -532,7 +531,7 @@ def preferences():
 
     blocked_engines = []
 
-    resp = make_response(redirect(url_for('index')))
+    resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index'))))
 
     if request.method == 'GET':
         blocked_engines = get_blocked_engines(engines, request.cookies)
@@ -767,7 +766,7 @@ def favicon():
 
 @app.route('/clear_cookies')
 def clear_cookies():
-    resp = make_response(redirect(url_for('index')))
+    resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index'))))
     for cookie_name in request.cookies:
         resp.delete_cookie(cookie_name)
     return resp
diff --git a/sources/versions.cfg b/sources/versions.cfg
index 9452174..6aad7a7 100644
--- a/sources/versions.cfg
+++ b/sources/versions.cfg
@@ -8,7 +8,6 @@ Pygments = 2.0.2
 WebOb = 1.4.1
 WebTest = 2.0.18
 Werkzeug = 0.10.4
-buildout-versions = 1.7
 collective.recipe.omelette = 0.16
 coverage = 3.7.1
 decorator = 3.4.2
@@ -38,7 +37,6 @@ pyasn1 = 0.1.8
 pyasn1-modules = 0.0.6
 certifi = 2015.04.28
 
-# cffi = 1.1.2
 cryptography = 0.9.1
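Taken together, the webapp.py and test changes in this diff move result handling from flat attributes on the Search object (search.results, search.answers, search.suggestions, search.infoboxes) to a single searx.results.ResultContainer. A minimal sketch of that flow, using only calls that appear in the new tests/test_results.py and in webapp.py (URLs and titles are placeholder values):

# Sketch of the 0.8.1 result_container flow; mirrors tests/test_results.py.
from searx.results import ResultContainer


def fake_result(url='https://aa.bb/cc?dd=ee#ff', engine='wikipedia'):
    # same shape as the helper defined in tests/test_results.py
    return {'url': url, 'title': 'aaa', 'content': 'bbb', 'engine': engine}


container = ResultContainer()
container.extend('wikipedia', [fake_result()])
# a second engine returning an already-seen URL is merged instead of duplicated,
# which is why the oscar result footer now iterates over result.engines
container.extend('wikidata', [fake_result(engine='wikidata'),
                              fake_result(url='https://example.com/', engine='wikidata')])

assert container.results_length() == 2
ordered = container.get_ordered_results()  # what webapp.py now renders and serialises
# container.answers, container.suggestions and container.infoboxes supply the
# template variables that were previously read straight from the Search object.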