Mirror of https://github.com/YunoHost-Apps/searx_ynh.git

update 0.8.1

Adrien Beudin 2015-12-29 03:26:27 -05:00
parent f4b734aba4
commit fbde09444f
38 changed files with 805 additions and 696 deletions


@@ -3,13 +3,13 @@ general:
 
 search:
     safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict
-    autocomplete : "duckduckgo" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "wikipedia" - leave blank to turn it off by default
+    autocomplete : "" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "wikipedia" - leave blank to turn it off by default
 
 server:
     port : 8888
     bind_address : "127.0.0.1" # address to listen on
     secret_key : "ultrasecretkey" # change this!
-    base_url : ynhbaseurl # Set custom base_url. Possible values: False or "https://your.custom.host/location/"
+    base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/"
     image_proxy : False # Proxying image results through searx
 
 ui:
@@ -274,6 +274,11 @@ engines:
     engine : yahoo
     shortcut : yh
 
+  - name : yandex
+    engine : yandex
+    shortcut : yn
+    disabled : True
+
   - name : yahoo news
     engine : yahoo_news
     shortcut : yhn
@@ -311,7 +316,7 @@ engines:
 locales:
     en : English
     de : Deutsch
-    he : Hebrew
+    he : עברית
     hu : Magyar
     fr : Français
     es : Español


@@ -38,3 +38,6 @@ generally made searx better:
 - Niklas Haas
 - @underr
 - Emmanuel Benazera
+- @GreenLunar
+- Noemi Vanyi
+- Kang-min Liu


@@ -1,3 +1,23 @@
+0.8.1 2015.12.22
+================
+
+- More efficient result parsing
+- Rewritten google engine to prevent app crashes
+- Other engine fixes/tweaks
+  - Bing news
+  - Btdigg
+  - Gigablast
+  - Google images
+  - Startpage
+
+
+News
+~~~~
+
+New documentation page is available: https://asciimoo.github.io/searx
+
+
 0.8.0 2015.09.08
 ================
 
@@ -44,6 +64,7 @@ News
 @dalf joined the maintainer "team"
 
 0.7.0 2015.02.03
 ================


@@ -60,7 +60,7 @@ locales:
 	@pybabel compile -d searx/translations
 
 clean:
-	@rm -rf .installed.cfg .mr.developer.cfg bin parts develop-eggs \
-		searx.egg-info lib include .coverage coverage searx/static/themes/default/css/*.css
+	@rm -rf .installed.cfg .mr.developer.cfg bin parts develop-eggs eggs \
+		searx.egg-info lib include .coverage coverage
 
 .PHONY: all tests robot flake8 coverage production minimal styles locales clean


@@ -96,7 +96,7 @@ remember 'untested code is broken code'.
 Runs robot (Selenium) tests, you must have ``firefox`` installed because
 this functional tests actually run the browser and perform operations on
 it. Also searx is executed with
-`settings\_robot <https://github.com/asciimoo/searx/blob/master/searx/settings_robot.py>`__.
+`settings\_robot <https://github.com/asciimoo/searx/blob/master/searx/settings_robot.yml>`__.
 
 ``make flake8``
 '''''''''''''''


@ -1,16 +1,10 @@
[buildout] [buildout]
extends = versions.cfg extends = versions.cfg
versions = versions
unzip = true unzip = true
newest = false newest = false
extends = versions.cfg
versions = versions
prefer-final = true prefer-final = true
develop = . develop = .
extensions =
buildout_versions
eggs = eggs =
searx searx


@@ -18,75 +18,17 @@ The script accepts buildout command-line options, so you can
 use the -c option to specify an alternate configuration file.
 """
 
-import os, shutil, sys, tempfile, urllib, urllib2, subprocess
+import os
+import shutil
+import sys
+import tempfile
 
 from optparse import OptionParser
 
+__version__ = '2015-07-01'
+# See zc.buildout's changelog if this version is up to date.
+
+tmpeggs = tempfile.mkdtemp(prefix='bootstrap-')
+
-if sys.platform == 'win32':
-    def quote(c):
-        if ' ' in c:
-            return '"%s"' % c  # work around spawn lamosity on windows
-        else:
-            return c
-else:
-    quote = str
-
-# See zc.buildout.easy_install._has_broken_dash_S for motivation and comments.
stdout, stderr = subprocess.Popen(
[sys.executable, '-Sc',
'try:\n'
' import ConfigParser\n'
'except ImportError:\n'
' print 1\n'
'else:\n'
' print 0\n'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
has_broken_dash_S = bool(int(stdout.strip()))
# In order to be more robust in the face of system Pythons, we want to
# run without site-packages loaded. This is somewhat tricky, in
# particular because Python 2.6's distutils imports site, so starting
# with the -S flag is not sufficient. However, we'll start with that:
if not has_broken_dash_S and 'site' in sys.modules:
# We will restart with python -S.
args = sys.argv[:]
args[0:0] = [sys.executable, '-S']
args = map(quote, args)
os.execv(sys.executable, args)
# Now we are running with -S. We'll get the clean sys.path, import site
# because distutils will do it later, and then reset the path and clean
# out any namespace packages from site-packages that might have been
# loaded by .pth files.
clean_path = sys.path[:]
import site # imported because of its side effects
sys.path[:] = clean_path
for k, v in sys.modules.items():
if k in ('setuptools', 'pkg_resources') or (
hasattr(v, '__path__') and
len(v.__path__) == 1 and
not os.path.exists(os.path.join(v.__path__[0], '__init__.py'))):
# This is a namespace package. Remove it.
sys.modules.pop(k)
is_jython = sys.platform.startswith('java')
setuptools_source = 'http://peak.telecommunity.com/dist/ez_setup.py'
distribute_source = 'http://python-distribute.org/distribute_setup.py'
distribute_source = 'https://bitbucket.org/pypa/setuptools/raw/f657df1f1ed46596d236376649c99a470662b4ba/distribute_setup.py'
# parsing arguments
def normalize_to_url(option, opt_str, value, parser):
if value:
if '://' not in value: # It doesn't smell like a URL.
value = 'file://%s' % (
urllib.pathname2url(
os.path.abspath(os.path.expanduser(value))),)
if opt_str == '--download-base' and not value.endswith('/'):
# Download base needs a trailing slash to make the world happy.
value += '/'
else:
value = None
name = opt_str[2:].replace('-', '_')
setattr(parser.values, name, value)
 usage = '''\
 [DESIRED PYTHON FOR BUILDOUT] bootstrap.py [options]
@@ -96,31 +38,14 @@ Bootstraps a buildout-based project.
 Simply run this script in a directory containing a buildout.cfg, using the
 Python that you want bin/buildout to use.
 
-Note that by using --setup-source and --download-base to point to
-local resources, you can keep this script from going over the network.
+Note that by using --find-links to point to local resources, you can keep
+this script from going over the network.
 '''
 
 parser = OptionParser(usage=usage)
+parser.add_option("--version",
+                  action="store_true", default=False,
+                  help=("Return bootstrap.py version."))
-parser.add_option("-v", "--version", dest="version",
-                  help="use a specific zc.buildout version")
-parser.add_option("-d", "--distribute",
action="store_true", dest="use_distribute", default=False,
help="Use Distribute rather than Setuptools.")
parser.add_option("--setup-source", action="callback", dest="setup_source",
callback=normalize_to_url, nargs=1, type="string",
help=("Specify a URL or file location for the setup file. "
"If you use Setuptools, this will default to " +
setuptools_source + "; if you use Distribute, this "
"will default to " + distribute_source + "."))
parser.add_option("--download-base", action="callback", dest="download_base",
callback=normalize_to_url, nargs=1, type="string",
help=("Specify a URL or directory for downloading "
"zc.buildout and either Setuptools or Distribute. "
"Defaults to PyPI."))
parser.add_option("--eggs",
help=("Specify a directory for storing eggs. Defaults to "
"a temporary directory that is deleted when the "
"bootstrap script completes."))
parser.add_option("-t", "--accept-buildout-test-releases", parser.add_option("-t", "--accept-buildout-test-releases",
dest='accept_buildout_test_releases', dest='accept_buildout_test_releases',
action="store_true", default=False, action="store_true", default=False,
@ -130,95 +55,117 @@ parser.add_option("-t", "--accept-buildout-test-releases",
"extensions for you. If you use this flag, " "extensions for you. If you use this flag, "
"bootstrap and buildout will get the newest releases " "bootstrap and buildout will get the newest releases "
"even if they are alphas or betas.")) "even if they are alphas or betas."))
parser.add_option("-c", None, action="store", dest="config_file", parser.add_option("-c", "--config-file",
help=("Specify the path to the buildout configuration " help=("Specify the path to the buildout configuration "
"file to be used.")) "file to be used."))
parser.add_option("-f", "--find-links",
help=("Specify a URL to search for buildout releases"))
parser.add_option("--allow-site-packages",
action="store_true", default=False,
help=("Let bootstrap.py use existing site packages"))
parser.add_option("--buildout-version",
help="Use a specific zc.buildout version")
parser.add_option("--setuptools-version",
help="Use a specific setuptools version")
parser.add_option("--setuptools-to-dir",
help=("Allow for re-use of existing directory of "
"setuptools versions"))
options, args = parser.parse_args() options, args = parser.parse_args()
if options.version:
print("bootstrap.py version %s" % __version__)
sys.exit(0)
if options.eggs:
eggs_dir = os.path.abspath(os.path.expanduser(options.eggs))
else:
eggs_dir = tempfile.mkdtemp()
if options.setup_source is None: ######################################################################
if options.use_distribute: # load/install setuptools
options.setup_source = distribute_source
else:
options.setup_source = setuptools_source
if options.accept_buildout_test_releases:
args.insert(0, 'buildout:accept-buildout-test-releases=true')
try: try:
import pkg_resources from urllib.request import urlopen
import setuptools # A flag. Sometimes pkg_resources is installed alone.
if not hasattr(pkg_resources, '_distribute'):
raise ImportError
except ImportError: except ImportError:
ez_code = urllib2.urlopen( from urllib2 import urlopen
options.setup_source).read().replace('\r\n', '\n')
ez = {} ez = {}
exec ez_code in ez if os.path.exists('ez_setup.py'):
setup_args = dict(to_dir=eggs_dir, download_delay=0) exec(open('ez_setup.py').read(), ez)
if options.download_base: else:
setup_args['download_base'] = options.download_base exec(urlopen('https://bootstrap.pypa.io/ez_setup.py').read(), ez)
if options.use_distribute:
setup_args['no_fake'] = True if not options.allow_site_packages:
if sys.version_info[:2] == (2, 4): # ez_setup imports site, which adds site packages
setup_args['version'] = '0.6.32' # this will remove them from the path to ensure that incompatible versions
ez['use_setuptools'](**setup_args) # of setuptools are not in the path
if 'pkg_resources' in sys.modules: import site
reload(sys.modules['pkg_resources']) # inside a virtualenv, there is no 'getsitepackages'.
import pkg_resources # We can't remove these reliably
# This does not (always?) update the default working set. We will if hasattr(site, 'getsitepackages'):
# do it. for sitepackage_path in site.getsitepackages():
for path in sys.path: # Strip all site-packages directories from sys.path that
# are not sys.prefix; this is because on Windows
# sys.prefix is a site-package directory.
if sitepackage_path != sys.prefix:
sys.path[:] = [x for x in sys.path
if sitepackage_path not in x]
setup_args = dict(to_dir=tmpeggs, download_delay=0)
if options.setuptools_version is not None:
setup_args['version'] = options.setuptools_version
if options.setuptools_to_dir is not None:
setup_args['to_dir'] = options.setuptools_to_dir
ez['use_setuptools'](**setup_args)
import setuptools
import pkg_resources
# This does not (always?) update the default working set. We will
# do it.
for path in sys.path:
if path not in pkg_resources.working_set.entries: if path not in pkg_resources.working_set.entries:
pkg_resources.working_set.add_entry(path) pkg_resources.working_set.add_entry(path)
cmd = [quote(sys.executable), ######################################################################
'-c', # Install buildout
quote('from setuptools.command.easy_install import main; main()'),
'-mqNxd',
quote(eggs_dir)]
if not has_broken_dash_S:
cmd.insert(1, '-S')
find_links = options.download_base
if not find_links:
find_links = os.environ.get('bootstrap-testing-find-links')
if not find_links and options.accept_buildout_test_releases:
find_links = 'http://downloads.buildout.org/'
if find_links:
cmd.extend(['-f', quote(find_links)])
if options.use_distribute:
setup_requirement = 'distribute'
else:
setup_requirement = 'setuptools'
ws = pkg_resources.working_set ws = pkg_resources.working_set
setup_requirement_path = ws.find(
pkg_resources.Requirement.parse(setup_requirement)).location setuptools_path = ws.find(
env = dict( pkg_resources.Requirement.parse('setuptools')).location
os.environ,
PYTHONPATH=setup_requirement_path) # Fix sys.path here as easy_install.pth added before PYTHONPATH
cmd = [sys.executable, '-c',
'import sys; sys.path[0:0] = [%r]; ' % setuptools_path +
'from setuptools.command.easy_install import main; main()',
'-mZqNxd', tmpeggs]
find_links = os.environ.get(
'bootstrap-testing-find-links',
options.find_links or
('http://downloads.buildout.org/'
if options.accept_buildout_test_releases else None)
)
if find_links:
cmd.extend(['-f', find_links])
requirement = 'zc.buildout' requirement = 'zc.buildout'
version = options.version version = options.buildout_version
if version is None and not options.accept_buildout_test_releases: if version is None and not options.accept_buildout_test_releases:
# Figure out the most recent final version of zc.buildout. # Figure out the most recent final version of zc.buildout.
import setuptools.package_index import setuptools.package_index
_final_parts = '*final-', '*final' _final_parts = '*final-', '*final'
def _final_version(parsed_version): def _final_version(parsed_version):
try:
return not parsed_version.is_prerelease
except AttributeError:
# Older setuptools
for part in parsed_version: for part in parsed_version:
if (part[:1] == '*') and (part not in _final_parts): if (part[:1] == '*') and (part not in _final_parts):
return False return False
return True return True
index = setuptools.package_index.PackageIndex( index = setuptools.package_index.PackageIndex(
search_path=[setup_requirement_path]) search_path=[setuptools_path])
if find_links: if find_links:
index.add_find_links((find_links,)) index.add_find_links((find_links,))
req = pkg_resources.Requirement.parse(requirement) req = pkg_resources.Requirement.parse(requirement)
@@ -227,8 +174,6 @@ if version is None and not options.accept_buildout_test_releases:
     bestv = None
     for dist in index[req.project_name]:
         distv = dist.parsed_version
-        if distv >= pkg_resources.parse_version('2dev'):
-            continue
         if _final_version(distv):
             if bestv is None or distv > bestv:
                 best = [dist]
@@ -238,40 +183,28 @@ if version is None and not options.accept_buildout_test_releases:
     if best:
         best.sort()
         version = best[-1].version
+
 if version:
-    requirement += '=='+version
-else:
-    requirement += '<2dev'
+    requirement = '=='.join((requirement, version))
 cmd.append(requirement)
 
-if is_jython:
-    import subprocess
-    exitcode = subprocess.Popen(cmd, env=env).wait()
-else:  # Windows prefers this, apparently; otherwise we would prefer subprocess
-    exitcode = os.spawnle(*([os.P_WAIT, sys.executable] + cmd + [env]))
-if exitcode != 0:
-    sys.stdout.flush()
-    sys.stderr.flush()
-    print ("An error occurred when trying to install zc.buildout. "
-           "Look above this message for any errors that "
-           "were output by easy_install.")
-    sys.exit(exitcode)
-
-ws.add_entry(eggs_dir)
+import subprocess
+if subprocess.call(cmd) != 0:
+    raise Exception(
+        "Failed to execute command:\n%s" % repr(cmd)[1:-1])
+
+######################################################################
+# Import and run buildout
+
+ws.add_entry(tmpeggs)
 ws.require(requirement)
 import zc.buildout.buildout
 
+# If there isn't already a command in the args, add bootstrap
 if not [a for a in args if '=' not in a]:
     args.append('bootstrap')
 
-# if -c was provided, we push it back into args for buildout' main function
+# if -c was provided, we push it back into args for buildout's main function
 if options.config_file is not None:
     args[0:0] = ['-c', options.config_file]
 
 zc.buildout.buildout.main(args)
-if not options.eggs:  # clean up temporary egg directory
-    shutil.rmtree(eggs_dir)
+shutil.rmtree(tmpeggs)


@@ -68,7 +68,7 @@ def request(query, params):
 def response(resp):
     results = []
 
-    rss = etree.fromstring(resp.text)
+    rss = etree.fromstring(resp.content)
 
     ns = rss.nsmap


@@ -38,7 +38,7 @@ def request(query, params):
 def response(resp):
     results = []
 
-    dom = html.fromstring(resp.text)
+    dom = html.fromstring(resp.content)
 
     search_res = dom.xpath('//div[@id="search_res"]/table/tr')


@@ -1,8 +1,8 @@
 """
  Gigablast (Web)
 
- @website http://gigablast.com
- @provide-api yes (http://gigablast.com/api.html)
+ @website https://gigablast.com
+ @provide-api yes (https://gigablast.com/api.html)
 
  @using-api yes
  @results XML
@@ -13,6 +13,8 @@
 from urllib import urlencode
 from cgi import escape
 from lxml import etree
+from random import randint
+from time import time
 
 # engine dependent config
 categories = ['general']
@@ -20,8 +22,8 @@ paging = True
 number_of_results = 5
 
 # search-url, invalid HTTPS certificate
-base_url = 'http://gigablast.com/'
-search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0'
+base_url = 'https://gigablast.com/'
+search_string = 'search?{query}&n={number_of_results}&s={offset}&format=xml&qh=0&rxiyd={rxiyd}&rand={rand}'
 
 # specific xpath variables
 results_xpath = '//response//result'
@@ -37,7 +39,9 @@ def request(query, params):
     search_path = search_string.format(
         query=urlencode({'q': query}),
         offset=offset,
-        number_of_results=number_of_results)
+        number_of_results=number_of_results,
+        rxiyd=randint(10000, 10000000),
+        rand=int(time()))
 
     params['url'] = base_url + search_path


@@ -9,11 +9,14 @@
 # @parse url, title, content, suggestion
 
 import re
+from cgi import escape
 from urllib import urlencode
 from urlparse import urlparse, parse_qsl
-from lxml import html
-from searx.poolrequests import get
+from lxml import html, etree
 from searx.engines.xpath import extract_text, extract_url
+from searx.search import logger
+
+logger = logger.getChild('google engine')
 
 # engine dependent config
@@ -87,7 +90,7 @@ url_map = 'https://www.openstreetmap.org/'\
 search_path = '/search'
 search_url = ('https://{hostname}' +
               search_path +
-              '?{query}&start={offset}&gbv=1')
+              '?{query}&start={offset}&gbv=1&gws_rd=cr')
 
 # other URLs
 map_hostname_start = 'maps.google.'
@@ -125,27 +128,6 @@ image_img_src_xpath = './img/@src'
 property_address = "Address"
 property_phone = "Phone number"
 
-# cookies
-pref_cookie = ''
-nid_cookie = {}
-
-
-# see https://support.google.com/websearch/answer/873?hl=en
-def get_google_pref_cookie():
-    global pref_cookie
-    if pref_cookie == '':
-        resp = get('https://www.google.com/ncr', allow_redirects=False)
-        pref_cookie = resp.cookies["PREF"]
-    return pref_cookie
-
-
-def get_google_nid_cookie(google_hostname):
-    global nid_cookie
-    if google_hostname not in nid_cookie:
-        resp = get('https://' + google_hostname)
-        nid_cookie[google_hostname] = resp.cookies.get("NID", None)
-    return nid_cookie[google_hostname]
-
 
 # remove google-specific tracking-url
 def parse_url(url_string, google_hostname):
@@ -167,7 +149,7 @@ def parse_url(url_string, google_hostname):
 def extract_text_from_dom(result, xpath):
     r = result.xpath(xpath)
     if len(r) > 0:
-        return extract_text(r[0])
+        return escape(extract_text(r[0]))
     return None
 
@@ -197,9 +179,6 @@ def request(query, params):
     params['headers']['Accept-Language'] = language
     params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
 
-    if google_hostname == default_hostname:
-        params['cookies']['PREF'] = get_google_pref_cookie()
-    params['cookies']['NID'] = get_google_nid_cookie(google_hostname)
-
     params['google_hostname'] = google_hostname
 
@@ -224,8 +203,8 @@ def response(resp):
     # parse results
     for result in dom.xpath(results_xpath):
+        title = extract_text(result.xpath(title_xpath)[0])
         try:
-            title = extract_text(result.xpath(title_xpath)[0])
             url = parse_url(extract_url(result.xpath(url_xpath), google_url), google_hostname)
             parsed_url = urlparse(url, google_hostname)
@@ -268,12 +247,13 @@
                 'content': content
             })
         except:
+            logger.debug('result parse error in:\n%s', etree.tostring(result, pretty_print=True))
             continue
 
     # parse suggestion
     for suggestion in dom.xpath(suggestion_xpath):
         # append suggestion
-        results.append({'suggestion': extract_text(suggestion)})
+        results.append({'suggestion': escape(extract_text(suggestion))})
 
     # return results
     return results


@@ -2,41 +2,42 @@
  Google (Images)
 
  @website https://www.google.com
- @provide-api yes (https://developers.google.com/web-search/docs/),
-               deprecated!
+ @provide-api yes (https://developers.google.com/custom-search/)
 
- @using-api yes
- @results JSON
- @stable yes (but deprecated)
+ @using-api no
+ @results HTML chunks with JSON inside
+ @stable no
  @parse url, title, img_src
 """
 
-from urllib import urlencode, unquote
+from urllib import urlencode
+from urlparse import parse_qs
 from json import loads
+from lxml import html
 
 # engine dependent config
 categories = ['images']
 paging = True
 safesearch = True
 
-# search-url
-url = 'https://ajax.googleapis.com/'
-search_url = url + 'ajax/services/search/images?v=1.0&start={offset}&rsz=large&safe={safesearch}&filter=off&{query}'
+search_url = 'https://www.google.com/search'\
+    '?{query}'\
+    '&tbm=isch'\
+    '&ijn=1'\
+    '&start={offset}'
 
 
 # do search-request
 def request(query, params):
-    offset = (params['pageno'] - 1) * 8
-
-    if params['safesearch'] == 0:
-        safesearch = 'off'
-    else:
-        safesearch = 'on'
+    offset = (params['pageno'] - 1) * 100
 
     params['url'] = search_url.format(query=urlencode({'q': query}),
                                       offset=offset,
                                       safesearch=safesearch)
 
+    if safesearch and params['safesearch']:
+        params['url'] += '&' + urlencode({'safe': 'active'})
+
     return params
 
@@ -44,29 +45,26 @@ def request(query, params):
 def response(resp):
     results = []
 
-    search_res = loads(resp.text)
-
-    # return empty array if there are no results
-    if not search_res.get('responseData', {}).get('results'):
-        return []
+    dom = html.fromstring(resp.text)
 
     # parse results
-    for result in search_res['responseData']['results']:
-        href = result['originalContextUrl']
-        title = result['title']
-        if 'url' not in result:
-            continue
-        thumbnail_src = result['tbUrl']
+    for result in dom.xpath('//div[@data-ved]'):
+        data_url = result.xpath('./a/@href')[0]
+        data_query = {k: v[0] for k, v in parse_qs(data_url.split('?', 1)[1]).iteritems()}
+
+        metadata = loads(result.xpath('./div[@class="rg_meta"]/text()')[0])
+
+        thumbnail_src = metadata['tu']
 
         # http to https
         thumbnail_src = thumbnail_src.replace("http://", "https://")
 
         # append result
-        results.append({'url': href,
-                        'title': title,
-                        'content': result['content'],
-                        'thumbnail_src': thumbnail_src,
-                        'img_src': unquote(result['url']),
+        results.append({'url': data_query['imgrefurl'],
+                        'title': metadata['pt'],
+                        'content': metadata['s'],
+                        'thumbnail_src': metadata['tu'],
+                        'img_src': data_query['imgurl'],
                         'template': 'images.html'})
 
     # return results


@@ -12,6 +12,8 @@
 from lxml import html
 from cgi import escape
+from dateutil import parser
+from datetime import datetime, timedelta
 import re
 from searx.engines.xpath import extract_text
 
@@ -79,11 +81,40 @@ def response(resp):
 
         title = escape(extract_text(link))
 
-        if result.xpath('./p[@class="desc"]'):
-            content = escape(extract_text(result.xpath('./p[@class="desc"]')))
+        if result.xpath('./p[@class="desc clk"]'):
+            content = escape(extract_text(result.xpath('./p[@class="desc clk"]')))
         else:
             content = ''
 
-        # append result
-        results.append({'url': url,
-                        'title': title,
+        published_date = None
+
+        # check if search result starts with something like: "2 Sep 2014 ... "
+        if re.match("^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content):
+            date_pos = content.find('...')+4
+            date_string = content[0:date_pos-5]
+            published_date = parser.parse(date_string, dayfirst=True)
+
+            # fix content string
+            content = content[date_pos:]
+
+        # check if search result starts with something like: "5 days ago ... "
+        elif re.match("^[0-9]+ days? ago \.\.\. ", content):
+            date_pos = content.find('...')+4
+            date_string = content[0:date_pos-5]
+
+            # calculate datetime
+            published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group()))
+
+            # fix content string
+            content = content[date_pos:]
+
+        if published_date:
+            # append result
+            results.append({'url': url,
+                            'title': title,
+                            'content': content,
+                            'publishedDate': published_date})
+        else:
+            # append result
+            results.append({'url': url,
+                            'title': title,


@@ -0,0 +1,62 @@
"""
 Yandex (Web)
@website https://yandex.ru/
@provide-api ?
@using-api no
@results HTML (using search portal)
@stable no (HTML can change)
@parse url, title, content
"""
from urllib import urlencode
from lxml import html
from searx.search import logger
logger = logger.getChild('yandex engine')
# engine dependent config
categories = ['general']
paging = True
language_support = True # TODO
default_tld = 'com'
language_map = {'ru': 'ru',
'ua': 'uk',
'tr': 'com.tr'}
# search-url
base_url = 'https://yandex.{tld}/'
search_url = 'search/?{query}&p={page}'
results_xpath = '//div[@class="serp-item serp-item_plain_yes clearfix i-bem"]'
url_xpath = './/h2/a/@href'
title_xpath = './/h2/a//text()'
content_xpath = './/div[@class="serp-item__text"]//text()'
def request(query, params):
lang = params['language'].split('_')[0]
host = base_url.format(tld=language_map.get(lang) or default_tld)
params['url'] = host + search_url.format(page=params['pageno']-1,
query=urlencode({'text': query}))
return params
# get response from search-request
def response(resp):
dom = html.fromstring(resp.text)
results = []
for result in dom.xpath(results_xpath):
try:
res = {'url': result.xpath(url_xpath)[0],
'title': ''.join(result.xpath(title_xpath)),
'content': ''.join(result.xpath(content_xpath))}
except:
logger.exception('yandex parse crash')
continue
results.append(res)
return results
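
Like every searx engine module, the new yandex.py only has to provide request(), which fills in params['url'], and response(), which turns the raw HTTP response into result dicts. A rough driver sketch, not part of the commit: it assumes searx and its default settings import cleanly, uses plain requests instead of searx.poolrequests, and depends on Yandex still serving the markup those XPaths expect.

# Illustrative only: searx's search loop normally builds these params itself.
import requests
from searx.engines import yandex

params = {'method': 'GET', 'headers': {}, 'data': {}, 'cookies': {},
          'url': '', 'language': 'ru_RU', 'pageno': 1}

params = yandex.request('free software', params)   # fills params['url']
resp = requests.get(params['url'], headers=params['headers'])
for result in yandex.response(resp):               # parses the portal HTML
    print('%s - %s' % (result['title'], result['url']))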


@@ -35,10 +35,10 @@ def post_search(request, ctx):
             ip = x_forwarded_for[0]
         else:
             ip = request.remote_addr
-        ctx['search'].answers.clear()
-        ctx['search'].answers.add(ip)
+        ctx['search'].result_container.answers.clear()
+        ctx['search'].result_container.answers.add(ip)
     elif p.match(ctx['search'].query):
         ua = request.user_agent
-        ctx['search'].answers.clear()
-        ctx['search'].answers.add(ua)
+        ctx['search'].result_container.answers.clear()
+        ctx['search'].result_container.answers.add(ua)
     return True
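
The plugin change above only retargets where answers are stored: post_search hooks now write into the shared ResultContainer instead of the old Search.answers set. A minimal sketch of that hook contract (the plugin name, regex and answer text are illustrative, not from the commit):

import re

name = 'example answerer'
description = 'replies "pong" when the query is "ping"'
default_on = True

ping_regex = re.compile(r'^ping$', re.IGNORECASE)


def post_search(request, ctx):
    # answer the query directly by writing into the shared result container
    if ping_regex.match(ctx['search'].query):
        ctx['search'].result_container.answers.clear()
        ctx['search'].result_container.answers.add('pong')
    return True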


@@ -1,5 +1,7 @@
 import requests
 
 from itertools import cycle
+from threading import RLock
+
 from searx import settings
 
@@ -55,6 +57,7 @@ class SessionSinglePool(requests.Session):
         super(SessionSinglePool, self).__init__()
 
         # reuse the same adapters
-        self.adapters.clear()
-        self.mount('https://', next(https_adapters))
-        self.mount('http://', next(http_adapters))
+        with RLock():
+            self.adapters.clear()
+            self.mount('https://', next(https_adapters))
+            self.mount('http://', next(http_adapters))
 
@@ -67,7 +70,6 @@ class SessionSinglePool(requests.Session):
 
 def request(method, url, **kwargs):
     """same as requests/requests/api.py request(...) except it use SessionSinglePool and force proxies"""
-    global settings
     session = SessionSinglePool()
     kwargs['proxies'] = settings['outgoing'].get('proxies', None)
     response = session.request(method=method, url=url, **kwargs)

sources/searx/results.py (new file, 239 lines)

@@ -0,0 +1,239 @@
import re
from collections import defaultdict
from operator import itemgetter
from threading import RLock
from urlparse import urlparse, unquote
from searx.engines import engines
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile('[,;:!?\./\\\\ ()-_]', re.M | re.U)
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
# return the meaningful length of the content for a result
def result_content_len(content):
if isinstance(content, basestring):
return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content))
else:
return 0
def compare_urls(url_a, url_b):
if url_a.netloc != url_b.netloc or url_a.query != url_b.query:
return False
# remove / from the end of the url if required
path_a = url_a.path[:-1]\
if url_a.path.endswith('/')\
else url_a.path
path_b = url_b.path[:-1]\
if url_b.path.endswith('/')\
else url_b.path
return unquote(path_a) == unquote(path_b)
def merge_two_infoboxes(infobox1, infobox2):
if 'urls' in infobox2:
urls1 = infobox1.get('urls', None)
if urls1 is None:
urls1 = []
infobox1.set('urls', urls1)
urlSet = set()
for url in infobox1.get('urls', []):
urlSet.add(url.get('url', None))
for url in infobox2.get('urls', []):
if url.get('url', None) not in urlSet:
urls1.append(url)
if 'attributes' in infobox2:
attributes1 = infobox1.get('attributes', None)
if attributes1 is None:
attributes1 = []
infobox1.set('attributes', attributes1)
attributeSet = set()
for attribute in infobox1.get('attributes', []):
if attribute.get('label', None) not in attributeSet:
attributeSet.add(attribute.get('label', None))
for attribute in infobox2.get('attributes', []):
attributes1.append(attribute)
if 'content' in infobox2:
content1 = infobox1.get('content', None)
content2 = infobox2.get('content', '')
if content1 is not None:
if result_content_len(content2) > result_content_len(content1):
infobox1['content'] = content2
else:
infobox1.set('content', content2)
def result_score(result):
weight = 1.0
for result_engine in result['engines']:
if hasattr(engines[result_engine], 'weight'):
weight *= float(engines[result_engine].weight)
occurences = len(result['positions'])
return sum((occurences * weight) / position for position in result['positions'])
class ResultContainer(object):
"""docstring for ResultContainer"""
def __init__(self):
super(ResultContainer, self).__init__()
self.results = defaultdict(list)
self._merged_results = []
self.infoboxes = []
self._infobox_ids = {}
self.suggestions = set()
self.answers = set()
def extend(self, engine_name, results):
for result in list(results):
if 'suggestion' in result:
self.suggestions.add(result['suggestion'])
results.remove(result)
elif 'answer' in result:
self.answers.add(result['answer'])
results.remove(result)
elif 'infobox' in result:
self._merge_infobox(result)
results.remove(result)
with RLock():
engines[engine_name].stats['search_count'] += 1
engines[engine_name].stats['result_count'] += len(results)
if not results:
return
self.results[engine_name].extend(results)
for i, result in enumerate(results):
position = i + 1
self._merge_result(result, position)
def _merge_infobox(self, infobox):
add_infobox = True
infobox_id = infobox.get('id', None)
if infobox_id is not None:
existingIndex = self._infobox_ids.get(infobox_id, None)
if existingIndex is not None:
merge_two_infoboxes(self.infoboxes[existingIndex], infobox)
add_infobox = False
if add_infobox:
self.infoboxes.append(infobox)
self._infobox_ids[infobox_id] = len(self.infoboxes) - 1
def _merge_result(self, result, position):
result['parsed_url'] = urlparse(result['url'])
# if the result has no scheme, use http as default
if not result['parsed_url'].scheme:
result['parsed_url'] = result['parsed_url']._replace(scheme="http")
result['host'] = result['parsed_url'].netloc
if result['host'].startswith('www.'):
result['host'] = result['host'].replace('www.', '', 1)
result['engines'] = [result['engine']]
# strip multiple spaces and cariage returns from content
if result.get('content'):
result['content'] = WHITESPACE_REGEX.sub(' ', result['content'])
# check for duplicates
duplicated = False
for merged_result in self._merged_results:
if compare_urls(result['parsed_url'], merged_result['parsed_url'])\
and result.get('template') == merged_result.get('template'):
duplicated = merged_result
break
# merge duplicates together
if duplicated:
# using content with more text
if result_content_len(result.get('content', '')) >\
result_content_len(duplicated.get('content', '')):
duplicated['content'] = result['content']
# add the new position
duplicated['positions'].append(position)
# add engine to list of result-engines
duplicated['engines'].append(result['engine'])
# using https if possible
if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
duplicated['url'] = result['parsed_url'].geturl()
duplicated['parsed_url'] = result['parsed_url']
# if there is no duplicate found, append result
else:
result['positions'] = [position]
with RLock():
self._merged_results.append(result)
def get_ordered_results(self):
for result in self._merged_results:
score = result_score(result)
result['score'] = score
with RLock():
for result_engine in result['engines']:
engines[result_engine].stats['score_count'] += score
results = sorted(self._merged_results, key=itemgetter('score'), reverse=True)
# pass 2 : group results by category and template
gresults = []
categoryPositions = {}
for i, res in enumerate(results):
# FIXME : handle more than one category per engine
category = engines[res['engine']].categories[0] + ':' + ''\
if 'template' not in res\
else res['template']
current = None if category not in categoryPositions\
else categoryPositions[category]
# group with previous results using the same category
# if the group can accept more result and is not too far
# from the current position
if current is not None and (current['count'] > 0)\
and (len(gresults) - current['index'] < 20):
# group with the previous results using
# the same category with this one
index = current['index']
gresults.insert(index, res)
# update every index after the current one
# (including the current one)
for k in categoryPositions:
v = categoryPositions[k]['index']
if v >= index:
categoryPositions[k]['index'] = v + 1
# update this category
current['count'] -= 1
else:
# same category
gresults.append(res)
# update categoryIndex
categoryPositions[category] = {'index': len(gresults), 'count': 8}
# return gresults
return gresults
def results_length(self):
return len(self._merged_results)
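
ResultContainer now owns what search.py used to do inline: it splits suggestions, answers and infoboxes out of each engine's result list, merges duplicates, keeps per-engine stats and scores/groups the merged results. A minimal usage sketch, not part of the commit; the fake engine registration is an illustrative assumption, needed because the container updates searx.engines.engines[<name>].stats and reads .categories when ordering:

from searx import engines as engines_module
from searx.results import ResultContainer


class FakeEngine(object):
    # just enough attributes for the stats/scoring code paths used above
    categories = ['general']
    stats = {'search_count': 0, 'result_count': 0, 'score_count': 0}


engines_module.engines['fake'] = FakeEngine()

container = ResultContainer()
container.extend('fake', [
    {'engine': 'fake', 'url': 'https://example.org/', 'title': 'Example', 'content': 'first hit'},
    {'suggestion': 'example queries'},
])

print(container.suggestions)       # set(['example queries'])
print(container.results_length())  # 1
for res in container.get_ordered_results():
    print('%s %s' % (res['score'], res['url']))

In the new search.py wiring further down, extend() is called from each engine's process_callback, so the RLock-protected stats updates sit next to the shared state instead of in the old results_queue drain loop.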


@@ -16,13 +16,8 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 '''
 
 import threading
-import re
 import searx.poolrequests as requests_lib
-from itertools import izip_longest, chain
-from operator import itemgetter
-from Queue import Queue
 from time import time
-from urlparse import urlparse, unquote
 from searx import settings
 from searx.engines import (
     categories, engines
@@ -30,6 +25,7 @@ from searx.engines import (
 from searx.languages import language_codes
 from searx.utils import gen_useragent, get_blocked_engines
 from searx.query import Query
+from searx.results import ResultContainer
 from searx import logger
 
 logger = logger.getChild('search')
@@ -42,6 +38,7 @@ def search_request_wrapper(fn, url, engine_name, **kwargs):
         return fn(url, **kwargs)
     except:
         # increase errors stats
-        engines[engine_name].stats['errors'] += 1
+        with threading.RLock():
+            engines[engine_name].stats['errors'] += 1
 
         # print engine name and specific error message
@@ -84,7 +81,7 @@ def default_request_params():
 
 # create a callback wrapper for the search engine results
-def make_callback(engine_name, results_queue, callback, params):
+def make_callback(engine_name, callback, params, result_container):
 
     # creating a callback wrapper for the search engine results
     def process_callback(response, **kwargs):
@@ -96,11 +93,16 @@ make_callback(engine_name, results_queue, callback, params):
 
         response.search_params = params
 
-        timeout_overhead = 0.2  # seconds
         search_duration = time() - params['started']
+        # update stats with current page-load-time
+        with threading.RLock():
+            engines[engine_name].stats['page_load_time'] += search_duration
+
+        timeout_overhead = 0.2  # seconds
         timeout_limit = engines[engine_name].timeout + timeout_overhead
+
         if search_duration > timeout_limit:
-            engines[engine_name].stats['page_load_time'] += timeout_limit
-            engines[engine_name].stats['errors'] += 1
+            with threading.RLock():
+                engines[engine_name].stats['errors'] += 1
             return
 
@@ -111,211 +113,11 @@ make_callback(engine_name, results_queue, callback, params):
         for result in search_results:
             result['engine'] = engine_name
 
-        results_queue.put_nowait((engine_name, search_results))
-
-        # update stats with current page-load-time
-        engines[engine_name].stats['page_load_time'] += search_duration
+        result_container.extend(engine_name, search_results)
 
     return process_callback
# return the meaningful length of the content for a result
def content_result_len(content):
if isinstance(content, basestring):
content = re.sub('[,;:!?\./\\\\ ()-_]', '', content)
return len(content)
else:
return 0
# score results and remove duplications
def score_results(results):
# calculate scoring parameters
flat_res = filter(
None, chain.from_iterable(izip_longest(*results.values())))
flat_len = len(flat_res)
engines_len = len(results)
results = []
# pass 1: deduplication + scoring
for i, res in enumerate(flat_res):
res['parsed_url'] = urlparse(res['url'])
res['host'] = res['parsed_url'].netloc
if res['host'].startswith('www.'):
res['host'] = res['host'].replace('www.', '', 1)
res['engines'] = [res['engine']]
weight = 1.0
# strip multiple spaces and cariage returns from content
if res.get('content'):
res['content'] = re.sub(' +', ' ',
res['content'].strip().replace('\n', ''))
# get weight of this engine if possible
if hasattr(engines[res['engine']], 'weight'):
weight = float(engines[res['engine']].weight)
# calculate score for that engine
score = int((flat_len - i) / engines_len) * weight + 1
# check for duplicates
duplicated = False
for new_res in results:
# remove / from the end of the url if required
p1 = res['parsed_url'].path[:-1]\
if res['parsed_url'].path.endswith('/')\
else res['parsed_url'].path
p2 = new_res['parsed_url'].path[:-1]\
if new_res['parsed_url'].path.endswith('/')\
else new_res['parsed_url'].path
# check if that result is a duplicate
if res['host'] == new_res['host'] and\
unquote(p1) == unquote(p2) and\
res['parsed_url'].query == new_res['parsed_url'].query and\
res.get('template') == new_res.get('template'):
duplicated = new_res
break
# merge duplicates together
if duplicated:
# using content with more text
if content_result_len(res.get('content', '')) >\
content_result_len(duplicated.get('content', '')):
duplicated['content'] = res['content']
# increase result-score
duplicated['score'] += score
# add engine to list of result-engines
duplicated['engines'].append(res['engine'])
# using https if possible
if duplicated['parsed_url'].scheme == 'https':
continue
elif res['parsed_url'].scheme == 'https':
duplicated['url'] = res['parsed_url'].geturl()
duplicated['parsed_url'] = res['parsed_url']
# if there is no duplicate found, append result
else:
res['score'] = score
# if the result has no scheme, use http as default
if res['parsed_url'].scheme == '':
res['parsed_url'] = res['parsed_url']._replace(scheme="http")
results.append(res)
results = sorted(results, key=itemgetter('score'), reverse=True)
# pass 2 : group results by category and template
gresults = []
categoryPositions = {}
for i, res in enumerate(results):
# FIXME : handle more than one category per engine
category = engines[res['engine']].categories[0] + ':' + ''\
if 'template' not in res\
else res['template']
current = None if category not in categoryPositions\
else categoryPositions[category]
# group with previous results using the same category
# if the group can accept more result and is not too far
# from the current position
if current is not None and (current['count'] > 0)\
and (len(gresults) - current['index'] < 20):
# group with the previous results using
# the same category with this one
index = current['index']
gresults.insert(index, res)
# update every index after the current one
# (including the current one)
for k in categoryPositions:
v = categoryPositions[k]['index']
if v >= index:
categoryPositions[k]['index'] = v + 1
# update this category
current['count'] -= 1
else:
# same category
gresults.append(res)
# update categoryIndex
categoryPositions[category] = {'index': len(gresults), 'count': 8}
# return gresults
return gresults
def merge_two_infoboxes(infobox1, infobox2):
if 'urls' in infobox2:
urls1 = infobox1.get('urls', None)
if urls1 is None:
urls1 = []
infobox1.set('urls', urls1)
urlSet = set()
for url in infobox1.get('urls', []):
urlSet.add(url.get('url', None))
for url in infobox2.get('urls', []):
if url.get('url', None) not in urlSet:
urls1.append(url)
if 'attributes' in infobox2:
attributes1 = infobox1.get('attributes', None)
if attributes1 is None:
attributes1 = []
infobox1.set('attributes', attributes1)
attributeSet = set()
for attribute in infobox1.get('attributes', []):
if attribute.get('label', None) not in attributeSet:
attributeSet.add(attribute.get('label', None))
for attribute in infobox2.get('attributes', []):
attributes1.append(attribute)
if 'content' in infobox2:
content1 = infobox1.get('content', None)
content2 = infobox2.get('content', '')
if content1 is not None:
if content_result_len(content2) > content_result_len(content1):
infobox1['content'] = content2
else:
infobox1.set('content', content2)
def merge_infoboxes(infoboxes):
results = []
infoboxes_id = {}
for infobox in infoboxes:
add_infobox = True
infobox_id = infobox.get('id', None)
if infobox_id is not None:
existingIndex = infoboxes_id.get(infobox_id, None)
if existingIndex is not None:
merge_two_infoboxes(results[existingIndex], infobox)
add_infobox = False
if add_infobox:
results.append(infobox)
infoboxes_id[infobox_id] = len(results) - 1
return results
 class Search(object):
 
     """Search information container"""
@@ -333,10 +135,7 @@ class Search(object):
         # set blocked engines
         self.blocked_engines = get_blocked_engines(engines, request.cookies)
 
-        self.results = []
-        self.suggestions = set()
-        self.answers = set()
-        self.infoboxes = []
+        self.result_container = ResultContainer()
         self.request_data = {}
 
         # set specific language if set
@@ -357,7 +156,7 @@ class Search(object):
         # set pagenumber
         pageno_param = self.request_data.get('pageno', '1')
         if not pageno_param.isdigit() or int(pageno_param) < 1:
-            raise Exception('wrong pagenumber')
+            pageno_param = 1
 
         self.pageno = int(pageno_param)
 
@@ -448,8 +247,6 @@ class Search(object):
         # init vars
         requests = []
-        results_queue = Queue()
-        results = {}
 
         # increase number of searches
         number_of_searches += 1
@@ -503,9 +300,9 @@ class Search(object):
             # create a callback wrapper for the search engine results
             callback = make_callback(
                 selected_engine['name'],
-                results_queue,
                 engine.response,
-                request_params)
+                request_params,
+                self.result_container)
 
             # create dictionary which contain all
             # informations about the request
@@ -538,42 +335,5 @@ class Search(object):
 
         # send all search-request
         threaded_requests(requests)
 
-        while not results_queue.empty():
-            engine_name, engine_results = results_queue.get_nowait()
-
-            # TODO type checks
-            [self.suggestions.add(x['suggestion'])
-             for x in list(engine_results)
-             if 'suggestion' in x
-             and engine_results.remove(x) is None]
-
-            [self.answers.add(x['answer'])
-             for x in list(engine_results)
-             if 'answer' in x
-             and engine_results.remove(x) is None]
-
-            self.infoboxes.extend(x for x in list(engine_results)
-                                  if 'infobox' in x
-                                  and engine_results.remove(x) is None)
-
-            results[engine_name] = engine_results
-
-        # update engine-specific stats
-        for engine_name, engine_results in results.items():
-            engines[engine_name].stats['search_count'] += 1
-            engines[engine_name].stats['result_count'] += len(engine_results)
-
-        # score results and remove duplications
-        self.results = score_results(results)
-
-        # merge infoboxes according to their ids
-        self.infoboxes = merge_infoboxes(self.infoboxes)
-
-        # update engine stats, using calculated score
-        for result in self.results:
-            for res_engine in result['engines']:
-                engines[result['engine']]\
-                    .stats['score_count'] += result['score']
-
         # return results, suggestions, answers and infoboxes
         return self


@@ -274,6 +274,11 @@ engines:
     engine : yahoo
     shortcut : yh
 
+  - name : yandex
+    engine : yandex
+    shortcut : yn
+    disabled : True
+
   - name : yahoo news
     engine : yahoo_news
     shortcut : yhn
@@ -311,7 +316,7 @@ engines:
 locales:
     en : English
     de : Deutsch
-    he : Hebrew
+    he : עברית
     hu : Magyar
     fr : Français
     es : Español


@@ -1 +1,88 @@
html{position:relative;min-height:100%}body{margin-bottom:80px}.footer{position:absolute;bottom:0;width:100%;height:60px}input[type=checkbox]:checked+.label_hide_if_checked,input[type=checkbox]:checked+.label_hide_if_not_checked+.label_hide_if_checked{display:none}input[type=checkbox]:not(:checked)+.label_hide_if_not_checked,input[type=checkbox]:not(:checked)+.label_hide_if_checked+.label_hide_if_not_checked{display:none}.result_header{margin-bottom:5px;margin-top:20px}.result_header .favicon{margin-bottom:-3px}.result_header a{vertical-align:bottom}.result_header a .highlight{font-weight:bold}.result-content{margin-top:5px;word-wrap:break-word}.result-content .highlight{font-weight:bold}.result-default{clear:both}.result-images{float:left !important}.img-thumbnail{margin:5px;max-height:128px;min-height:128px}.result-videos{clear:both}.result-torrents{clear:both}.result-map{clear:both}.result-code{clear:both}.suggestion_item{margin:2px 5px}.result_download{margin-right:5px}#pagination{margin-top:30px;padding-bottom:50px}.infobox .infobox_part{margin-bottom:20px;word-wrap:break-word}.infobox .infobox_part:last-child{margin-bottom:0}.search_categories{margin:10px 0;text-transform:capitalize}.cursor-text{cursor:text !important}.cursor-pointer{cursor:pointer !important}.highlight .hll{background-color:#ffc}.highlight{background:#f8f8f8}.highlight .c{color:#408080;font-style:italic}.highlight .err{border:1px solid #f00}.highlight .k{color:#008000;font-weight:bold}.highlight .o{color:#666}.highlight .cm{color:#408080;font-style:italic}.highlight .cp{color:#bc7a00}.highlight .c1{color:#408080;font-style:italic}.highlight .cs{color:#408080;font-style:italic}.highlight .gd{color:#a00000}.highlight .ge{font-style:italic}.highlight .gr{color:#f00}.highlight .gh{color:#000080;font-weight:bold}.highlight .gi{color:#00a000}.highlight .go{color:#888}.highlight .gp{color:#000080;font-weight:bold}.highlight .gs{font-weight:bold}.highlight .gu{color:#800080;font-weight:bold}.highlight .gt{color:#04d}.highlight .kc{color:#008000;font-weight:bold}.highlight .kd{color:#008000;font-weight:bold}.highlight .kn{color:#008000;font-weight:bold}.highlight .kp{color:#008000}.highlight .kr{color:#008000;font-weight:bold}.highlight .kt{color:#b00040}.highlight .m{color:#666}.highlight .s{color:#ba2121}.highlight .na{color:#7d9029}.highlight .nb{color:#008000}.highlight .nc{color:#00f;font-weight:bold}.highlight .no{color:#800}.highlight .nd{color:#a2f}.highlight .ni{color:#999;font-weight:bold}.highlight .ne{color:#d2413a;font-weight:bold}.highlight .nf{color:#00f}.highlight .nl{color:#a0a000}.highlight .nn{color:#00f;font-weight:bold}.highlight .nt{color:#008000;font-weight:bold}.highlight .nv{color:#19177c}.highlight .ow{color:#a2f;font-weight:bold}.highlight .w{color:#bbb}.highlight .mf{color:#666}.highlight .mh{color:#666}.highlight .mi{color:#666}.highlight .mo{color:#666}.highlight .sb{color:#ba2121}.highlight .sc{color:#ba2121}.highlight .sd{color:#ba2121;font-style:italic}.highlight .s2{color:#ba2121}.highlight .se{color:#b62;font-weight:bold}.highlight .sh{color:#ba2121}.highlight .si{color:#b68;font-weight:bold}.highlight .sx{color:#008000}.highlight .sr{color:#b68}.highlight .s1{color:#ba2121}.highlight .ss{color:#19177c}.highlight .bp{color:#008000}.highlight .vc{color:#19177c}.highlight .vg{color:#19177c}.highlight .vi{color:#19177c}.highlight .il{color:#666}.highlight 
.lineno{-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;cursor:default}.highlight .lineno::selection{background:transparent}.highlight .lineno::-moz-selection{background:transparent}
html{position:relative;min-height:100%}
body{margin-bottom:80px}
.footer{position:absolute;bottom:0;width:100%;height:60px}
input[type=checkbox]:checked+.label_hide_if_checked,input[type=checkbox]:checked+.label_hide_if_not_checked+.label_hide_if_checked{display:none}
input[type=checkbox]:not(:checked)+.label_hide_if_not_checked,input[type=checkbox]:not(:checked)+.label_hide_if_checked+.label_hide_if_not_checked{display:none}
.result_header{margin-bottom:5px;margin-top:20px}.result_header .favicon{margin-bottom:-3px}
.result_header a{vertical-align:bottom}.result_header a .highlight{font-weight:bold}
.result-content{margin-top:5px;word-wrap:break-word}.result-content .highlight{font-weight:bold}
.result-default{clear:both}
.result-images{float:left !important}
.img-thumbnail{margin:5px;max-height:128px;min-height:128px}
.result-videos{clear:both}
.result-torrents{clear:both}
.result-map{clear:both}
.result-code{clear:both}
.suggestion_item{margin:2px 5px}
.result_download{margin-right:5px}
#pagination{margin-top:30px;padding-bottom:50px}
.label-default{color:#aaa;background:#fff}
.infobox .infobox_part{margin-bottom:20px;word-wrap:break-word}
.infobox .infobox_part:last-child{margin-bottom:0}
.search_categories{margin:10px 0;text-transform:capitalize}
.cursor-text{cursor:text !important}
.cursor-pointer{cursor:pointer !important}
.highlight .hll{background-color:#ffc}
.highlight{background:#f8f8f8}
.highlight .c{color:#408080;font-style:italic}
.highlight .err{border:1px solid #f00}
.highlight .k{color:#008000;font-weight:bold}
.highlight .o{color:#666}
.highlight .cm{color:#408080;font-style:italic}
.highlight .cp{color:#bc7a00}
.highlight .c1{color:#408080;font-style:italic}
.highlight .cs{color:#408080;font-style:italic}
.highlight .gd{color:#a00000}
.highlight .ge{font-style:italic}
.highlight .gr{color:#f00}
.highlight .gh{color:#000080;font-weight:bold}
.highlight .gi{color:#00a000}
.highlight .go{color:#888}
.highlight .gp{color:#000080;font-weight:bold}
.highlight .gs{font-weight:bold}
.highlight .gu{color:#800080;font-weight:bold}
.highlight .gt{color:#04d}
.highlight .kc{color:#008000;font-weight:bold}
.highlight .kd{color:#008000;font-weight:bold}
.highlight .kn{color:#008000;font-weight:bold}
.highlight .kp{color:#008000}
.highlight .kr{color:#008000;font-weight:bold}
.highlight .kt{color:#b00040}
.highlight .m{color:#666}
.highlight .s{color:#ba2121}
.highlight .na{color:#7d9029}
.highlight .nb{color:#008000}
.highlight .nc{color:#00f;font-weight:bold}
.highlight .no{color:#800}
.highlight .nd{color:#a2f}
.highlight .ni{color:#999;font-weight:bold}
.highlight .ne{color:#d2413a;font-weight:bold}
.highlight .nf{color:#00f}
.highlight .nl{color:#a0a000}
.highlight .nn{color:#00f;font-weight:bold}
.highlight .nt{color:#008000;font-weight:bold}
.highlight .nv{color:#19177c}
.highlight .ow{color:#a2f;font-weight:bold}
.highlight .w{color:#bbb}
.highlight .mf{color:#666}
.highlight .mh{color:#666}
.highlight .mi{color:#666}
.highlight .mo{color:#666}
.highlight .sb{color:#ba2121}
.highlight .sc{color:#ba2121}
.highlight .sd{color:#ba2121;font-style:italic}
.highlight .s2{color:#ba2121}
.highlight .se{color:#b62;font-weight:bold}
.highlight .sh{color:#ba2121}
.highlight .si{color:#b68;font-weight:bold}
.highlight .sx{color:#008000}
.highlight .sr{color:#b68}
.highlight .s1{color:#ba2121}
.highlight .ss{color:#19177c}
.highlight .bp{color:#008000}
.highlight .vc{color:#19177c}
.highlight .vg{color:#19177c}
.highlight .vi{color:#19177c}
.highlight .il{color:#666}
.highlight .lineno{-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;cursor:default}.highlight .lineno::selection{background:transparent;}
.highlight .lineno::-moz-selection{background:transparent;}

View file

@ -76,3 +76,8 @@
margin-top: 30px; margin-top: 30px;
padding-bottom: 50px; padding-bottom: 50px;
} }
.label-default {
color: #AAA;
background: #FFF;
}

View file

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/"> <OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
<ShortName>searx</ShortName> <ShortName>searx</ShortName>
<Description>Search searx</Description> <Description>a privacy-respecting, hackable metasearch engine</Description>
<InputEncoding>UTF-8</InputEncoding> <InputEncoding>UTF-8</InputEncoding>
<Image>{{ host }}{{ url_for('static', filename='img/favicon.png') | replace("/", "", 1) }}</Image> <Image>{{ host }}{{ url_for('static', filename='img/favicon.png') | replace("/", "", 1) }}</Image>
<LongName>searx metasearch</LongName> <LongName>searx metasearch</LongName>

View file

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/"> <OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
<ShortName>searx</ShortName> <ShortName>searx</ShortName>
<Description>Search searx</Description> <Description>a privacy-respecting, hackable metasearch engine</Description>
<InputEncoding>UTF-8</InputEncoding> <InputEncoding>UTF-8</InputEncoding>
<Image>{{ host }}{{ url_for('static', filename='img/favicon.png') | replace("/", "", 1) }}</Image> <Image>{{ host }}{{ url_for('static', filename='img/favicon.png') | replace("/", "", 1) }}</Image>
<LongName>searx metasearch</LongName> <LongName>searx metasearch</LongName>

View file

@ -25,7 +25,11 @@
<!-- Draw result footer --> <!-- Draw result footer -->
{% macro result_footer(result) -%} {% macro result_footer(result) -%}
<div class="clearfix"></div> <div class="clearfix"></div>
<span class="label label-default pull-right">{{ result.engine }}</span> <div class="pull-right">
{% for engine in result.engines %}
<span class="label label-default">{{ engine }}</span>
{% endfor %}
</div>
<p class="text-muted">{{ result.pretty_url }}</p> <p class="text-muted">{{ result.pretty_url }}</p>
{%- endmacro %} {%- endmacro %}

View file

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/"> <OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
<ShortName>searx</ShortName> <ShortName>searx</ShortName>
<Description>Search searx</Description> <Description>a privacy-respecting, hackable metasearch engine</Description>
<InputEncoding>UTF-8</InputEncoding> <InputEncoding>UTF-8</InputEncoding>
<Image>{{ host }}{{ url_for('static', filename='img/favicon.png') | replace("/", "", 1) }}</Image> <Image>{{ host }}{{ url_for('static', filename='img/favicon.png') | replace("/", "", 1) }}</Image>
<LongName>searx metasearch</LongName> <LongName>searx metasearch</LongName>

View file

@ -53,8 +53,8 @@
<th>{{ _('Engine name') }}</th> <th>{{ _('Engine name') }}</th>
<th>{{ _('Allow') }} / {{ _('Block') }}</th> <th>{{ _('Allow') }} / {{ _('Block') }}</th>
</tr> </tr>
{% for (categ,search_engines) in categs %} {% for categ in all_categories %}
{% for search_engine in search_engines %} {% for search_engine in engines_by_category[categ] %}
{% if not search_engine.private %} {% if not search_engine.private %}
<tr> <tr>

View file

@ -28,10 +28,10 @@ class TestBingNewsEngine(SearxTestCase):
self.assertRaises(AttributeError, bing_news.response, '') self.assertRaises(AttributeError, bing_news.response, '')
self.assertRaises(AttributeError, bing_news.response, '[]') self.assertRaises(AttributeError, bing_news.response, '[]')
response = mock.Mock(text='<html></html>') response = mock.Mock(content='<html></html>')
self.assertEqual(bing_news.response(response), []) self.assertEqual(bing_news.response(response), [])
response = mock.Mock(text='<html></html>') response = mock.Mock(content='<html></html>')
self.assertEqual(bing_news.response(response), []) self.assertEqual(bing_news.response(response), [])
html = """<?xml version="1.0" encoding="utf-8" ?> html = """<?xml version="1.0" encoding="utf-8" ?>
@ -66,7 +66,7 @@ class TestBingNewsEngine(SearxTestCase):
</item> </item>
</channel> </channel>
</rss>""" # noqa </rss>""" # noqa
response = mock.Mock(text=html) response = mock.Mock(content=html)
results = bing_news.response(response) results = bing_news.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 2) self.assertEqual(len(results), 2)
@ -105,7 +105,7 @@ class TestBingNewsEngine(SearxTestCase):
</item> </item>
</channel> </channel>
</rss>""" # noqa </rss>""" # noqa
response = mock.Mock(text=html) response = mock.Mock(content=html)
results = bing_news.response(response) results = bing_news.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 1) self.assertEqual(len(results), 1)
@ -128,11 +128,11 @@ class TestBingNewsEngine(SearxTestCase):
</channel> </channel>
</rss>""" # noqa </rss>""" # noqa
response = mock.Mock(text=html) response = mock.Mock(content=html)
results = bing_news.response(response) results = bing_news.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 0) self.assertEqual(len(results), 0)
html = """<?xml version="1.0" encoding="utf-8" ?>gabarge""" html = """<?xml version="1.0" encoding="utf-8" ?>gabarge"""
response = mock.Mock(text=html) response = mock.Mock(content=html)
self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response) self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response)

View file

@ -22,7 +22,7 @@ class TestBtdiggEngine(SearxTestCase):
self.assertRaises(AttributeError, btdigg.response, '') self.assertRaises(AttributeError, btdigg.response, '')
self.assertRaises(AttributeError, btdigg.response, '[]') self.assertRaises(AttributeError, btdigg.response, '[]')
response = mock.Mock(text='<html></html>') response = mock.Mock(content='<html></html>')
self.assertEqual(btdigg.response(response), []) self.assertEqual(btdigg.response(response), [])
html = """ html = """
@ -82,7 +82,7 @@ class TestBtdiggEngine(SearxTestCase):
</table> </table>
</div> </div>
""" """
response = mock.Mock(text=html) response = mock.Mock(content=html)
results = btdigg.response(response) results = btdigg.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 1) self.assertEqual(len(results), 1)
@ -101,7 +101,7 @@ class TestBtdiggEngine(SearxTestCase):
</table> </table>
</div> </div>
""" """
response = mock.Mock(text=html) response = mock.Mock(content=html)
results = btdigg.response(response) results = btdigg.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 0) self.assertEqual(len(results), 0)
@ -367,7 +367,7 @@ class TestBtdiggEngine(SearxTestCase):
</table> </table>
</div> </div>
""" """
response = mock.Mock(text=html) response = mock.Mock(content=html)
results = btdigg.response(response) results = btdigg.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 5) self.assertEqual(len(results), 5)

View file

@ -9,7 +9,7 @@ from searx.testing import SearxTestCase
class TestGoogleEngine(SearxTestCase): class TestGoogleEngine(SearxTestCase):
def mock_response(self, text): def mock_response(self, text):
response = mock.Mock(text=text, url='https://www.google.com/search?q=test&start=0&gbv=1') response = mock.Mock(text=text, url='https://www.google.com/search?q=test&start=0&gbv=1&gws_rd=cr')
response.search_params = mock.Mock() response.search_params = mock.Mock()
response.search_params.get = mock.Mock(return_value='www.google.com') response.search_params.get = mock.Mock(return_value='www.google.com')
return response return response
@ -23,16 +23,12 @@ class TestGoogleEngine(SearxTestCase):
self.assertIn('url', params) self.assertIn('url', params)
self.assertIn(query, params['url']) self.assertIn(query, params['url'])
self.assertIn('google.fr', params['url']) self.assertIn('google.fr', params['url'])
self.assertNotIn('PREF', params['cookies'])
self.assertIn('NID', params['cookies'])
self.assertIn('fr', params['headers']['Accept-Language']) self.assertIn('fr', params['headers']['Accept-Language'])
dicto['language'] = 'all' dicto['language'] = 'all'
params = google.request(query, dicto) params = google.request(query, dicto)
self.assertIn('google.com', params['url']) self.assertIn('google.com', params['url'])
self.assertIn('en', params['headers']['Accept-Language']) self.assertIn('en', params['headers']['Accept-Language'])
self.assertIn('PREF', params['cookies'])
self.assertIn('NID', params['cookies'])
def test_response(self): def test_response(self):
self.assertRaises(AttributeError, google.response, None) self.assertRaises(AttributeError, google.response, None)

View file

@ -10,15 +10,15 @@ class TestGoogleImagesEngine(SearxTestCase):
query = 'test_query' query = 'test_query'
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['pageno'] = 1 dicto['pageno'] = 1
dicto['safesearch'] = 1
params = google_images.request(query, dicto) params = google_images.request(query, dicto)
self.assertIn('url', params) self.assertIn('url', params)
self.assertIn(query, params['url']) self.assertIn(query, params['url'])
self.assertIn('googleapis.com', params['url']) self.assertIn('safe=active', params['url'])
self.assertIn('safe=on', params['url'])
dicto['safesearch'] = 0 dicto['safesearch'] = 0
params = google_images.request(query, dicto) params = google_images.request(query, dicto)
self.assertIn('safe=off', params['url']) self.assertNotIn('safe', params['url'])
def test_response(self): def test_response(self):
self.assertRaises(AttributeError, google_images.response, None) self.assertRaises(AttributeError, google_images.response, None)
@ -26,88 +26,33 @@ class TestGoogleImagesEngine(SearxTestCase):
self.assertRaises(AttributeError, google_images.response, '') self.assertRaises(AttributeError, google_images.response, '')
self.assertRaises(AttributeError, google_images.response, '[]') self.assertRaises(AttributeError, google_images.response, '[]')
response = mock.Mock(text='{}') response = mock.Mock(text='<div></div>')
self.assertEqual(google_images.response(response), []) self.assertEqual(google_images.response(response), [])
response = mock.Mock(text='{"data": []}') html = """
self.assertEqual(google_images.response(response), []) <div style="display:none">
<div eid="fWhnVq4Shqpp3pWo4AM" id="isr_scm_1" style="display:none"></div>
json = """ <div data-cei="fWhnVq4Shqpp3pWo4AM" class="rg_add_chunk"><!--m-->
{ <div class="rg_di rg_el ivg-i" data-ved="0ahUKEwjuxPWQts3JAhUGVRoKHd4KCjwQMwgDKAAwAA">
"responseData": { <a href="/imgres?imgurl=http://www.clker.com/cliparts/H/X/l/b/0/0/south-arrow-hi.png&amp;imgrefurl=http://www.clker.com/clipart-south-arrow.html&amp;h=598&amp;w=504&amp;tbnid=bQWQ9wz9loJmjM:&amp;docid=vlONkeBtERfDuM&amp;ei=fWhnVq4Shqpp3pWo4AM&amp;tbm=isch" jsaction="fire.ivg_o;mouseover:str.hmov;mouseout:str.hmou" class="rg_l"><img data-src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRsxy3gKnEX0lrwwpRxdPWyLJ8iZ--PXZ-ThbBA2_xXDG_bdQutMQ" data-sz="f" name="bQWQ9wz9loJmjM:" class="rg_i" alt="Image result for south" jsaction="load:str.tbn" onload="google.aft&&google.aft(this)">
"results": [ <div class="_aOd rg_ilm">
{ <div class="rg_ilmbg"><span class="rg_ilmn"> 504&nbsp;&#215;&nbsp;598 - clker.com </span>
"GsearchResultClass": "GimageSearch", </div>
"width": "400", </div>
"height": "400", </a>
"imageId": "ANd9GcQbYb9FJuAbG_hT4i8FeC0O0x-P--EHdzgRIF9ao97nHLl7C2mREn6qTQ", <div class="rg_meta">
"tbWidth": "124", {"id":"bQWQ9wz9loJmjM:","isu":"clker.com","ity":"png","md":"/search?tbs\u003dsbi:AMhZZit7u1mHyop9pQisu-5idR-8W_1Itvwc3afChmsjQYPx_1yYMzBvUZgtkcGoojqekKZ-6n_1rjX9ySH0OWA_1eO5OijFY6BBDw_1GApr6xxb1bXJcBcj-DiguMoXWW7cZSG7MRQbwnI5SoDZNXcv_1xGszy886I7NVb_1oRKSliTHtzqbXAxhvYreM","msu":"/search?q\u003dsouth\u0026biw\u003d1364\u0026bih\u003d235\u0026tbm\u003disch\u0026tbs\u003dsimg:CAQSEgltBZD3DP2WgiG-U42R4G0RFw","oh":598,"os":"13KB","ow":504,"pt":"South Arrow Clip Art at Clker.com - vector clip art online ...","rid":"vlONkeBtERfDuM","s":"Download this image as:","sc":1,"si":"/search?q\u003dsouth\u0026biw\u003d1364\u0026bih\u003d235\u0026tbm\u003disch\u0026tbs\u003dsimg:CAESEgltBZD3DP2WgiG-U42R4G0RFw","th":245,"tu":"https://thumbnail.url/","tw":206}
"tbHeight": "124", </div>
"unescapedUrl": "http://unescaped.url.jpg", </div><!--n--><!--m-->
"url": "http://image.url.jpg", </div>
"visibleUrl": "insolitebuzz.fr", </div>
"title": "This is the title", """ # noqa
"titleNoFormatting": "Petit test sympa qui rend fou tout le monde ! A faire", response = mock.Mock(text=html)
"originalContextUrl": "http://this.is.the.url",
"content": "<b>test</b>",
"contentNoFormatting": "test",
"tbUrl": "http://thumbnail.url"
}
]
},
"responseDetails": null,
"responseStatus": 200
}
"""
response = mock.Mock(text=json)
results = google_images.response(response) results = google_images.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 1) self.assertEqual(len(results), 1)
self.assertEqual(results[0]['title'], 'This is the title') self.assertEqual(results[0]['title'], u'South Arrow Clip Art at Clker.com - vector clip art online ...')
self.assertEqual(results[0]['url'], 'http://this.is.the.url') self.assertEqual(results[0]['url'], 'http://www.clker.com/clipart-south-arrow.html')
self.assertEqual(results[0]['thumbnail_src'], 'https://thumbnail.url') self.assertEqual(results[0]['thumbnail_src'], 'https://thumbnail.url/')
self.assertEqual(results[0]['img_src'], 'http://image.url.jpg') self.assertEqual(results[0]['img_src'], 'http://www.clker.com/cliparts/H/X/l/b/0/0/south-arrow-hi.png')
self.assertEqual(results[0]['content'], '<b>test</b>') self.assertEqual(results[0]['content'], 'Download this image as:')
json = """
{
"responseData": {
"results": [
{
"GsearchResultClass": "GimageSearch",
"width": "400",
"height": "400",
"imageId": "ANd9GcQbYb9FJuAbG_hT4i8FeC0O0x-P--EHdzgRIF9ao97nHLl7C2mREn6qTQ",
"tbWidth": "124",
"tbHeight": "124",
"unescapedUrl": "http://unescaped.url.jpg",
"visibleUrl": "insolitebuzz.fr",
"title": "This is the title",
"titleNoFormatting": "Petit test sympa qui rend fou tout le monde ! A faire",
"originalContextUrl": "http://this.is.the.url",
"content": "<b>test</b>",
"contentNoFormatting": "test",
"tbUrl": "http://thumbnail.url"
}
]
},
"responseDetails": null,
"responseStatus": 200
}
"""
response = mock.Mock(text=json)
results = google_images.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
json = """
{
"responseData": {},
"responseDetails": null,
"responseStatus": 200
}
"""
response = mock.Mock(text=json)
results = google_images.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)

View file

@ -42,7 +42,7 @@ class TestStartpageEngine(SearxTestCase):
</a> </a>
<span id='title_stars_2' name='title_stars_2'> </span> <span id='title_stars_2' name='title_stars_2'> </span>
</h3> </h3>
<p class='desc'> <p class='desc clk'>
This should be the content. This should be the content.
</p> </p>
<p> <p>
@ -78,7 +78,7 @@ class TestStartpageEngine(SearxTestCase):
</a> </a>
<span id='title_stars_2' name='title_stars_2'> </span> <span id='title_stars_2' name='title_stars_2'> </span>
</h3> </h3>
<p class='desc'> <p class='desc clk'>
This should be the content. This should be the content.
</p> </p>
<p> <p>
@ -101,7 +101,7 @@ class TestStartpageEngine(SearxTestCase):
<h3> <h3>
<span id='title_stars_2' name='title_stars_2'> </span> <span id='title_stars_2' name='title_stars_2'> </span>
</h3> </h3>
<p class='desc'> <p class='desc clk'>
This should be the content. This should be the content.
</p> </p>
<p> <p>

View file

@ -5,6 +5,12 @@ from searx import plugins
from mock import Mock from mock import Mock
def get_search_mock(query, **kwargs):
return {'search': Mock(query=query,
result_container=Mock(answers=set()),
**kwargs)}
class PluginStoreTest(SearxTestCase): class PluginStoreTest(SearxTestCase):
def test_PluginStore_init(self): def test_PluginStore_init(self):
@ -46,23 +52,23 @@ class SelfIPTest(SearxTestCase):
request = Mock(user_plugins=store.plugins, request = Mock(user_plugins=store.plugins,
remote_addr='127.0.0.1') remote_addr='127.0.0.1')
request.headers.getlist.return_value = [] request.headers.getlist.return_value = []
ctx = {'search': Mock(answers=set(), ctx = get_search_mock(query='ip')
query='ip')}
store.call('post_search', request, ctx) store.call('post_search', request, ctx)
self.assertTrue('127.0.0.1' in ctx['search'].answers) self.assertTrue('127.0.0.1' in ctx['search'].result_container.answers)
# User agent test # User agent test
request = Mock(user_plugins=store.plugins, request = Mock(user_plugins=store.plugins,
user_agent='Mock') user_agent='Mock')
request.headers.getlist.return_value = [] request.headers.getlist.return_value = []
ctx = {'search': Mock(answers=set(),
query='user-agent')} ctx = get_search_mock(query='user-agent')
store.call('post_search', request, ctx) store.call('post_search', request, ctx)
self.assertTrue('Mock' in ctx['search'].answers) self.assertTrue('Mock' in ctx['search'].result_container.answers)
ctx = {'search': Mock(answers=set(),
query='user agent')} ctx = get_search_mock(query='user-agent')
store.call('post_search', request, ctx) store.call('post_search', request, ctx)
self.assertTrue('Mock' in ctx['search'].answers) self.assertTrue('Mock' in ctx['search'].result_container.answers)
ctx = {'search': Mock(answers=set(),
query='What is my User-Agent?')} ctx = get_search_mock(query='What is my User-Agent?')
store.call('post_search', request, ctx) store.call('post_search', request, ctx)
self.assertTrue('Mock' in ctx['search'].result_container.answers)

View file

@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
from searx.results import ResultContainer
from searx.testing import SearxTestCase
def fake_result(url='https://aa.bb/cc?dd=ee#ff',
title='aaa',
content='bbb',
engine='wikipedia', **kwargs):
result = {'url': url,
'title': title,
'content': content,
'engine': engine}
result.update(kwargs)
return result
# TODO
class ResultContainerTestCase(SearxTestCase):
def test_empty(self):
c = ResultContainer()
self.assertEqual(c.get_ordered_results(), [])
def test_one_result(self):
c = ResultContainer()
c.extend('wikipedia', [fake_result()])
self.assertEqual(c.results_length(), 1)
def test_one_suggestion(self):
c = ResultContainer()
c.extend('wikipedia', [fake_result(suggestion=True)])
self.assertEqual(len(c.suggestions), 1)
self.assertEqual(c.results_length(), 0)
def test_result_merge(self):
c = ResultContainer()
c.extend('wikipedia', [fake_result()])
c.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])
self.assertEqual(c.results_length(), 2)

View file

@ -1,25 +1,10 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from searx.search import score_results
from searx.testing import SearxTestCase from searx.testing import SearxTestCase
def fake_result(url='https://aa.bb/cc?dd=ee#ff', # TODO
title='aaa', class SearchTestCase(SearxTestCase):
content='bbb',
engine='wikipedia'):
return {'url': url,
'title': title,
'content': content,
'engine': engine}
def test_(self):
class ScoreResultsTestCase(SearxTestCase): pass
def test_empty(self):
self.assertEqual(score_results(dict()), [])
def test_urlparse(self):
results = score_results(dict(a=[fake_result(url='https://aa.bb/cc?dd=ee#ff')]))
parsed_url = results[0]['parsed_url']
self.assertEqual(parsed_url.query, 'dd=ee')

View file

@ -1,6 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import json import json
from mock import Mock
from urlparse import ParseResult from urlparse import ParseResult
from searx import webapp from searx import webapp
from searx.testing import SearxTestCase from searx.testing import SearxTestCase
@ -33,7 +34,12 @@ class ViewsTestCase(SearxTestCase):
] ]
def search_mock(search_self, *args): def search_mock(search_self, *args):
search_self.results = self.test_results search_self.result_container = Mock(get_ordered_results=lambda: self.test_results,
answers=set(),
suggestions=set(),
infoboxes=[],
results=self.test_results,
results_length=lambda: len(self.test_results))
webapp.Search.search = search_mock webapp.Search.search = search_mock
@ -138,7 +144,7 @@ class ViewsTestCase(SearxTestCase):
def test_opensearch_xml(self): def test_opensearch_xml(self):
result = self.app.get('/opensearch.xml') result = self.app.get('/opensearch.xml')
self.assertEqual(result.status_code, 200) self.assertEqual(result.status_code, 200)
self.assertIn('<Description>Search searx</Description>', result.data) self.assertIn('<Description>a privacy-respecting, hackable metasearch engine</Description>', result.data)
def test_favicon(self): def test_favicon(self):
result = self.app.get('/favicon.ico') result = self.app.get('/favicon.ico')

View file

@ -19,7 +19,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
# version of searx # version of searx
VERSION_MAJOR = 0 VERSION_MAJOR = 0
VERSION_MINOR = 8 VERSION_MINOR = 8
VERSION_BUILD = 0 VERSION_BUILD = 1
VERSION_STRING = "{0}.{1}.{2}".format(VERSION_MAJOR, VERSION_STRING = "{0}.{1}.{2}".format(VERSION_MAJOR,
VERSION_MINOR, VERSION_MINOR,

View file

@ -42,7 +42,7 @@ except:
from datetime import datetime, timedelta from datetime import datetime, timedelta
from urllib import urlencode from urllib import urlencode
from urlparse import urlparse from urlparse import urlparse, urljoin
from werkzeug.contrib.fixers import ProxyFix from werkzeug.contrib.fixers import ProxyFix
from flask import ( from flask import (
Flask, request, render_template, url_for, Response, make_response, Flask, request, render_template, url_for, Response, make_response,
@ -383,7 +383,7 @@ def index():
plugins.call('post_search', request, locals()) plugins.call('post_search', request, locals())
for result in search.results: for result in search.result_container.get_ordered_results():
plugins.call('on_result', request, locals()) plugins.call('on_result', request, locals())
if not search.paging and engines[result['engine']].paging: if not search.paging and engines[result['engine']].paging:
@ -411,7 +411,7 @@ def index():
minutes = int((timedifference.seconds / 60) % 60) minutes = int((timedifference.seconds / 60) % 60)
hours = int(timedifference.seconds / 60 / 60) hours = int(timedifference.seconds / 60 / 60)
if hours == 0: if hours == 0:
result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes) # noqa result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
else: else:
result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes) # noqa result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes) # noqa
else: else:
@ -419,14 +419,13 @@ def index():
if search.request_data.get('format') == 'json': if search.request_data.get('format') == 'json':
return Response(json.dumps({'query': search.query, return Response(json.dumps({'query': search.query,
'results': search.results}), 'results': search.result_container.get_ordered_results()}),
mimetype='application/json') mimetype='application/json')
elif search.request_data.get('format') == 'csv': elif search.request_data.get('format') == 'csv':
csv = UnicodeWriter(cStringIO.StringIO()) csv = UnicodeWriter(cStringIO.StringIO())
keys = ('title', 'url', 'content', 'host', 'engine', 'score') keys = ('title', 'url', 'content', 'host', 'engine', 'score')
if search.results:
csv.writerow(keys) csv.writerow(keys)
for row in search.results: for row in search.result_container.get_ordered_results():
row['host'] = row['parsed_url'].netloc row['host'] = row['parsed_url'].netloc
csv.writerow([row.get(key, '') for key in keys]) csv.writerow([row.get(key, '') for key in keys])
csv.stream.seek(0) csv.stream.seek(0)
@ -437,24 +436,24 @@ def index():
elif search.request_data.get('format') == 'rss': elif search.request_data.get('format') == 'rss':
response_rss = render( response_rss = render(
'opensearch_response_rss.xml', 'opensearch_response_rss.xml',
results=search.results, results=search.result_container.get_ordered_results(),
q=search.request_data['q'], q=search.request_data['q'],
number_of_results=len(search.results), number_of_results=search.result_container.results_length(),
base_url=get_base_url() base_url=get_base_url()
) )
return Response(response_rss, mimetype='text/xml') return Response(response_rss, mimetype='text/xml')
return render( return render(
'results.html', 'results.html',
results=search.results, results=search.result_container.get_ordered_results(),
q=search.request_data['q'], q=search.request_data['q'],
selected_categories=search.categories, selected_categories=search.categories,
paging=search.paging, paging=search.paging,
pageno=search.pageno, pageno=search.pageno,
base_url=get_base_url(), base_url=get_base_url(),
suggestions=search.suggestions, suggestions=search.result_container.suggestions,
answers=search.answers, answers=search.result_container.answers,
infoboxes=search.infoboxes, infoboxes=search.result_container.infoboxes,
theme=get_current_theme_name(), theme=get_current_theme_name(),
favicons=global_favicons[themes.index(get_current_theme_name())] favicons=global_favicons[themes.index(get_current_theme_name())]
) )
@ -532,7 +531,7 @@ def preferences():
blocked_engines = [] blocked_engines = []
resp = make_response(redirect(url_for('index'))) resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index'))))
if request.method == 'GET': if request.method == 'GET':
blocked_engines = get_blocked_engines(engines, request.cookies) blocked_engines = get_blocked_engines(engines, request.cookies)
@ -767,7 +766,7 @@ def favicon():
@app.route('/clear_cookies') @app.route('/clear_cookies')
def clear_cookies(): def clear_cookies():
resp = make_response(redirect(url_for('index'))) resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index'))))
for cookie_name in request.cookies: for cookie_name in request.cookies:
resp.delete_cookie(cookie_name) resp.delete_cookie(cookie_name)
return resp return resp

View file

@ -8,7 +8,6 @@ Pygments = 2.0.2
WebOb = 1.4.1 WebOb = 1.4.1
WebTest = 2.0.18 WebTest = 2.0.18
Werkzeug = 0.10.4 Werkzeug = 0.10.4
buildout-versions = 1.7
collective.recipe.omelette = 0.16 collective.recipe.omelette = 0.16
coverage = 3.7.1 coverage = 3.7.1
decorator = 3.4.2 decorator = 3.4.2
@ -38,7 +37,6 @@ pyasn1 = 0.1.8
pyasn1-modules = 0.0.6 pyasn1-modules = 0.0.6
certifi = 2015.04.28 certifi = 2015.04.28
#
cffi = 1.1.2 cffi = 1.1.2
cryptography = 0.9.1 cryptography = 0.9.1