diff --git a/sources/AUTHORS.rst b/sources/AUTHORS.rst
index 632e7f0..3605332 100644
--- a/sources/AUTHORS.rst
+++ b/sources/AUTHORS.rst
@@ -41,3 +41,6 @@ generally made searx better:
 - @GreenLunar
 - Noemi Vanyi
 - Kang-min Liu
+- Kirill Isakov
+- Guilhem Bonnefille
+- Marc Abonce Seguin
diff --git a/sources/Dockerfile b/sources/Dockerfile
index 543c74d..387669b 100644
--- a/sources/Dockerfile
+++ b/sources/Dockerfile
@@ -1,22 +1,54 @@
-FROM python:2.7-slim
+FROM alpine:3.3
+MAINTAINER searx
+LABEL description "A privacy-respecting, hackable metasearch engine."
-WORKDIR /app
+ENV BASE_URL=False IMAGE_PROXY=False
+EXPOSE 8888
+WORKDIR /usr/local/searx
+CMD ["/usr/bin/tini","--","/usr/local/searx/run.sh"]
-RUN useradd searx
+RUN adduser -D -h /usr/local/searx -s /bin/sh searx searx \
+ && echo '#!/bin/sh' >> run.sh \
+ && echo 'sed -i "s|base_url : False|base_url : $BASE_URL|g" searx/settings.yml' >> run.sh \
+ && echo 'sed -i "s/image_proxy : False/image_proxy : $IMAGE_PROXY/g" searx/settings.yml' >> run.sh \
+ && echo 'sed -i "s/ultrasecretkey/`openssl rand -hex 16`/g" searx/settings.yml' >> run.sh \
+ && echo 'python searx/webapp.py' >> run.sh \
+ && chmod +x run.sh
-EXPOSE 5000
-CMD ["/usr/local/bin/uwsgi", "--uid", "searx", "--gid", "searx", "--http", ":5000", "-w", "searx.webapp"]
+COPY requirements.txt ./requirements.txt
-RUN apt-get update && \
- apt-get install -y --no-install-recommends \
- zlib1g-dev libxml2-dev libxslt1-dev libffi-dev build-essential \
- libssl-dev openssl && \
- rm -rf /var/lib/apt/lists/*
+RUN echo "@commuedge http://nl.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories \
+ && apk -U add \
+ build-base \
+ python \
+ python-dev \
+ py-pip \
+ libxml2 \
+ libxml2-dev \
+ libxslt \
+ libxslt-dev \
+ libffi-dev \
+ openssl \
+ openssl-dev \
+ ca-certificates \
+ tini@commuedge \
+ && pip install --no-cache -r requirements.txt \
+ && apk del \
+ build-base \
+ python-dev \
+ py-pip\
+ libffi-dev \
+ openssl-dev \
+ libxslt-dev \
+ libxml2-dev \
+ openssl-dev \
+ ca-certificates \
+ && rm -f /var/cache/apk/*
-RUN pip install --no-cache uwsgi
+COPY . .
-COPY requirements.txt /app/requirements.txt
-RUN pip install --no-cache -r requirements.txt
+RUN chown -R searx:searx *
-COPY . /app
-RUN sed -i -e "s/ultrasecretkey/`openssl rand -hex 16`/g" searx/settings.yml
+USER searx
+
+RUN sed -i "s/127.0.0.1/0.0.0.0/g" searx/settings.yml
diff --git a/sources/Makefile b/sources/Makefile
deleted file mode 100644
index 5573e54..0000000
--- a/sources/Makefile
+++ /dev/null
@@ -1,66 +0,0 @@
-# convenience makefile to boostrap & run buildout
-# use `make options=-v` to run buildout with extra options
-
-version = 2.7
-python = bin/python
-options =
-
-all: .installed.cfg
-
-.installed.cfg: bin/buildout buildout.cfg setup.py
- bin/buildout $(options)
-
-bin/buildout: $(python) buildout.cfg bootstrap.py
- $(python) bootstrap.py
- @touch $@
-
-$(python):
- virtualenv -p python$(version) --no-site-packages .
- @touch $@
-
-robot: .installed.cfg
- @bin/robot
-
-flake8: .installed.cfg
- @bin/flake8 setup.py
- @bin/flake8 ./searx/
-
-tests: .installed.cfg flake8
- @bin/test
- @grunt test --gruntfile searx/static/themes/oscar/gruntfile.js
-
-coverage: .installed.cfg
- @bin/coverage run bin/test
- @bin/coverage report
- @bin/coverage html
-
-production: bin/buildout production.cfg setup.py
- bin/buildout -c production.cfg $(options)
- @echo "* Please modify `readlink --canonicalize-missing ./searx/settings.py`"
- @echo "* Hint 1: on production, disable debug mode and change secret_key"
- @echo "* Hint 2: searx will be executed at server startup by crontab"
- @echo "* Hint 3: to run immediatley, execute 'bin/supervisord'"
-
-minimal: bin/buildout minimal.cfg setup.py
- bin/buildout -c minimal.cfg $(options)
-
-styles:
- @lessc -x searx/static/themes/default/less/style.less > searx/static/themes/default/css/style.css
- @lessc -x searx/static/themes/default/less/style-rtl.less > searx/static/themes/default/css/style-rtl.css
- @lessc -x searx/static/themes/courgette/less/style.less > searx/static/themes/courgette/css/style.css
- @lessc -x searx/static/themes/courgette/less/style-rtl.less > searx/static/themes/courgette/css/style-rtl.css
- @lessc -x searx/static/less/bootstrap/bootstrap.less > searx/static/css/bootstrap.min.css
- @lessc -x searx/static/themes/oscar/less/oscar/oscar.less > searx/static/themes/oscar/css/oscar.min.css
- @lessc -x searx/static/themes/pix-art/less/style.less > searx/static/themes/pix-art/css/style.css
-
-grunt:
- @grunt --gruntfile searx/static/themes/oscar/gruntfile.js
-
-locales:
- @pybabel compile -d searx/translations
-
-clean:
- @rm -rf .installed.cfg .mr.developer.cfg bin parts develop-eggs eggs \
- searx.egg-info lib include .coverage coverage
-
-.PHONY: all tests robot flake8 coverage production minimal styles locales clean
diff --git a/sources/README.rst b/sources/README.rst
index cf1263c..c099315 100644
--- a/sources/README.rst
+++ b/sources/README.rst
@@ -7,31 +7,16 @@ engine `__.
 List of `running instances `__.
-See the `wiki `__ for more information.
+See the `documentation `__ and the `wiki `__ for more information.
 |Flattr searx|
-Features
-~~~~~~~~
-
-- Tracking free
-- Supports multiple output formats
-
- - json ``curl https://searx.me/?format=json&q=[query]``
- - csv ``curl https://searx.me/?format=csv&q=[query]``
- - opensearch/rss ``curl https://searx.me/?format=rss&q=[query]``
-- Opensearch support (you can set as default search engine)
-- Configurable search engines/categories
-- Different search languages
-- Duckduckgo like !bang functionality with engine shortcuts
-- Parallel queries - relatively fast
 Installation
 ~~~~~~~~~~~~
 - clone source: ``git clone git@github.com:asciimoo/searx.git && cd searx``
-- install dependencies: ``pip install -r requirements.txt``
+- install dependencies: ``./manage.sh update_packages``
 - edit your `settings.yml `__ (set your ``secret_key``!)
@@ -40,104 +25,6 @@ Installation
 For all the details, follow this `step by step installation `__
-Alternative (Recommended) Installation
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-- clone source:
- ``git clone git@github.com:asciimoo/searx.git && cd searx``
-- build in current folder: ``make minimal``
-- run ``bin/searx-run`` to start the application
-
-Development
-~~~~~~~~~~~
-
-Just run ``make``. Versions of dependencies are pinned down inside
-``versions.cfg`` to produce most stable build.
Also remember, NO make -command should be run as root, not even ``make production`` - -Deployment -~~~~~~~~~~ - -- clone source: - ``git clone git@github.com:asciimoo/searx.git && cd searx`` -- build in current folder: ``make production`` -- run ``bin/supervisord`` to start the application - -Upgrading -~~~~~~~~~ - -- inside previously cloned searx directory run: ``git stash`` to - temporarily save any changes you have made -- pull source: ``git pull origin master`` -- re-build in current folder: ``make production`` -- run ``bin/supervisorctl stop searx`` to stop searx, if it does not, - then run ``fuser -k 8888/tcp`` -- run ``bin/supervisorctl reload`` to re-read supervisor config and - start searx - -Command make -~~~~~~~~~~~~ - -``make`` -'''''''' - -Builds development environment with testing support. - -``make tests`` -'''''''''''''' - -Runs tests. You can write tests -`here `__ and -remember 'untested code is broken code'. - -``make robot`` -'''''''''''''' - -Runs robot (Selenium) tests, you must have ``firefox`` installed because -this functional tests actually run the browser and perform operations on -it. Also searx is executed with -`settings\_robot `__. - -``make flake8`` -''''''''''''''' - -'pep8 is a tool to check your Python code against some of the style -conventions in `PEP 8 `__.' - -``make coverage`` -''''''''''''''''' - -Checks coverage of tests, after running this, execute this: -``firefox ./coverage/index.html`` - -``make production`` -''''''''''''''''''' - -Used to make co-called production environment - without tests (you -should ran tests before deploying searx on the server). This installs -supervisord, so if searx crashes, it will try to pick itself up again. -And crontab entry is added to start supervisord at server boot. - -``make minimal`` -'''''''''''''''' - -Minimal build - without test frameworks, the quickest build option. - -``make clean`` -'''''''''''''' - -Deletes several folders and files (see ``Makefile`` for more), so that -next time you run any other ``make`` command it will rebuild everithing. - -TODO -~~~~ - -- Moar engines -- Better ui -- Browser integration -- Documentation -- Tests - Bugs ~~~~ diff --git a/sources/base.cfg b/sources/base.cfg deleted file mode 100644 index 6e46e9e..0000000 --- a/sources/base.cfg +++ /dev/null @@ -1,17 +0,0 @@ -[buildout] -extends = versions.cfg -unzip = true -newest = false -prefer-final = true -develop = . - -eggs = - searx - -parts = - omelette - - -[omelette] -recipe = collective.recipe.omelette -eggs = ${buildout:eggs} diff --git a/sources/bootstrap.py b/sources/bootstrap.py deleted file mode 100644 index a459921..0000000 --- a/sources/bootstrap.py +++ /dev/null @@ -1,210 +0,0 @@ -############################################################################## -# -# Copyright (c) 2006 Zope Foundation and Contributors. -# All Rights Reserved. -# -# This software is subject to the provisions of the Zope Public License, -# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. -# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED -# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS -# FOR A PARTICULAR PURPOSE. -# -############################################################################## -"""Bootstrap a buildout-based project - -Simply run this script in a directory containing a buildout.cfg. 
-The script accepts buildout command-line options, so you can -use the -c option to specify an alternate configuration file. -""" - -import os -import shutil -import sys -import tempfile - -from optparse import OptionParser - -__version__ = '2015-07-01' -# See zc.buildout's changelog if this version is up to date. - -tmpeggs = tempfile.mkdtemp(prefix='bootstrap-') - -usage = '''\ -[DESIRED PYTHON FOR BUILDOUT] bootstrap.py [options] - -Bootstraps a buildout-based project. - -Simply run this script in a directory containing a buildout.cfg, using the -Python that you want bin/buildout to use. - -Note that by using --find-links to point to local resources, you can keep -this script from going over the network. -''' - -parser = OptionParser(usage=usage) -parser.add_option("--version", - action="store_true", default=False, - help=("Return bootstrap.py version.")) -parser.add_option("-t", "--accept-buildout-test-releases", - dest='accept_buildout_test_releases', - action="store_true", default=False, - help=("Normally, if you do not specify a --version, the " - "bootstrap script and buildout gets the newest " - "*final* versions of zc.buildout and its recipes and " - "extensions for you. If you use this flag, " - "bootstrap and buildout will get the newest releases " - "even if they are alphas or betas.")) -parser.add_option("-c", "--config-file", - help=("Specify the path to the buildout configuration " - "file to be used.")) -parser.add_option("-f", "--find-links", - help=("Specify a URL to search for buildout releases")) -parser.add_option("--allow-site-packages", - action="store_true", default=False, - help=("Let bootstrap.py use existing site packages")) -parser.add_option("--buildout-version", - help="Use a specific zc.buildout version") -parser.add_option("--setuptools-version", - help="Use a specific setuptools version") -parser.add_option("--setuptools-to-dir", - help=("Allow for re-use of existing directory of " - "setuptools versions")) - -options, args = parser.parse_args() -if options.version: - print("bootstrap.py version %s" % __version__) - sys.exit(0) - - -###################################################################### -# load/install setuptools - -try: - from urllib.request import urlopen -except ImportError: - from urllib2 import urlopen - -ez = {} -if os.path.exists('ez_setup.py'): - exec(open('ez_setup.py').read(), ez) -else: - exec(urlopen('https://bootstrap.pypa.io/ez_setup.py').read(), ez) - -if not options.allow_site_packages: - # ez_setup imports site, which adds site packages - # this will remove them from the path to ensure that incompatible versions - # of setuptools are not in the path - import site - # inside a virtualenv, there is no 'getsitepackages'. - # We can't remove these reliably - if hasattr(site, 'getsitepackages'): - for sitepackage_path in site.getsitepackages(): - # Strip all site-packages directories from sys.path that - # are not sys.prefix; this is because on Windows - # sys.prefix is a site-package directory. - if sitepackage_path != sys.prefix: - sys.path[:] = [x for x in sys.path - if sitepackage_path not in x] - -setup_args = dict(to_dir=tmpeggs, download_delay=0) - -if options.setuptools_version is not None: - setup_args['version'] = options.setuptools_version -if options.setuptools_to_dir is not None: - setup_args['to_dir'] = options.setuptools_to_dir - -ez['use_setuptools'](**setup_args) -import setuptools -import pkg_resources - -# This does not (always?) update the default working set. We will -# do it. 
-for path in sys.path: - if path not in pkg_resources.working_set.entries: - pkg_resources.working_set.add_entry(path) - -###################################################################### -# Install buildout - -ws = pkg_resources.working_set - -setuptools_path = ws.find( - pkg_resources.Requirement.parse('setuptools')).location - -# Fix sys.path here as easy_install.pth added before PYTHONPATH -cmd = [sys.executable, '-c', - 'import sys; sys.path[0:0] = [%r]; ' % setuptools_path + - 'from setuptools.command.easy_install import main; main()', - '-mZqNxd', tmpeggs] - -find_links = os.environ.get( - 'bootstrap-testing-find-links', - options.find_links or - ('http://downloads.buildout.org/' - if options.accept_buildout_test_releases else None) - ) -if find_links: - cmd.extend(['-f', find_links]) - -requirement = 'zc.buildout' -version = options.buildout_version -if version is None and not options.accept_buildout_test_releases: - # Figure out the most recent final version of zc.buildout. - import setuptools.package_index - _final_parts = '*final-', '*final' - - def _final_version(parsed_version): - try: - return not parsed_version.is_prerelease - except AttributeError: - # Older setuptools - for part in parsed_version: - if (part[:1] == '*') and (part not in _final_parts): - return False - return True - - index = setuptools.package_index.PackageIndex( - search_path=[setuptools_path]) - if find_links: - index.add_find_links((find_links,)) - req = pkg_resources.Requirement.parse(requirement) - if index.obtain(req) is not None: - best = [] - bestv = None - for dist in index[req.project_name]: - distv = dist.parsed_version - if _final_version(distv): - if bestv is None or distv > bestv: - best = [dist] - bestv = distv - elif distv == bestv: - best.append(dist) - if best: - best.sort() - version = best[-1].version -if version: - requirement = '=='.join((requirement, version)) -cmd.append(requirement) - -import subprocess -if subprocess.call(cmd) != 0: - raise Exception( - "Failed to execute command:\n%s" % repr(cmd)[1:-1]) - -###################################################################### -# Import and run buildout - -ws.add_entry(tmpeggs) -ws.require(requirement) -import zc.buildout.buildout - -if not [a for a in args if '=' not in a]: - args.append('bootstrap') - -# if -c was provided, we push it back into args for buildout' main function -if options.config_file is not None: - args[0:0] = ['-c', options.config_file] - -zc.buildout.buildout.main(args) -shutil.rmtree(tmpeggs) diff --git a/sources/buildout.cfg b/sources/buildout.cfg deleted file mode 100644 index b9e6d24..0000000 --- a/sources/buildout.cfg +++ /dev/null @@ -1,30 +0,0 @@ -[buildout] -extends = base.cfg -develop = . 
- -eggs = - searx [test] - -parts += - pyscripts - robot - test - - -[pyscripts] -recipe = zc.recipe.egg:script -eggs = ${buildout:eggs} -interpreter = py -dependent-scripts = true - - -[robot] -recipe = zc.recipe.testrunner -eggs = ${buildout:eggs} -defaults = ['--color', '--auto-progress', '--layer', 'SearxRobotLayer'] - - -[test] -recipe = zc.recipe.testrunner -eggs = ${buildout:eggs} -defaults = ['--color', '--auto-progress', '--layer', 'SearxTestLayer', '--layer', '!SearxRobotLayer'] diff --git a/sources/manage.sh b/sources/manage.sh new file mode 100755 index 0000000..0a21f0e --- /dev/null +++ b/sources/manage.sh @@ -0,0 +1,95 @@ +#!/bin/sh + +BASE_DIR=$(dirname `readlink -f $0`) +PYTHONPATH=$BASE_DIR +SEARX_DIR="$BASE_DIR/searx" +ACTION=$1 + +update_packages() { + pip install --upgrade -r "$BASE_DIR/requirements.txt" +} + +update_dev_packages() { + update_packages + pip install --upgrade -r "$BASE_DIR/requirements-dev.txt" +} + +pep8_check() { + echo '[!] Running pep8 check' + # ignored rules: + # E402 module level import not at top of file + # W503 line break before binary operator + pep8 --max-line-length=120 --ignore "E402,W503" "$SEARX_DIR" "$BASE_DIR/tests" +} + +unit_tests() { + echo '[!] Running unit tests' + python -m nose2 -s "$BASE_DIR/tests/unit" +} + +py_test_coverage() { + echo '[!] Running python test coverage' + PYTHONPATH=`pwd` python -m nose2 -C --coverage "$SEARX_DIR" -s "$BASE_DIR/tests/unit" + coverage report + coverage html +} + +robot_tests() { + echo '[!] Running robot tests' + PYTHONPATH=`pwd` python "$SEARX_DIR/testing.py" robot +} + +tests() { + set -e + pep8_check + unit_tests + robot_tests + set +e +} + +build_style() { + lessc -x "$BASE_DIR/searx/static/$1" "$BASE_DIR/searx/static/$2" +} + +styles() { + echo '[!] Building styles' + build_style themes/default/less/style.less themes/default/css/style.css + build_style themes/default/less/style-rtl.less themes/default/css/style-rtl.css + build_style themes/courgette/less/style.less themes/courgette/css/style.css + build_style themes/courgette/less/style-rtl.less themes/courgette/css/style-rtl.css + build_style less/bootstrap/bootstrap.less css/bootstrap.min.css + build_style themes/oscar/less/oscar/oscar.less themes/oscar/css/oscar.min.css + build_style themes/pix-art/less/style.less themes/pix-art/css/style.css +} + +grunt_build() { + grunt --gruntfile "$SEARX_DIR/static/themes/oscar/gruntfile.js" +} + +locales() { + pybabel compile -d "$SEARX_DIR/translations" +} + +help() { + [ -z "$1" ] || printf "Error: $1\n" + echo "Searx manage.sh help + +Commands +======== + grunt_build - Build js files + help - This text + locales - Compile locales + pep8_check - Pep8 validation + py_test_coverage - Unit test coverage + robot_tests - Run selenium tests + styles - Build less files + tests - Run all python tests (pep8, unit, robot) + unit_tests - Run unit tests + update_dev_packages - Check & update development and production dependency changes + update_packages - Check & update dependency changes +" +} + +[ "$(command -V "$ACTION" | grep ' function$')" = "" ] \ + && help "action not found" \ + || $ACTION diff --git a/sources/minimal.cfg b/sources/minimal.cfg deleted file mode 100644 index 339a293..0000000 --- a/sources/minimal.cfg +++ /dev/null @@ -1,15 +0,0 @@ -[buildout] -extends = base.cfg -develop = . 
- -eggs = - searx - -parts += - pyscripts - - -[pyscripts] -recipe = zc.recipe.egg:script -eggs = ${buildout:eggs} -interpreter = py diff --git a/sources/production.cfg b/sources/production.cfg deleted file mode 100644 index ea40682..0000000 --- a/sources/production.cfg +++ /dev/null @@ -1,34 +0,0 @@ -[buildout] -extends = base.cfg -develop = . - -eggs = - searx - -parts += - pyscripts - supervisor - crontab_reboot - - -[pyscripts] -recipe = zc.recipe.egg:script -eggs = ${buildout:eggs} -interpreter = py - - -[supervisor] -recipe = collective.recipe.supervisor -http-socket = unix -user = searxer -password = ohpleasedochangeme -file = /tmp/supervisor.sock -chmod = 0700 -programs = - 50 searx ${buildout:bin-directory}/searx-run - - -[crontab_reboot] -recipe = z3c.recipe.usercrontab -times = @reboot -command = ${buildout:bin-directory}/supervisord diff --git a/sources/requirements-dev.txt b/sources/requirements-dev.txt new file mode 100644 index 0000000..38be888 --- /dev/null +++ b/sources/requirements-dev.txt @@ -0,0 +1,10 @@ +babel==2.2.0 +mock==1.0.1 +nose2[coverage-plugin] +pep8==1.7.0 +plone.testing==4.0.15 +robotframework-selenium2library==1.7.4 +robotsuite==1.7.0 +transifex-client==0.11 +unittest2==1.1.0 +zope.testrunner==4.4.10 diff --git a/sources/requirements.txt b/sources/requirements.txt index e021c4a..80c08a4 100644 --- a/sources/requirements.txt +++ b/sources/requirements.txt @@ -1,12 +1,12 @@ -flask -flask-babel -requests -lxml -pyyaml -pygments -python-dateutil -ndg-httpsclient -pyopenssl -pyasn1 -pyasn1-modules -certifi +certifi==2015.11.20.1 +flask==0.10.1 +flask-babel==0.9 +lxml==3.5.0 +ndg-httpsclient==0.4.0 +pyasn1==0.1.9 +pyasn1-modules==0.0.8 +pygments==2.0.2 +pyopenssl==0.15.1 +python-dateutil==2.4.2 +pyyaml==3.11 +requests==2.9.1 diff --git a/sources/searx/__init__.py b/sources/searx/__init__.py index ea21e8f..7b67a39 100644 --- a/sources/searx/__init__.py +++ b/sources/searx/__init__.py @@ -15,9 +15,11 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2013- by Adam Tauber, ''' +import certifi import logging from os import environ from os.path import realpath, dirname, join, abspath +from ssl import OPENSSL_VERSION_INFO, OPENSSL_VERSION try: from yaml import load except: @@ -47,4 +49,11 @@ else: logger = logging.getLogger('searx') +# Workaround for openssl versions <1.0.2 +# https://github.com/certifi/python-certifi/issues/26 +if OPENSSL_VERSION_INFO[0:3] < (1, 0, 2): + if hasattr(certifi, 'old_where'): + environ['REQUESTS_CA_BUNDLE'] = certifi.old_where() + logger.warning('You are using an old openssl version({0}), please upgrade above 1.0.2!'.format(OPENSSL_VERSION)) + logger.info('Initialisation done') diff --git a/sources/searx/autocomplete.py b/sources/searx/autocomplete.py index 264d0cc..5271040 100644 --- a/sources/searx/autocomplete.py +++ b/sources/searx/autocomplete.py @@ -110,12 +110,11 @@ def searx_bang(full_query): return list(result_set) -def dbpedia(query): +def dbpedia(query, lang): # dbpedia autocompleter, no HTTPS autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?' 
- response = get(autocomplete_url - + urlencode(dict(QueryString=query))) + response = get(autocomplete_url + urlencode(dict(QueryString=query))) results = [] @@ -127,7 +126,7 @@ def dbpedia(query): return results -def duckduckgo(query): +def duckduckgo(query, lang): # duckduckgo autocompleter url = 'https://ac.duckduckgo.com/ac/?{0}&type=list' @@ -137,12 +136,11 @@ def duckduckgo(query): return [] -def google(query): +def google(query, lang): # google autocompleter autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&' - response = get(autocomplete_url - + urlencode(dict(q=query))) + response = get(autocomplete_url + urlencode(dict(hl=lang, q=query))) results = [] @@ -153,8 +151,8 @@ def google(query): return results -def startpage(query): - # wikipedia autocompleter +def startpage(query, lang): + # startpage autocompleter url = 'https://startpage.com/do/suggest?{query}' resp = get(url.format(query=urlencode({'query': query}))).text.split('\n') @@ -163,9 +161,26 @@ def startpage(query): return [] -def wikipedia(query): +def qwant(query, lang): + # qwant autocompleter (additional parameter : lang=en_en&count=xxx ) + url = 'https://api.qwant.com/api/suggest?{query}' + + resp = get(url.format(query=urlencode({'q': query, 'lang': lang}))) + + results = [] + + if resp.ok: + data = loads(resp.text) + if data['status'] == 'success': + for item in data['data']['items']: + results.append(item['value']) + + return results + + +def wikipedia(query, lang): # wikipedia autocompleter - url = 'https://en.wikipedia.org/w/api.php?action=opensearch&{0}&limit=10&namespace=0&format=json' + url = 'https://' + lang + '.wikipedia.org/w/api.php?action=opensearch&{0}&limit=10&namespace=0&format=json' resp = loads(get(url.format(urlencode(dict(search=query)))).text) if len(resp) > 1: @@ -177,5 +192,6 @@ backends = {'dbpedia': dbpedia, 'duckduckgo': duckduckgo, 'google': google, 'startpage': startpage, + 'qwant': qwant, 'wikipedia': wikipedia } diff --git a/sources/searx/engines/__init__.py b/sources/searx/engines/__init__.py index 447138d..6d50667 100644 --- a/sources/searx/engines/__init__.py +++ b/sources/searx/engines/__init__.py @@ -34,6 +34,15 @@ engines = {} categories = {'general': []} engine_shortcuts = {} +engine_default_args = {'paging': False, + 'categories': ['general'], + 'language_support': True, + 'safesearch': False, + 'timeout': settings['outgoing']['request_timeout'], + 'shortcut': '-', + 'disabled': False, + 'suspend_end_time': 0, + 'continuous_errors': 0} def load_module(filename): @@ -62,26 +71,9 @@ def load_engine(engine_data): continue setattr(engine, param_name, engine_data[param_name]) - if not hasattr(engine, 'paging'): - engine.paging = False - - if not hasattr(engine, 'categories'): - engine.categories = ['general'] - - if not hasattr(engine, 'language_support'): - engine.language_support = True - - if not hasattr(engine, 'safesearch'): - engine.safesearch = False - - if not hasattr(engine, 'timeout'): - engine.timeout = settings['outgoing']['request_timeout'] - - if not hasattr(engine, 'shortcut'): - engine.shortcut = '' - - if not hasattr(engine, 'disabled'): - engine.disabled = False + for arg_name, arg_value in engine_default_args.iteritems(): + if not hasattr(engine, arg_name): + setattr(engine, arg_name, arg_value) # checking required variables for engine_attr in dir(engine): @@ -100,18 +92,15 @@ def load_engine(engine_data): 'errors': 0 } - if hasattr(engine, 'categories'): - for category_name in engine.categories: - 
categories.setdefault(category_name, []).append(engine) - else: - categories['general'].append(engine) + for category_name in engine.categories: + categories.setdefault(category_name, []).append(engine) + + if engine.shortcut in engine_shortcuts: + logger.error('Engine config error: ambigious shortcut: {0}'.format(engine.shortcut)) + sys.exit(1) + + engine_shortcuts[engine.shortcut] = engine.name - if engine.shortcut: - if engine.shortcut in engine_shortcuts: - logger.error('Engine config error: ambigious shortcut: {0}' - .format(engine.shortcut)) - sys.exit(1) - engine_shortcuts[engine.shortcut] = engine.name return engine diff --git a/sources/searx/engines/archlinux.py b/sources/searx/engines/archlinux.py new file mode 100644 index 0000000..84e0d0f --- /dev/null +++ b/sources/searx/engines/archlinux.py @@ -0,0 +1,141 @@ +# -*- coding: utf-8 -*- + +""" + Arch Linux Wiki + + @website https://wiki.archlinux.org + @provide-api no (Mediawiki provides API, but Arch Wiki blocks access to it + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title +""" + +from urlparse import urljoin +from cgi import escape +from urllib import urlencode +from lxml import html +from searx.engines.xpath import extract_text + +# engine dependent config +categories = ['it'] +language_support = True +paging = True +base_url = 'https://wiki.archlinux.org' + +# xpath queries +xpath_results = '//ul[@class="mw-search-results"]/li' +xpath_link = './/div[@class="mw-search-result-heading"]/a' + + +# cut 'en' from 'en_US', 'de' from 'de_CH', and so on +def locale_to_lang_code(locale): + if locale.find('_') >= 0: + locale = locale.split('_')[0] + return locale + +# wikis for some languages were moved off from the main site, we need to make +# requests to correct URLs to be able to get results in those languages +lang_urls = { + 'all': { + 'base': 'https://wiki.archlinux.org', + 'search': '/index.php?title=Special:Search&offset={offset}&{query}' + }, + 'de': { + 'base': 'https://wiki.archlinux.de', + 'search': '/index.php?title=Spezial:Suche&offset={offset}&{query}' + }, + 'fr': { + 'base': 'https://wiki.archlinux.fr', + 'search': '/index.php?title=Spécial:Recherche&offset={offset}&{query}' + }, + 'ja': { + 'base': 'https://wiki.archlinuxjp.org', + 'search': '/index.php?title=特別:検索&offset={offset}&{query}' + }, + 'ro': { + 'base': 'http://wiki.archlinux.ro', + 'search': '/index.php?title=Special:Căutare&offset={offset}&{query}' + }, + 'tr': { + 'base': 'http://archtr.org/wiki', + 'search': '/index.php?title=Özel:Ara&offset={offset}&{query}' + } +} + + +# get base & search URLs for selected language +def get_lang_urls(language): + if language in lang_urls: + return lang_urls[language] + return lang_urls['all'] + +# Language names to build search requests for +# those languages which are hosted on the main site. +main_langs = { + 'ar': 'العربية', + 'bg': 'Български', + 'cs': 'Česky', + 'da': 'Dansk', + 'el': 'Ελληνικά', + 'es': 'Español', + 'he': 'עברית', + 'hr': 'Hrvatski', + 'hu': 'Magyar', + 'it': 'Italiano', + 'ko': '한국어', + 'lt': 'Lietuviškai', + 'nl': 'Nederlands', + 'pl': 'Polski', + 'pt': 'Português', + 'ru': 'Русский', + 'sl': 'Slovenský', + 'th': 'ไทย', + 'uk': 'Українська', + 'zh': '简体中文' +} + + +# do search-request +def request(query, params): + # translate the locale (e.g. 
'en_US') to language code ('en') + language = locale_to_lang_code(params['language']) + + # if our language is hosted on the main site, we need to add its name + # to the query in order to narrow the results to that language + if language in main_langs: + query += '(' + main_langs[language] + ')' + + # prepare the request parameters + query = urlencode({'search': query}) + offset = (params['pageno'] - 1) * 20 + + # get request URLs for our language of choice + urls = get_lang_urls(language) + search_url = urls['base'] + urls['search'] + + params['url'] = search_url.format(query=query, offset=offset) + + return params + + +# get response from search-request +def response(resp): + # get the base URL for the language in which request was made + language = locale_to_lang_code(resp.search_params['language']) + base_url = get_lang_urls(language)['base'] + + results = [] + + dom = html.fromstring(resp.text) + + # parse results + for result in dom.xpath(xpath_results): + link = result.xpath(xpath_link)[0] + href = urljoin(base_url, link.attrib.get('href')) + title = escape(extract_text(link)) + + results.append({'url': href, + 'title': title}) + + return results diff --git a/sources/searx/engines/base.py b/sources/searx/engines/base.py new file mode 100755 index 0000000..66491d3 --- /dev/null +++ b/sources/searx/engines/base.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python + +""" + BASE (Scholar publications) + + @website https://base-search.net + @provide-api yes with authorization (https://api.base-search.net/) + + @using-api yes + @results XML + @stable ? + @parse url, title, publishedDate, content + More info on api: http://base-search.net/about/download/base_interface.pdf +""" + +from lxml import etree +from urllib import urlencode +from searx.utils import searx_useragent +from cgi import escape +from datetime import datetime +import re + + +categories = ['science'] + +base_url = 'https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi'\ + + '?func=PerformSearch&{query}&boost=oa&hits={hits}&offset={offset}' + +# engine dependent config +paging = True +number_of_results = 10 + +# shortcuts for advanced search +shorcut_dict = { + # user-friendly keywords + 'format:': 'dcformat:', + 'author:': 'dccreator:', + 'collection:': 'dccollection:', + 'hdate:': 'dchdate:', + 'contributor:': 'dccontributor:', + 'coverage:': 'dccoverage:', + 'date:': 'dcdate:', + 'abstract:': 'dcdescription:', + 'urls:': 'dcidentifier:', + 'language:': 'dclanguage:', + 'publisher:': 'dcpublisher:', + 'relation:': 'dcrelation:', + 'rights:': 'dcrights:', + 'source:': 'dcsource:', + 'subject:': 'dcsubject:', + 'title:': 'dctitle:', + 'type:': 'dcdctype:' +} + + +def request(query, params): + # replace shortcuts with API advanced search keywords + for key in shorcut_dict.keys(): + query = re.sub(str(key), str(shorcut_dict[key]), query) + + # basic search + offset = (params['pageno'] - 1) * number_of_results + + string_args = dict(query=urlencode({'query': query}), + offset=offset, + hits=number_of_results) + + params['url'] = base_url.format(**string_args) + + params['headers']['User-Agent'] = searx_useragent() + return params + + +def response(resp): + results = [] + + search_results = etree.XML(resp.content) + + for entry in search_results.xpath('./result/doc'): + content = "No description available" + + date = datetime.now() # needed in case no dcdate is available for an item + for item in entry: + if item.attrib["name"] == "dchdate": + harvestDate = item.text + + elif item.attrib["name"] == "dcdate": + date = 
item.text + + elif item.attrib["name"] == "dctitle": + title = item.text + + elif item.attrib["name"] == "dclink": + url = item.text + + elif item.attrib["name"] == "dcdescription": + content = escape(item.text[:300]) + if len(item.text) > 300: + content += "..." + +# dates returned by the BASE API are not several formats + publishedDate = None + for date_format in ['%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%d', '%Y-%m', '%Y']: + try: + publishedDate = datetime.strptime(date, date_format) + break + except: + pass + + if publishedDate is not None: + res_dict = {'url': url, + 'title': title, + 'publishedDate': publishedDate, + 'content': content} + else: + res_dict = {'url': url, + 'title': title, + 'content': content} + + results.append(res_dict) + + return results diff --git a/sources/searx/engines/bing_images.py b/sources/searx/engines/bing_images.py index 06850df..3845203 100644 --- a/sources/searx/engines/bing_images.py +++ b/sources/searx/engines/bing_images.py @@ -17,7 +17,7 @@ from urllib import urlencode from lxml import html -from yaml import load +from json import loads import re # engine dependent config @@ -36,6 +36,9 @@ safesearch_types = {2: 'STRICT', 0: 'OFF'} +_quote_keys_regex = re.compile('({|,)([a-z][a-z0-9]*):(")', re.I | re.U) + + # do search-request def request(query, params): offset = (params['pageno'] - 1) * 10 + 1 @@ -65,22 +68,19 @@ def response(resp): dom = html.fromstring(resp.text) - # init regex for yaml-parsing - p = re.compile('({|,)([a-z]+):(")') - # parse results - for result in dom.xpath('//div[@class="dg_u"]'): + for result in dom.xpath('//div[@class="dg_u"]/div'): link = result.xpath('./a')[0] - # parse yaml-data (it is required to add a space, to make it parsable) - yaml_data = load(p.sub(r'\1\2: \3', link.attrib.get('m'))) + # parse json-data (it is required to add a space, to make it parsable) + json_data = loads(_quote_keys_regex.sub(r'\1"\2": \3', link.attrib.get('m'))) title = link.attrib.get('t1') ihk = link.attrib.get('ihk') # url = 'http://' + link.attrib.get('t3') - url = yaml_data.get('surl') - img_src = yaml_data.get('imgurl') + url = json_data.get('surl') + img_src = json_data.get('imgurl') # append result results.append({'template': 'images.html', diff --git a/sources/searx/engines/blekko_images.py b/sources/searx/engines/blekko_images.py index 93ac661..c0664f3 100644 --- a/sources/searx/engines/blekko_images.py +++ b/sources/searx/engines/blekko_images.py @@ -37,7 +37,7 @@ def request(query, params): c=c) if params['pageno'] != 1: - params['url'] += '&page={pageno}'.format(pageno=(params['pageno']-1)) + params['url'] += '&page={pageno}'.format(pageno=(params['pageno'] - 1)) # let Blekko know we wan't have profiling params['cookies']['tag_lesslogging'] = '1' diff --git a/sources/searx/engines/btdigg.py b/sources/searx/engines/btdigg.py index 192ed6e..c2b22f0 100644 --- a/sources/searx/engines/btdigg.py +++ b/sources/searx/engines/btdigg.py @@ -29,7 +29,7 @@ search_url = url + '/search?q={search_term}&p={pageno}' # do search-request def request(query, params): params['url'] = search_url.format(search_term=quote(query), - pageno=params['pageno']-1) + pageno=params['pageno'] - 1) return params diff --git a/sources/searx/engines/deviantart.py b/sources/searx/engines/deviantart.py index 60c8d7e..135aeb3 100644 --- a/sources/searx/engines/deviantart.py +++ b/sources/searx/engines/deviantart.py @@ -24,7 +24,7 @@ paging = True # search-url base_url = 'https://www.deviantart.com/' -search_url = base_url+'browse/all/?offset={offset}&{query}' +search_url = 
base_url + 'browse/all/?offset={offset}&{query}' # do search-request diff --git a/sources/searx/engines/digg.py b/sources/searx/engines/digg.py index 000f66b..a10b38b 100644 --- a/sources/searx/engines/digg.py +++ b/sources/searx/engines/digg.py @@ -22,7 +22,7 @@ paging = True # search-url base_url = 'https://digg.com/' -search_url = base_url+'api/search/{query}.json?position={position}&format=html' +search_url = base_url + 'api/search/{query}.json?position={position}&format=html' # specific xpath variables results_xpath = '//article' diff --git a/sources/searx/engines/doku.py b/sources/searx/engines/doku.py new file mode 100644 index 0000000..93867fd --- /dev/null +++ b/sources/searx/engines/doku.py @@ -0,0 +1,84 @@ +# Doku Wiki +# +# @website https://www.dokuwiki.org/ +# @provide-api yes +# (https://www.dokuwiki.org/devel:xmlrpc) +# +# @using-api no +# @results HTML +# @stable yes +# @parse (general) url, title, content + +from urllib import urlencode +from lxml.html import fromstring +from searx.engines.xpath import extract_text + +# engine dependent config +categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' +paging = False +language_support = False +number_of_results = 5 + +# search-url +# Doku is OpenSearch compatible +base_url = 'http://localhost:8090' +search_url = '/?do=search'\ + '&{query}' +# TODO '&startRecord={offset}'\ +# TODO '&maximumRecords={limit}'\ + + +# do search-request +def request(query, params): + + params['url'] = base_url +\ + search_url.format(query=urlencode({'id': query})) + + return params + + +# get response from search-request +def response(resp): + results = [] + + doc = fromstring(resp.text) + + # parse results + # Quickhits + for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'): + try: + res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] + except: + continue + + if not res_url: + continue + + title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + + # append result + results.append({'title': title, + 'content': "", + 'url': base_url + res_url}) + + # Search results + for r in doc.xpath('//dl[@class="search_results"]/*'): + try: + if r.tag == "dt": + res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] + title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + elif r.tag == "dd": + content = extract_text(r.xpath('.')) + + # append result + results.append({'title': title, + 'content': content, + 'url': base_url + res_url}) + except: + continue + + if not res_url: + continue + + # return results + return results diff --git a/sources/searx/engines/duckduckgo.py b/sources/searx/engines/duckduckgo.py index 4ac2099..373ce1b 100644 --- a/sources/searx/engines/duckduckgo.py +++ b/sources/searx/engines/duckduckgo.py @@ -28,10 +28,10 @@ language_support = True url = 'https://duckduckgo.com/html?{query}&s={offset}' # specific xpath variables -result_xpath = '//div[@class="results_links results_links_deep web-result"]' # noqa -url_xpath = './/a[@class="large"]/@href' -title_xpath = './/a[@class="large"]' -content_xpath = './/div[@class="snippet"]' +result_xpath = '//div[@class="result results_links results_links_deep web-result "]' # noqa +url_xpath = './/a[@class="result__a"]/@href' +title_xpath = './/a[@class="result__a"]' +content_xpath = './/a[@class="result__snippet"]' # do search-request diff --git a/sources/searx/engines/duckduckgo_definitions.py b/sources/searx/engines/duckduckgo_definitions.py index 793e97d..208ccca 100644 --- a/sources/searx/engines/duckduckgo_definitions.py +++ 
b/sources/searx/engines/duckduckgo_definitions.py @@ -1,5 +1,6 @@ import json from urllib import urlencode +from re import compile, sub from lxml import html from searx.utils import html_to_text from searx.engines.xpath import extract_text @@ -7,6 +8,8 @@ from searx.engines.xpath import extract_text url = 'https://api.duckduckgo.com/'\ + '?{query}&format=json&pretty=0&no_redirect=1&d=1' +http_regex = compile(r'^http:') + def result_to_text(url, text, htmlResult): # TODO : remove result ending with "Meaning" or "Category" @@ -19,8 +22,8 @@ def result_to_text(url, text, htmlResult): def request(query, params): - # TODO add kl={locale} params['url'] = url.format(query=urlencode({'q': query})) + params['headers']['Accept-Language'] = params['language'] return params @@ -103,6 +106,10 @@ def response(resp): urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL}) + # to merge with wikidata's infobox + if infobox_id: + infobox_id = http_regex.sub('https:', infobox_id) + # entity entity = search_res.get('Entity', None) # TODO continent / country / department / location / waterfall / diff --git a/sources/searx/engines/faroo.py b/sources/searx/engines/faroo.py index 43df14e..9fa244e 100644 --- a/sources/searx/engines/faroo.py +++ b/sources/searx/engines/faroo.py @@ -88,7 +88,7 @@ def response(resp): for result in search_res['results']: if result['news']: # timestamp (milliseconds since 1970) - publishedDate = datetime.datetime.fromtimestamp(result['date']/1000.0) # noqa + publishedDate = datetime.datetime.fromtimestamp(result['date'] / 1000.0) # noqa # append news result results.append({'url': result['url'], diff --git a/sources/searx/engines/fdroid.py b/sources/searx/engines/fdroid.py new file mode 100644 index 0000000..0b16773 --- /dev/null +++ b/sources/searx/engines/fdroid.py @@ -0,0 +1,53 @@ +""" + F-Droid (a repository of FOSS applications for Android) + + @website https://f-droid.org/ + @provide-api no + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, content +""" + +from cgi import escape +from urllib import urlencode +from searx.engines.xpath import extract_text +from lxml import html + +# engine dependent config +categories = ['files'] +paging = True + +# search-url +base_url = 'https://f-droid.org/' +search_url = base_url + 'repository/browse/?{query}' + + +# do search-request +def request(query, params): + query = urlencode({'fdfilter': query, + 'fdpage': params['pageno']}) + params['url'] = search_url.format(query=query) + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + for app in dom.xpath('//div[@id="appheader"]'): + url = app.xpath('./ancestor::a/@href')[0] + title = app.xpath('./p/span/text()')[0] + img_src = app.xpath('.//img/@src')[0] + + content = extract_text(app.xpath('./p')[0]) + content = escape(content.replace(title, '', 1).strip()) + + results.append({'url': url, + 'title': title, + 'content': content, + 'img_src': img_src}) + + return results diff --git a/sources/searx/engines/frinkiac.py b/sources/searx/engines/frinkiac.py new file mode 100644 index 0000000..a9383f8 --- /dev/null +++ b/sources/searx/engines/frinkiac.py @@ -0,0 +1,44 @@ +""" +Frinkiac (Images) + +@website https://www.frinkiac.com +@provide-api no +@using-api no +@results JSON +@stable no +@parse url, title, img_src +""" + +from json import loads +from urllib import urlencode + +categories = ['images'] + +BASE = 'https://frinkiac.com/' +SEARCH_URL = 
'{base}api/search?{query}' +RESULT_URL = '{base}?{query}' +THUMB_URL = '{base}img/{episode}/{timestamp}/medium.jpg' +IMAGE_URL = '{base}img/{episode}/{timestamp}.jpg' + + +def request(query, params): + params['url'] = SEARCH_URL.format(base=BASE, query=urlencode({'q': query})) + return params + + +def response(resp): + results = [] + response_data = loads(resp.text) + for result in response_data: + episode = result['Episode'] + timestamp = result['Timestamp'] + + results.append({'template': 'images.html', + 'url': RESULT_URL.format(base=BASE, + query=urlencode({'p': 'caption', 'e': episode, 't': timestamp})), + 'title': episode, + 'content': '', + 'thumbnail_src': THUMB_URL.format(base=BASE, episode=episode, timestamp=timestamp), + 'img_src': IMAGE_URL.format(base=BASE, episode=episode, timestamp=timestamp)}) + + return results diff --git a/sources/searx/engines/gigablast.py b/sources/searx/engines/gigablast.py index 3fef102..1cc2431 100644 --- a/sources/searx/engines/gigablast.py +++ b/sources/searx/engines/gigablast.py @@ -10,20 +10,30 @@ @parse url, title, content """ -from urllib import urlencode from cgi import escape -from lxml import etree +from json import loads from random import randint from time import time +from urllib import urlencode # engine dependent config categories = ['general'] paging = True -number_of_results = 5 +number_of_results = 10 +language_support = True +safesearch = True -# search-url, invalid HTTPS certificate +# search-url base_url = 'https://gigablast.com/' -search_string = 'search?{query}&n={number_of_results}&s={offset}&format=xml&qh=0&rxiyd={rxiyd}&rand={rand}' +search_string = 'search?{query}'\ + '&n={number_of_results}'\ + '&c=main'\ + '&s={offset}'\ + '&format=json'\ + '&qh=0'\ + '&rxiwd={rxiwd}'\ + '&qlang={lang}'\ + '&ff={safesearch}' # specific xpath variables results_xpath = '//response//result' @@ -36,12 +46,23 @@ content_xpath = './/sum' def request(query, params): offset = (params['pageno'] - 1) * number_of_results - search_path = search_string.format( - query=urlencode({'q': query}), - offset=offset, - number_of_results=number_of_results, - rxiyd=randint(10000, 10000000), - rand=int(time())) + if params['language'] == 'all': + language = 'xx' + else: + language = params['language'][0:2] + + if params['safesearch'] >= 1: + safesearch = 1 + else: + safesearch = 0 + + search_path = search_string.format(query=urlencode({'q': query}), + offset=offset, + number_of_results=number_of_results, + rxiwd=1, + # rand=int(time()), + lang=language, + safesearch=safesearch) params['url'] = base_url + search_path @@ -52,18 +73,14 @@ def request(query, params): def response(resp): results = [] - dom = etree.fromstring(resp.content) - # parse results - for result in dom.xpath(results_xpath): - url = result.xpath(url_xpath)[0].text - title = result.xpath(title_xpath)[0].text - content = escape(result.xpath(content_xpath)[0].text) + response_json = loads(resp.text) + for result in response_json['results']: # append result - results.append({'url': url, - 'title': title, - 'content': content}) + results.append({'url': result['url'], + 'title': escape(result['title']), + 'content': escape(result['sum'])}) # return results return results diff --git a/sources/searx/engines/google.py b/sources/searx/engines/google.py index e822603..6018ad1 100644 --- a/sources/searx/engines/google.py +++ b/sources/searx/engines/google.py @@ -46,11 +46,11 @@ country_to_hostname = { 'NZ': 'www.google.co.nz', # New Zealand 'PH': 'www.google.com.ph', # Philippines 'SG': 'www.google.com.sg', 
# Singapore - # 'US': 'www.google.us', # United State, redirect to .com + # 'US': 'www.google.us', # United States, redirect to .com 'ZA': 'www.google.co.za', # South Africa 'AR': 'www.google.com.ar', # Argentina 'CL': 'www.google.cl', # Chile - 'ES': 'www.google.es', # Span + 'ES': 'www.google.es', # Spain 'MX': 'www.google.com.mx', # Mexico 'EE': 'www.google.ee', # Estonia 'FI': 'www.google.fi', # Finland @@ -61,7 +61,7 @@ country_to_hostname = { 'HU': 'www.google.hu', # Hungary 'IT': 'www.google.it', # Italy 'JP': 'www.google.co.jp', # Japan - 'KR': 'www.google.co.kr', # South Korean + 'KR': 'www.google.co.kr', # South Korea 'LT': 'www.google.lt', # Lithuania 'LV': 'www.google.lv', # Latvia 'NO': 'www.google.no', # Norway @@ -76,9 +76,9 @@ country_to_hostname = { 'SE': 'www.google.se', # Sweden 'TH': 'www.google.co.th', # Thailand 'TR': 'www.google.com.tr', # Turkey - 'UA': 'www.google.com.ua', # Ikraine - # 'CN': 'www.google.cn', # China, only from china ? - 'HK': 'www.google.com.hk', # Hong kong + 'UA': 'www.google.com.ua', # Ukraine + # 'CN': 'www.google.cn', # China, only from China ? + 'HK': 'www.google.com.hk', # Hong Kong 'TW': 'www.google.com.tw' # Taiwan } @@ -90,7 +90,7 @@ url_map = 'https://www.openstreetmap.org/'\ search_path = '/search' search_url = ('https://{hostname}' + search_path + - '?{query}&start={offset}&gbv=1&gws_rd=cr') + '?{query}&start={offset}&gws_rd=cr&gbv=1&lr={lang}&ei=x') # other URLs map_hostname_start = 'maps.google.' @@ -99,7 +99,7 @@ redirect_path = '/url' images_path = '/images' # specific xpath variables -results_xpath = '//li[@class="g"]' +results_xpath = '//div[@class="g"]' url_xpath = './/h3/a/@href' title_xpath = './/h3' content_xpath = './/span[@class="st"]' @@ -160,6 +160,7 @@ def request(query, params): if params['language'] == 'all': language = 'en' country = 'US' + url_lang = '' else: language_array = params['language'].lower().split('_') if len(language_array) == 2: @@ -167,6 +168,7 @@ def request(query, params): else: country = 'US' language = language_array[0] + ',' + language_array[0] + '-' + country + url_lang = 'lang_' + language_array[0] if use_locale_domain: google_hostname = country_to_hostname.get(country.upper(), default_hostname) @@ -175,7 +177,8 @@ def request(query, params): params['url'] = search_url.format(offset=offset, query=urlencode({'q': query}), - hostname=google_hostname) + hostname=google_hostname, + lang=url_lang) params['headers']['Accept-Language'] = language params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' @@ -209,29 +212,29 @@ def response(resp): parsed_url = urlparse(url, google_hostname) # map result - if ((parsed_url.netloc == google_hostname and parsed_url.path.startswith(maps_path)) - or (parsed_url.netloc.startswith(map_hostname_start))): - x = result.xpath(map_near) - if len(x) > 0: - # map : near the location - results = results + parse_map_near(parsed_url, x, google_hostname) - else: - # map : detail about a location - results = results + parse_map_detail(parsed_url, result, google_hostname) + if parsed_url.netloc == google_hostname: + # TODO fix inside links + continue + # if parsed_url.path.startswith(maps_path) or parsed_url.netloc.startswith(map_hostname_start): + # print "yooooo"*30 + # x = result.xpath(map_near) + # if len(x) > 0: + # # map : near the location + # results = results + parse_map_near(parsed_url, x, google_hostname) + # else: + # # map : detail about a location + # results = results + parse_map_detail(parsed_url, result, google_hostname) 
+ # # google news + # elif parsed_url.path == search_path: + # # skipping news results + # pass - # google news - elif (parsed_url.netloc == google_hostname - and parsed_url.path == search_path): - # skipping news results - pass - - # images result - elif (parsed_url.netloc == google_hostname - and parsed_url.path == images_path): - # only thumbnail image provided, - # so skipping image results - # results = results + parse_images(result, google_hostname) - pass + # # images result + # elif parsed_url.path == images_path: + # # only thumbnail image provided, + # # so skipping image results + # # results = results + parse_images(result, google_hostname) + # pass else: # normal result diff --git a/sources/searx/engines/google_images.py b/sources/searx/engines/google_images.py index 9d51428..efe4681 100644 --- a/sources/searx/engines/google_images.py +++ b/sources/searx/engines/google_images.py @@ -49,8 +49,6 @@ def response(resp): # parse results for result in dom.xpath('//div[@data-ved]'): - data_url = result.xpath('./a/@href')[0] - data_query = {k: v[0] for k, v in parse_qs(data_url.split('?', 1)[1]).iteritems()} metadata = loads(result.xpath('./div[@class="rg_meta"]/text()')[0]) @@ -60,11 +58,11 @@ def response(resp): thumbnail_src = thumbnail_src.replace("http://", "https://") # append result - results.append({'url': data_query['imgrefurl'], + results.append({'url': metadata['ru'], 'title': metadata['pt'], 'content': metadata['s'], - 'thumbnail_src': metadata['tu'], - 'img_src': data_query['imgurl'], + 'thumbnail_src': thumbnail_src, + 'img_src': metadata['ou'], 'template': 'images.html'}) # return results diff --git a/sources/searx/engines/mediawiki.py b/sources/searx/engines/mediawiki.py index 9fb72e8..26d3720 100644 --- a/sources/searx/engines/mediawiki.py +++ b/sources/searx/engines/mediawiki.py @@ -24,13 +24,13 @@ number_of_results = 1 # search-url base_url = 'https://{language}.wikipedia.org/' -search_url = base_url + 'w/api.php?action=query'\ - '&list=search'\ - '&{query}'\ - '&srprop=timestamp'\ - '&format=json'\ - '&sroffset={offset}'\ - '&srlimit={limit}' # noqa +search_postfix = 'w/api.php?action=query'\ + '&list=search'\ + '&{query}'\ + '&format=json'\ + '&sroffset={offset}'\ + '&srlimit={limit}'\ + '&srwhat=nearmatch' # search for a near match in the title # do search-request @@ -48,12 +48,15 @@ def request(query, params): else: language = params['language'].split('_')[0] - if len(format_strings) > 1: + # format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)] + if any(x[1] == 'language' for x in format_strings): string_args['language'] = language # write search-language back to params, required in response params['language'] = language + search_url = base_url + search_postfix + params['url'] = search_url.format(**string_args) return params @@ -71,6 +74,8 @@ def response(resp): # parse results for result in search_results['query']['search']: + if result.get('snippet', '').startswith('#REDIRECT'): + continue url = base_url.format(language=resp.search_params['language']) +\ 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')) diff --git a/sources/searx/engines/nyaa.py b/sources/searx/engines/nyaa.py new file mode 100644 index 0000000..cda8231 --- /dev/null +++ b/sources/searx/engines/nyaa.py @@ -0,0 +1,119 @@ +""" + Nyaa.se (Anime Bittorrent tracker) + + @website http://www.nyaa.se/ + @provide-api no + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, content, seed, leech, torrentfile +""" + +from 
cgi import escape +from urllib import urlencode +from lxml import html +from searx.engines.xpath import extract_text + +# engine dependent config +categories = ['files', 'images', 'videos', 'music'] +paging = True + +# search-url +base_url = 'http://www.nyaa.se/' +search_url = base_url + '?page=search&{query}&offset={offset}' + +# xpath queries +xpath_results = '//table[@class="tlist"]//tr[contains(@class, "tlistrow")]' +xpath_category = './/td[@class="tlisticon"]/a' +xpath_title = './/td[@class="tlistname"]/a' +xpath_torrent_file = './/td[@class="tlistdownload"]/a' +xpath_filesize = './/td[@class="tlistsize"]/text()' +xpath_seeds = './/td[@class="tlistsn"]/text()' +xpath_leeches = './/td[@class="tlistln"]/text()' +xpath_downloads = './/td[@class="tlistdn"]/text()' + + +# convert a variable to integer or return 0 if it's not a number +def int_or_zero(num): + if isinstance(num, list): + if len(num) < 1: + return 0 + num = num[0] + if num.isdigit(): + return int(num) + return 0 + + +# get multiplier to convert torrent size to bytes +def get_filesize_mul(suffix): + return { + 'KB': 1024, + 'MB': 1024 ** 2, + 'GB': 1024 ** 3, + 'TB': 1024 ** 4, + + 'KIB': 1024, + 'MIB': 1024 ** 2, + 'GIB': 1024 ** 3, + 'TIB': 1024 ** 4 + }[str(suffix).upper()] + + +# do search-request +def request(query, params): + query = urlencode({'term': query}) + params['url'] = search_url.format(query=query, offset=params['pageno']) + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + for result in dom.xpath(xpath_results): + # category in which our torrent belongs + category = result.xpath(xpath_category)[0].attrib.get('title') + + # torrent title + page_a = result.xpath(xpath_title)[0] + title = escape(extract_text(page_a)) + + # link to the page + href = page_a.attrib.get('href') + + # link to the torrent file + torrent_link = result.xpath(xpath_torrent_file)[0].attrib.get('href') + + # torrent size + try: + file_size, suffix = result.xpath(xpath_filesize)[0].split(' ') + file_size = int(float(file_size) * get_filesize_mul(suffix)) + except Exception as e: + file_size = None + + # seed count + seed = int_or_zero(result.xpath(xpath_seeds)) + + # leech count + leech = int_or_zero(result.xpath(xpath_leeches)) + + # torrent downloads count + downloads = int_or_zero(result.xpath(xpath_downloads)) + + # content string contains all information not included into template + content = 'Category: "{category}". Downloaded {downloads} times.' 
+ content = content.format(category=category, downloads=downloads) + content = escape(content) + + results.append({'url': href, + 'title': title, + 'content': content, + 'seed': seed, + 'leech': leech, + 'filesize': file_size, + 'torrentfile': torrent_link, + 'template': 'torrent.html'}) + + return results diff --git a/sources/searx/engines/reddit.py b/sources/searx/engines/reddit.py new file mode 100644 index 0000000..3ca7e44 --- /dev/null +++ b/sources/searx/engines/reddit.py @@ -0,0 +1,79 @@ +""" + Reddit + + @website https://www.reddit.com/ + @provide-api yes (https://www.reddit.com/dev/api) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content, thumbnail, publishedDate +""" + +import json +from cgi import escape +from urllib import urlencode +from urlparse import urlparse, urljoin +from datetime import datetime + +# engine dependent config +categories = ['general', 'images', 'news', 'social media'] +page_size = 25 + +# search-url +base_url = 'https://www.reddit.com/' +search_url = base_url + 'search.json?{query}' + + +# do search-request +def request(query, params): + query = urlencode({'q': query, + 'limit': page_size}) + params['url'] = search_url.format(query=query) + + return params + + +# get response from search-request +def response(resp): + img_results = [] + text_results = [] + + search_results = json.loads(resp.text) + + # return empty array if there are no results + if 'data' not in search_results: + return [] + + posts = search_results.get('data', {}).get('children', []) + + # process results + for post in posts: + data = post['data'] + + # extract post information + params = { + 'url': urljoin(base_url, data['permalink']), + 'title': data['title'] + } + + # if thumbnail field contains a valid URL, we need to change template + thumbnail = data['thumbnail'] + url_info = urlparse(thumbnail) + # netloc & path + if url_info[1] != '' and url_info[2] != '': + params['img_src'] = data['url'] + params['thumbnail_src'] = thumbnail + params['template'] = 'images.html' + img_results.append(params) + else: + created = datetime.fromtimestamp(data['created_utc']) + content = escape(data['selftext']) + if len(content) > 500: + content = content[:500] + '...' 
+ params['content'] = content + params['publishedDate'] = created + text_results.append(params) + + # show images first and text results second + return img_results + text_results diff --git a/sources/searx/engines/searchcode_code.py b/sources/searx/engines/searchcode_code.py index bd5eb71..de8cd43 100644 --- a/sources/searx/engines/searchcode_code.py +++ b/sources/searx/engines/searchcode_code.py @@ -20,7 +20,7 @@ paging = True # search-url url = 'https://searchcode.com/' -search_url = url+'api/codesearch_I/?{query}&p={pageno}' +search_url = url + 'api/codesearch_I/?{query}&p={pageno}' # special code-endings which are not recognised by the file ending code_endings = {'cs': 'c#', @@ -32,7 +32,7 @@ code_endings = {'cs': 'c#', # do search-request def request(query, params): params['url'] = search_url.format(query=urlencode({'q': query}), - pageno=params['pageno']-1) + pageno=params['pageno'] - 1) # Disable SSL verification # error: (60) SSL certificate problem: unable to get local issuer diff --git a/sources/searx/engines/searchcode_doc.py b/sources/searx/engines/searchcode_doc.py index 9453f31..f24fe6f 100644 --- a/sources/searx/engines/searchcode_doc.py +++ b/sources/searx/engines/searchcode_doc.py @@ -19,13 +19,13 @@ paging = True # search-url url = 'https://searchcode.com/' -search_url = url+'api/search_IV/?{query}&p={pageno}' +search_url = url + 'api/search_IV/?{query}&p={pageno}' # do search-request def request(query, params): params['url'] = search_url.format(query=urlencode({'q': query}), - pageno=params['pageno']-1) + pageno=params['pageno'] - 1) # Disable SSL verification # error: (60) SSL certificate problem: unable to get local issuer diff --git a/sources/searx/engines/soundcloud.py b/sources/searx/engines/soundcloud.py index 46e17fc..ac23c1e 100644 --- a/sources/searx/engines/soundcloud.py +++ b/sources/searx/engines/soundcloud.py @@ -10,17 +10,19 @@ @parse url, title, content, publishedDate, embedded """ +import re +from StringIO import StringIO from json import loads +from lxml import etree from urllib import urlencode, quote_plus from dateutil import parser +from searx import logger +from searx.poolrequests import get as http_get # engine dependent config categories = ['music'] paging = True -# api-key -guest_client_id = 'b45b1aa10f1ac2941910a7f0d10f8e28' - # search-url url = 'https://api.soundcloud.com/' search_url = url + 'search?{query}'\ @@ -35,6 +37,30 @@ embedded_url = '' +def get_client_id(): + response = http_get("https://soundcloud.com") + rx_namespace = {"re": "http://exslt.org/regular-expressions"} + + if response.ok: + tree = etree.parse(StringIO(response.content), etree.HTMLParser()) + script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace) + app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None] + + # extracts valid app_js urls from soundcloud.com content + for app_js_url in app_js_urls: + # gets app_js and searches for the clientid + response = http_get(app_js_url) + if response.ok: + cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I) + if cids is not None and len(cids.groups()): + return cids.groups()[0] + logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!") + return "" + +# api-key +guest_client_id = get_client_id() + + # do search-request def request(query, params): offset = (params['pageno'] - 1) * 20 diff --git a/sources/searx/engines/stackoverflow.py b/sources/searx/engines/stackoverflow.py index 34ecaba..fdd3711 100644 --- 
a/sources/searx/engines/stackoverflow.py +++ b/sources/searx/engines/stackoverflow.py @@ -22,7 +22,7 @@ paging = True # search-url url = 'https://stackoverflow.com/' -search_url = url+'search?{query}&page={pageno}' +search_url = url + 'search?{query}&page={pageno}' # specific xpath variables results_xpath = '//div[contains(@class,"question-summary")]' diff --git a/sources/searx/engines/startpage.py b/sources/searx/engines/startpage.py index a91cafa..52dd0b9 100644 --- a/sources/searx/engines/startpage.py +++ b/sources/searx/engines/startpage.py @@ -90,8 +90,8 @@ def response(resp): # check if search result starts with something like: "2 Sep 2014 ... " if re.match("^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content): - date_pos = content.find('...')+4 - date_string = content[0:date_pos-5] + date_pos = content.find('...') + 4 + date_string = content[0:date_pos - 5] published_date = parser.parse(date_string, dayfirst=True) # fix content string @@ -99,8 +99,8 @@ def response(resp): # check if search result starts with something like: "5 days ago ... " elif re.match("^[0-9]+ days? ago \.\.\. ", content): - date_pos = content.find('...')+4 - date_string = content[0:date_pos-5] + date_pos = content.find('...') + 4 + date_string = content[0:date_pos - 5] # calculate datetime published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group())) diff --git a/sources/searx/engines/swisscows.py b/sources/searx/engines/swisscows.py index 2d31264..864436a 100644 --- a/sources/searx/engines/swisscows.py +++ b/sources/searx/engines/swisscows.py @@ -10,6 +10,7 @@ @parse url, title, content """ +from cgi import escape from json import loads from urllib import urlencode, unquote import re @@ -77,7 +78,7 @@ def response(resp): # append result results.append({'url': result['SourceUrl'], - 'title': result['Title'], + 'title': escape(result['Title']), 'content': '', 'img_src': img_url, 'template': 'images.html'}) @@ -89,8 +90,8 @@ def response(resp): # append result results.append({'url': result_url, - 'title': result_title, - 'content': result_content}) + 'title': escape(result_title), + 'content': escape(result_content)}) # parse images for result in json.get('Images', []): @@ -99,7 +100,7 @@ def response(resp): # append result results.append({'url': result['SourceUrl'], - 'title': result['Title'], + 'title': escape(result['Title']), 'content': '', 'img_src': img_url, 'template': 'images.html'}) diff --git a/sources/searx/engines/tokyotoshokan.py b/sources/searx/engines/tokyotoshokan.py new file mode 100644 index 0000000..17e8e21 --- /dev/null +++ b/sources/searx/engines/tokyotoshokan.py @@ -0,0 +1,102 @@ +""" + Tokyo Toshokan (A BitTorrent Library for Japanese Media) + + @website https://www.tokyotosho.info/ + @provide-api no + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, publishedDate, seed, leech, + filesize, magnetlink, content +""" + +import re +from cgi import escape +from urllib import urlencode +from lxml import html +from searx.engines.xpath import extract_text +from datetime import datetime +from searx.engines.nyaa import int_or_zero, get_filesize_mul + +# engine dependent config +categories = ['files', 'videos', 'music'] +paging = True + +# search-url +base_url = 'https://www.tokyotosho.info/' +search_url = base_url + 'search.php?{query}' + + +# do search-request +def request(query, params): + query = urlencode({'page': params['pageno'], + 'terms': query}) + params['url'] = search_url.format(query=query) + return params + + 
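# editor's note: a small, self-contained sketch (not part of the patch) showing how the
# helpers imported from searx.engines.nyaa behave in this engine; the sample values are
# invented for illustration only.
from searx.engines.nyaa import int_or_zero, get_filesize_mul

# "Size: 1.228GB" from the description cell -> suffix multiplier -> size in bytes
filesize = int(float('1.228') * get_filesize_mul('GB'))  # roughly 1.3e9

# xpath text() queries return lists; empty or non-numeric input collapses to 0
seed = int_or_zero(['312'])   # -> 312
leech = int_or_zero([])       # -> 0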
+# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + rows = dom.xpath('//table[@class="listing"]//tr[contains(@class, "category_0")]') + + # check if there are no results or page layout was changed so we cannot parse it + # currently there are two rows for each result, so total count must be even + if len(rows) == 0 or len(rows) % 2 != 0: + return [] + + # regular expression for parsing torrent size strings + size_re = re.compile('Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE) + + # processing the results, two rows at a time + for i in xrange(0, len(rows), 2): + # parse the first row + name_row = rows[i] + + links = name_row.xpath('./td[@class="desc-top"]/a') + params = { + 'template': 'torrent.html', + 'url': links[-1].attrib.get('href'), + 'title': extract_text(links[-1]) + } + # I have not yet seen any torrents without magnet links, but + # it's better to be prepared to stumble upon one some day + if len(links) == 2: + magnet = links[0].attrib.get('href') + if magnet.startswith('magnet'): + # okay, we have a valid magnet link, let's add it to the result + params['magnetlink'] = magnet + + # no more info in the first row, start parsing the second one + info_row = rows[i + 1] + desc = extract_text(info_row.xpath('./td[@class="desc-bot"]')[0]) + for item in desc.split('|'): + item = item.strip() + if item.startswith('Size:'): + try: + # ('1.228', 'GB') + groups = size_re.match(item).groups() + multiplier = get_filesize_mul(groups[1]) + params['filesize'] = int(multiplier * float(groups[0])) + except Exception as e: + pass + elif item.startswith('Date:'): + try: + # Date: 2016-02-21 21:44 UTC + date = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M UTC') + params['publishedDate'] = date + except Exception as e: + pass + elif item.startswith('Comment:'): + params['content'] = item + stats = info_row.xpath('./td[@class="stats"]/span') + # has the layout not changed yet? 
+ if len(stats) == 3: + params['seed'] = int_or_zero(extract_text(stats[0])) + params['leech'] = int_or_zero(extract_text(stats[1])) + + results.append(params) + + return results diff --git a/sources/searx/engines/torrentz.py b/sources/searx/engines/torrentz.py new file mode 100644 index 0000000..92fbe70 --- /dev/null +++ b/sources/searx/engines/torrentz.py @@ -0,0 +1,93 @@ +""" + Torrentz.eu (BitTorrent meta-search engine) + + @website https://torrentz.eu/ + @provide-api no + + @using-api no + @results HTML + @stable no (HTML can change, although unlikely, + see https://torrentz.eu/torrentz.btsearch) + @parse url, title, publishedDate, seed, leech, filesize, magnetlink +""" + +import re +from cgi import escape +from urllib import urlencode +from lxml import html +from searx.engines.xpath import extract_text +from datetime import datetime +from searx.engines.nyaa import int_or_zero, get_filesize_mul + +# engine dependent config +categories = ['files', 'videos', 'music'] +paging = True + +# search-url +# https://torrentz.eu/search?f=EXAMPLE&p=6 +base_url = 'https://torrentz.eu/' +search_url = base_url + 'search?{query}' + + +# do search-request +def request(query, params): + page = params['pageno'] - 1 + query = urlencode({'q': query, 'p': page}) + params['url'] = search_url.format(query=query) + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + for result in dom.xpath('//div[@class="results"]/dl'): + name_cell = result.xpath('./dt')[0] + title = extract_text(name_cell) + + # skip rows that do not contain a link to a torrent + links = name_cell.xpath('./a') + if len(links) != 1: + continue + + # extract url and remove a slash in the beginning + link = links[0].attrib.get('href').lstrip('/') + + seed = result.xpath('./dd/span[@class="u"]/text()')[0].replace(',', '') + leech = result.xpath('./dd/span[@class="d"]/text()')[0].replace(',', '') + + params = { + 'url': base_url + link, + 'title': title, + 'seed': int_or_zero(seed), + 'leech': int_or_zero(leech), + 'template': 'torrent.html' + } + + # let's try to calculate the torrent size + try: + size_str = result.xpath('./dd/span[@class="s"]/text()')[0] + size, suffix = size_str.split() + params['filesize'] = int(size) * get_filesize_mul(suffix) + except Exception as e: + pass + + # does our link contain a valid SHA1 sum? 
+ if re.compile('[0-9a-fA-F]{40}').match(link): + # add a magnet link to the result + params['magnetlink'] = 'magnet:?xt=urn:btih:' + link + + # extract and convert creation date + try: + date_str = result.xpath('./dd/span[@class="a"]/span')[0].attrib.get('title') + # Fri, 25 Mar 2016 16:29:01 + date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S') + params['publishedDate'] = date + except Exception as e: + pass + + results.append(params) + + return results diff --git a/sources/searx/engines/wikidata.py b/sources/searx/engines/wikidata.py index fc840d4..8aa2fcd 100644 --- a/sources/searx/engines/wikidata.py +++ b/sources/searx/engines/wikidata.py @@ -86,15 +86,15 @@ def getDetail(jsonresponse, wikidata_id, language, locale): results.append({'title': title, 'url': official_website}) wikipedia_link_count = 0 - if language != 'en': - wikipedia_link_count += add_url(urls, - 'Wikipedia (' + language + ')', - get_wikilink(result, language + - 'wiki')) - wikipedia_en_link = get_wikilink(result, 'enwiki') + wikipedia_link = get_wikilink(result, language + 'wiki') wikipedia_link_count += add_url(urls, - 'Wikipedia (en)', - wikipedia_en_link) + 'Wikipedia (' + language + ')', + wikipedia_link) + if language != 'en': + wikipedia_en_link = get_wikilink(result, 'enwiki') + wikipedia_link_count += add_url(urls, + 'Wikipedia (en)', + wikipedia_en_link) if wikipedia_link_count == 0: misc_language = get_wiki_firstlanguage(result, 'wiki') if misc_language is not None: @@ -188,7 +188,7 @@ def getDetail(jsonresponse, wikidata_id, language, locale): else: results.append({ 'infobox': title, - 'id': wikipedia_en_link, + 'id': wikipedia_link, 'content': description, 'attributes': attributes, 'urls': urls @@ -295,7 +295,7 @@ def get_geolink(claims, propertyName, defaultValue=''): if precision < 0.0003: zoom = 19 else: - zoom = int(15 - precision*8.8322 + precision*precision*0.625447) + zoom = int(15 - precision * 8.8322 + precision * precision * 0.625447) url = url_map\ .replace('{latitude}', str(value.get('latitude', 0)))\ @@ -318,6 +318,6 @@ def get_wikilink(result, wikiid): def get_wiki_firstlanguage(result, wikipatternid): for k in result.get('sitelinks', {}).keys(): - if k.endswith(wikipatternid) and len(k) == (2+len(wikipatternid)): + if k.endswith(wikipatternid) and len(k) == (2 + len(wikipatternid)): return k[0:2] return None diff --git a/sources/searx/engines/wikipedia.py b/sources/searx/engines/wikipedia.py new file mode 100644 index 0000000..fed7b26 --- /dev/null +++ b/sources/searx/engines/wikipedia.py @@ -0,0 +1,114 @@ +""" + Wikipedia (Web) + + @website https://{language}.wikipedia.org + @provide-api yes + + @using-api yes + @results JSON + @stable yes + @parse url, infobox +""" + +from json import loads +from urllib import urlencode, quote + +# search-url +base_url = 'https://{language}.wikipedia.org/' +search_postfix = 'w/api.php?'\ + 'action=query'\ + '&format=json'\ + '&{query}'\ + '&prop=extracts|pageimages'\ + '&exintro'\ + '&explaintext'\ + '&pithumbsize=300'\ + '&redirects' + + +# set language in base_url +def url_lang(lang): + if lang == 'all': + language = 'en' + else: + language = lang.split('_')[0] + + return base_url.format(language=language) + + +# do search-request +def request(query, params): + if query.islower(): + query += '|' + query.title() + + params['url'] = url_lang(params['language']) \ + + search_postfix.format(query=urlencode({'titles': query})) + + return params + + +# get first meaningful paragraph +# this should filter out disambiguation pages and notes above first 
paragraph
+# "magic numbers" were obtained by fine-tuning
+def extract_first_paragraph(content, title, image):
+    first_paragraph = None
+
+    failed_attempts = 0
+    for paragraph in content.split('\n'):
+
+        starts_with_title = paragraph.lower().find(title.lower(), 0, len(title) + 35)
+        length = len(paragraph)
+
+        if length >= 200 or (starts_with_title >= 0 and (image or length >= 150)):
+            first_paragraph = paragraph
+            break
+
+        failed_attempts += 1
+        if failed_attempts > 3:
+            return None
+
+    return first_paragraph
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    search_result = loads(resp.content)
+
+    # wikipedia article's unique id
+    # first valid id is assumed to be the requested article
+    for article_id in search_result['query']['pages']:
+        page = search_result['query']['pages'][article_id]
+        if int(article_id) > 0:
+            break
+
+    if int(article_id) < 0:
+        return []
+
+    title = page.get('title')
+
+    image = page.get('thumbnail')
+    if image:
+        image = image.get('source')
+
+    extract = page.get('extract')
+
+    summary = extract_first_paragraph(extract, title, image)
+    if not summary:
+        return []
+
+    # link to wikipedia article
+    # parentheses are not quoted to make infobox mergeable with wikidata's
+    wikipedia_link = url_lang(resp.search_params['language']) \
+        + 'wiki/' + quote(title.replace(' ', '_').encode('utf8')).replace('%28', '(').replace('%29', ')')
+
+    results.append({'url': wikipedia_link, 'title': title})
+
+    results.append({'infobox': title,
+                    'id': wikipedia_link,
+                    'content': summary,
+                    'img_src': image,
+                    'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]})
+
+    return results
diff --git a/sources/searx/engines/wolframalpha_api.py b/sources/searx/engines/wolframalpha_api.py
new file mode 100644
index 0000000..4526c82
--- /dev/null
+++ b/sources/searx/engines/wolframalpha_api.py
@@ -0,0 +1,122 @@
+# Wolfram Alpha (Science)
+#
+# @website https://www.wolframalpha.com
+# @provide-api yes (https://api.wolframalpha.com/v2/)
+#
+# @using-api yes
+# @results XML
+# @stable yes
+# @parse url, infobox
+
+from urllib import urlencode
+from lxml import etree
+
+# search-url
+search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
+site_url = 'https://www.wolframalpha.com/input/?{query}'
+api_key = ''  # defined in settings.yml
+
+# xpath variables
+failure_xpath = '/queryresult[attribute::success="false"]'
+answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'
+input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext'
+pods_xpath = '//pod'
+subpods_xpath = './subpod'
+pod_id_xpath = './@id'
+pod_title_xpath = './@title'
+plaintext_xpath = './plaintext'
+image_xpath = './img'
+img_src_xpath = './@src'
+img_alt_xpath = './@alt'
+
+# pods to display as image in infobox
+# these pods do return plaintext, but they look better and are more useful as images
+image_pods = {'VisualRepresentation',
+              'Illustration'}
+
+
+# do search-request
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'input': query}),
+                                      api_key=api_key)
+    params['headers']['Referer'] = site_url.format(query=urlencode({'i': query}))
+
+    return params
+
+
+# replace Private Use Area characters to make text legible
+def replace_pua_chars(text):
+    pua_chars = {u'\uf522': u'\u2192',  # right arrow
+                 u'\uf7b1': u'\u2115',  # set of natural numbers
+                 u'\uf7b4': u'\u211a',  # set of rational numbers
+                 u'\uf7b5': u'\u211d',  # set of real numbers
+                 u'\uf7bd': u'\u2124',  # set of integer numbers
+                 u'\uf74c': 'd',  # differential
+                 u'\uf74d': u'\u212f',  # Euler's number
+                 u'\uf74e': 'i',  # imaginary number
+                 u'\uf7d9': '='}  # equals sign
+
+    for k, v in pua_chars.iteritems():
+        text = text.replace(k, v)
+
+    return text
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    search_results = etree.XML(resp.content)
+
+    # return empty array if there are no results
+    if search_results.xpath(failure_xpath):
+        return []
+
+    try:
+        infobox_title = search_results.xpath(input_xpath)[0].text
+    except:
+        infobox_title = None
+
+    pods = search_results.xpath(pods_xpath)
+    result_chunks = []
+    for pod in pods:
+        pod_id = pod.xpath(pod_id_xpath)[0]
+        pod_title = pod.xpath(pod_title_xpath)[0]
+
+        subpods = pod.xpath(subpods_xpath)
+        if not subpods:
+            continue
+
+        # Append either text or an image, depending on which one is more suitable
+        for subpod in subpods:
+            content = subpod.xpath(plaintext_xpath)[0].text
+            image = subpod.xpath(image_xpath)
+
+            if content and pod_id not in image_pods:
+
+                # if no input pod was found, title is the first plaintext pod
+                if not infobox_title:
+                    infobox_title = content
+
+                content = replace_pua_chars(content)
+                result_chunks.append({'label': pod_title, 'value': content})
+
+            elif image:
+                result_chunks.append({'label': pod_title,
+                                      'image': {'src': image[0].xpath(img_src_xpath)[0],
+                                                'alt': image[0].xpath(img_alt_xpath)[0]}})
+
+    if not result_chunks:
+        return []
+
+    # append infobox
+    results.append({'infobox': infobox_title,
+                    'attributes': result_chunks,
+                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
+
+    # append link to site
+    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
+                    'title': 'Wolfram|Alpha',
+                    'content': infobox_title})
+
+    return results
diff --git a/sources/searx/engines/wolframalpha_noapi.py b/sources/searx/engines/wolframalpha_noapi.py
new file mode 100644
index 0000000..59629b8
--- /dev/null
+++ b/sources/searx/engines/wolframalpha_noapi.py
@@ -0,0 +1,116 @@
+# Wolfram|Alpha (Science)
+#
+# @website https://www.wolframalpha.com/
+# @provide-api yes (https://api.wolframalpha.com/v2/)
+#
+# @using-api no
+# @results JSON
+# @stable no
+# @parse url, infobox
+
+from cgi import escape
+from json import loads
+from time import time
+from urllib import urlencode
+from lxml.etree import XML
+
+from searx.poolrequests import get as http_get
+
+# search-url
+url = 'https://www.wolframalpha.com/'
+
+search_url = url + 'input/json.jsp'\
+    '?async=false'\
+    '&banners=raw'\
+    '&debuggingdata=false'\
+    '&format=image,plaintext,imagemap,minput,moutput'\
+    '&formattimeout=2'\
+    '&{query}'\
+    '&output=JSON'\
+    '&parsetimeout=2'\
+    '&proxycode={token}'\
+    '&scantimeout=0.5'\
+    '&sponsorcategories=true'\
+    '&statemethod=deploybutton'
+
+referer_url = url + 'input/?{query}'
+
+token = {'value': '',
+         'last_updated': None}
+
+# pods to display as image in infobox
+# these pods do return plaintext, but they look better and are more useful as images
+image_pods = {'VisualRepresentation',
+              'Illustration',
+              'Symbol'}
+
+
+# Wolfram|Alpha seems to reset its token every hour
+def obtain_token():
+    update_time = time() - (time() % 3600)
+    try:
+        token_response = http_get('https://www.wolframalpha.com/input/api/v1/code?ts=9999999999999999999', timeout=2.0)
+        token['value'] = loads(token_response.text)['code']
+        token['last_updated'] = update_time
+    except:
+        pass
+    return token
+
+
+obtain_token()
+
+
+# do search-request
+def request(query, params):
+    # obtain token if last update was more than 
an hour + if time() - token['last_updated'] > 3600: + obtain_token() + params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value']) + params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query})) + + return params + + +# get response from search-request +def response(resp): + results = [] + + resp_json = loads(resp.text) + + if not resp_json['queryresult']['success']: + return [] + + # TODO handle resp_json['queryresult']['assumptions'] + result_chunks = [] + infobox_title = None + for pod in resp_json['queryresult']['pods']: + pod_id = pod.get('id', '') + pod_title = pod.get('title', '') + + if 'subpods' not in pod: + continue + + if pod_id == 'Input' or not infobox_title: + infobox_title = pod['subpods'][0]['plaintext'] + + for subpod in pod['subpods']: + if subpod['plaintext'] != '' and pod_id not in image_pods: + # append unless it's not an actual answer + if subpod['plaintext'] != '(requires interactivity)': + result_chunks.append({'label': pod_title, 'value': subpod['plaintext']}) + + elif 'img' in subpod: + result_chunks.append({'label': pod_title, 'image': subpod['img']}) + + if not result_chunks: + return [] + + results.append({'infobox': infobox_title, + 'attributes': result_chunks, + 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]}) + + results.append({'url': resp.request.headers['Referer'].decode('utf8'), + 'title': 'Wolfram|Alpha', + 'content': infobox_title}) + + return results diff --git a/sources/searx/engines/www1x.py b/sources/searx/engines/www1x.py index ddb79bf..1269a54 100644 --- a/sources/searx/engines/www1x.py +++ b/sources/searx/engines/www1x.py @@ -22,7 +22,7 @@ paging = False # search-url base_url = 'https://1x.com' -search_url = base_url+'/backend/search.php?{query}' +search_url = base_url + '/backend/search.php?{query}' # do search-request diff --git a/sources/searx/engines/xpath.py b/sources/searx/engines/xpath.py index 1a599dc..e701c02 100644 --- a/sources/searx/engines/xpath.py +++ b/sources/searx/engines/xpath.py @@ -11,6 +11,14 @@ title_xpath = None suggestion_xpath = '' results_xpath = '' +# parameters for engines with paging support +# +# number of results on each page +# (only needed if the site requires not a page number, but an offset) +page_size = 1 +# number of the first page (usually 0 or 1) +first_page_num = 1 + ''' if xpath_results is list, extract the text from each result and concat the list @@ -43,7 +51,7 @@ def extract_url(xpath_results, search_url): if url.startswith('//'): # add http or https to this kind of url //example.com/ parsed_search_url = urlparse(search_url) - url = parsed_search_url.scheme+url + url = parsed_search_url.scheme + url elif url.startswith('/'): # fix relative url to the search engine url = urljoin(search_url, url) @@ -69,15 +77,21 @@ def normalize_url(url): p = parsed_url.path mark = p.find('/**') if mark != -1: - return unquote(p[mark+3:]).decode('utf-8') + return unquote(p[mark + 3:]).decode('utf-8') return url def request(query, params): query = urlencode({'q': query})[2:] - params['url'] = search_url.format(query=query) + + fp = {'query': query} + if paging and search_url.find('{pageno}') >= 0: + fp['pageno'] = (params['pageno'] + first_page_num - 1) * page_size + + params['url'] = search_url.format(**fp) params['query'] = query + return params diff --git a/sources/searx/engines/yandex.py b/sources/searx/engines/yandex.py index edc6ad5..be3ec36 100644 --- a/sources/searx/engines/yandex.py +++ 
b/sources/searx/engines/yandex.py @@ -9,6 +9,7 @@ @parse url, title, content """ +from cgi import escape from urllib import urlencode from lxml import html from searx.search import logger @@ -38,7 +39,7 @@ content_xpath = './/div[@class="serp-item__text"]//text()' def request(query, params): lang = params['language'].split('_')[0] host = base_url.format(tld=language_map.get(lang) or default_tld) - params['url'] = host + search_url.format(page=params['pageno']-1, + params['url'] = host + search_url.format(page=params['pageno'] - 1, query=urlencode({'text': query})) return params @@ -51,8 +52,8 @@ def response(resp): for result in dom.xpath(results_xpath): try: res = {'url': result.xpath(url_xpath)[0], - 'title': ''.join(result.xpath(title_xpath)), - 'content': ''.join(result.xpath(content_xpath))} + 'title': escape(''.join(result.xpath(title_xpath))), + 'content': escape(''.join(result.xpath(content_xpath)))} except: logger.exception('yandex parse crash') continue diff --git a/sources/searx/languages.py b/sources/searx/languages.py index df5fabf..70459a5 100644 --- a/sources/searx/languages.py +++ b/sources/searx/languages.py @@ -20,10 +20,10 @@ language_codes = ( ("ar_XA", "Arabic", "Arabia"), ("bg_BG", "Bulgarian", "Bulgaria"), ("cs_CZ", "Czech", "Czech Republic"), - ("de_DE", "German", "Germany"), ("da_DK", "Danish", "Denmark"), ("de_AT", "German", "Austria"), ("de_CH", "German", "Switzerland"), + ("de_DE", "German", "Germany"), ("el_GR", "Greek", "Greece"), ("en_AU", "English", "Australia"), ("en_CA", "English", "Canada"), @@ -61,6 +61,7 @@ language_codes = ( ("nb_NO", "Norwegian", "Norway"), ("nl_BE", "Dutch", "Belgium"), ("nl_NL", "Dutch", "Netherlands"), + ("oc_OC", "Occitan", "Occitan"), ("pl_PL", "Polish", "Poland"), ("pt_BR", "Portuguese", "Brazil"), ("pt_PT", "Portuguese", "Portugal"), diff --git a/sources/searx/plugins/__init__.py b/sources/searx/plugins/__init__.py index a4d7ad8..87cc013 100644 --- a/sources/searx/plugins/__init__.py +++ b/sources/searx/plugins/__init__.py @@ -20,6 +20,7 @@ from searx import logger logger = logger.getChild('plugins') from searx.plugins import (https_rewrite, + open_results_on_new_tab, self_info, search_on_category_select, tracker_url_remover) @@ -72,6 +73,7 @@ class PluginStore(): plugins = PluginStore() plugins.register(https_rewrite) +plugins.register(open_results_on_new_tab) plugins.register(self_info) plugins.register(search_on_category_select) plugins.register(tracker_url_remover) diff --git a/sources/searx/plugins/https_rewrite.py b/sources/searx/plugins/https_rewrite.py index a24f15a..0a58cc8 100644 --- a/sources/searx/plugins/https_rewrite.py +++ b/sources/searx/plugins/https_rewrite.py @@ -103,10 +103,10 @@ def load_single_https_ruleset(rules_path): # into a valid python regex group rule_from = ruleset.attrib['from'].replace('$', '\\') if rule_from.endswith('\\'): - rule_from = rule_from[:-1]+'$' + rule_from = rule_from[:-1] + '$' rule_to = ruleset.attrib['to'].replace('$', '\\') if rule_to.endswith('\\'): - rule_to = rule_to[:-1]+'$' + rule_to = rule_to[:-1] + '$' # TODO, not working yet because of the hack above, # currently doing that in webapp.py diff --git a/sources/searx/plugins/open_results_on_new_tab.py b/sources/searx/plugins/open_results_on_new_tab.py new file mode 100644 index 0000000..5ebece1 --- /dev/null +++ b/sources/searx/plugins/open_results_on_new_tab.py @@ -0,0 +1,24 @@ +''' +searx is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the 
Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +searx is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with searx. If not, see < http://www.gnu.org/licenses/ >. + +(C) 2016 by Adam Tauber, +''' +from flask.ext.babel import gettext +name = gettext('Open result links on new browser tabs') +description = gettext('Results are opened in the same window by default. ' + 'This plugin overwrites the default behaviour to open links on new tabs/windows. ' + '(JavaScript required)') +default_on = False + +js_dependencies = ('plugins/js/open_results_on_new_tab.js',) diff --git a/sources/searx/plugins/search_on_category_select.py b/sources/searx/plugins/search_on_category_select.py index a166702..53585fa 100644 --- a/sources/searx/plugins/search_on_category_select.py +++ b/sources/searx/plugins/search_on_category_select.py @@ -20,4 +20,4 @@ description = gettext('Perform search immediately if a category selected. ' 'Disable to select multiple categories. (JavaScript required)') default_on = True -js_dependencies = ('js/search_on_category_select.js',) +js_dependencies = ('plugins/js/search_on_category_select.js',) diff --git a/sources/searx/poolrequests.py b/sources/searx/poolrequests.py index 4761f6a..13c6a90 100644 --- a/sources/searx/poolrequests.py +++ b/sources/searx/poolrequests.py @@ -92,7 +92,7 @@ def head(url, **kwargs): return request('head', url, **kwargs) -def post(url, data=None, **kwargs): +def post(url, data=None, **kwargs): return request('post', url, data=data, **kwargs) diff --git a/sources/searx/preferences.py b/sources/searx/preferences.py new file mode 100644 index 0000000..ad9e673 --- /dev/null +++ b/sources/searx/preferences.py @@ -0,0 +1,271 @@ +from searx import settings, autocomplete +from searx.languages import language_codes as languages + + +COOKIE_MAX_AGE = 60 * 60 * 24 * 365 * 5 # 5 years +LANGUAGE_CODES = [l[0] for l in languages] +LANGUAGE_CODES.append('all') +DISABLED = 0 +ENABLED = 1 + + +class MissingArgumentException(Exception): + pass + + +class ValidationException(Exception): + pass + + +class Setting(object): + """Base class of user settings""" + + def __init__(self, default_value, **kwargs): + super(Setting, self).__init__() + self.value = default_value + for key, value in kwargs.iteritems(): + setattr(self, key, value) + + self._post_init() + + def _post_init(self): + pass + + def parse(self, data): + self.value = data + + def get_value(self): + return self.value + + def save(self, name, resp): + resp.set_cookie(name, bytes(self.value), max_age=COOKIE_MAX_AGE) + + +class StringSetting(Setting): + """Setting of plain string values""" + pass + + +class EnumStringSetting(Setting): + """Setting of a value which can only come from the given choices""" + + def _post_init(self): + if not hasattr(self, 'choices'): + raise MissingArgumentException('Missing argument: choices') + + if self.value != '' and self.value not in self.choices: + raise ValidationException('Invalid default value: {0}'.format(self.value)) + + def parse(self, data): + if data not in self.choices and data != self.value: + raise ValidationException('Invalid choice: {0}'.format(data)) + self.value = data + + +class MultipleChoiceSetting(EnumStringSetting): + """Setting of values which can 
only come from the given choices""" + + def _post_init(self): + if not hasattr(self, 'choices'): + raise MissingArgumentException('Missing argument: choices') + for item in self.value: + if item not in self.choices: + raise ValidationException('Invalid default value: {0}'.format(self.value)) + + def parse(self, data): + if data == '': + self.value = [] + return + + elements = data.split(',') + for item in elements: + if item not in self.choices: + raise ValidationException('Invalid choice: {0}'.format(item)) + self.value = elements + + def parse_form(self, data): + self.value = [] + for choice in data: + if choice in self.choices and choice not in self.value: + self.value.append(choice) + + def save(self, name, resp): + resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE) + + +class MapSetting(Setting): + """Setting of a value that has to be translated in order to be storable""" + + def _post_init(self): + if not hasattr(self, 'map'): + raise MissingArgumentException('missing argument: map') + if self.value not in self.map.values(): + raise ValidationException('Invalid default value') + + def parse(self, data): + if data not in self.map: + raise ValidationException('Invalid choice: {0}'.format(data)) + self.value = self.map[data] + self.key = data + + def save(self, name, resp): + resp.set_cookie(name, bytes(self.key), max_age=COOKIE_MAX_AGE) + + +class SwitchableSetting(Setting): + """ Base class for settings that can be turned on && off""" + + def _post_init(self): + self.disabled = set() + self.enabled = set() + if not hasattr(self, 'choices'): + raise MissingArgumentException('missing argument: choices') + + def transform_form_items(self, items): + return items + + def transform_values(self, values): + return values + + def parse_cookie(self, data): + if data[DISABLED] != '': + self.disabled = set(data[DISABLED].split(',')) + if data[ENABLED] != '': + self.enabled = set(data[ENABLED].split(',')) + + def parse_form(self, items): + items = self.transform_form_items(items) + + self.disabled = set() + self.enabled = set() + for choice in self.choices: + if choice['default_on']: + if choice['id'] in items: + self.disabled.add(choice['id']) + else: + if choice['id'] not in items: + self.enabled.add(choice['id']) + + def save(self, resp): + resp.set_cookie('disabled_{0}'.format(self.value), ','.join(self.disabled), max_age=COOKIE_MAX_AGE) + resp.set_cookie('enabled_{0}'.format(self.value), ','.join(self.enabled), max_age=COOKIE_MAX_AGE) + + def get_disabled(self): + disabled = self.disabled + for choice in self.choices: + if not choice['default_on'] and choice['id'] not in self.enabled: + disabled.add(choice['id']) + return self.transform_values(disabled) + + def get_enabled(self): + enabled = self.enabled + for choice in self.choices: + if choice['default_on'] and choice['id'] not in self.disabled: + enabled.add(choice['id']) + return self.transform_values(enabled) + + +class EnginesSetting(SwitchableSetting): + def _post_init(self): + super(EnginesSetting, self)._post_init() + transformed_choices = [] + for engine_name, engine in self.choices.iteritems(): + for category in engine.categories: + transformed_choice = dict() + transformed_choice['default_on'] = not engine.disabled + transformed_choice['id'] = '{}__{}'.format(engine_name, category) + transformed_choices.append(transformed_choice) + self.choices = transformed_choices + + def transform_form_items(self, items): + return [item[len('engine_'):].replace('_', ' ').replace(' ', '__') for item in items] + + def 
transform_values(self, values): + if len(values) == 1 and values[0] == '': + return list() + transformed_values = [] + for value in values: + engine, category = value.split('__') + transformed_values.append((engine, category)) + return transformed_values + + +class PluginsSetting(SwitchableSetting): + def _post_init(self): + super(PluginsSetting, self)._post_init() + transformed_choices = [] + for plugin in self.choices: + transformed_choice = dict() + transformed_choice['default_on'] = plugin.default_on + transformed_choice['id'] = plugin.id + transformed_choices.append(transformed_choice) + self.choices = transformed_choices + + def transform_form_items(self, items): + return [item[len('plugin_'):] for item in items] + + +class Preferences(object): + """Stores, validates and saves preferences to cookies""" + + def __init__(self, themes, categories, engines, plugins): + super(Preferences, self).__init__() + + self.key_value_settings = {'categories': MultipleChoiceSetting(['general'], choices=categories), + 'language': EnumStringSetting('all', choices=LANGUAGE_CODES), + 'locale': EnumStringSetting(settings['ui']['default_locale'], + choices=settings['locales'].keys()), + 'autocomplete': EnumStringSetting(settings['search']['autocomplete'], + choices=autocomplete.backends.keys()), + 'image_proxy': MapSetting(settings['server']['image_proxy'], + map={'': settings['server']['image_proxy'], + '0': False, + '1': True}), + 'method': EnumStringSetting('POST', choices=('GET', 'POST')), + 'safesearch': MapSetting(settings['search']['safe_search'], map={'0': 0, + '1': 1, + '2': 2}), + 'theme': EnumStringSetting(settings['ui']['default_theme'], choices=themes)} + + self.engines = EnginesSetting('engines', choices=engines) + self.plugins = PluginsSetting('plugins', choices=plugins) + + def parse_cookies(self, input_data): + for user_setting_name, user_setting in input_data.iteritems(): + if user_setting_name in self.key_value_settings: + self.key_value_settings[user_setting_name].parse(user_setting) + elif user_setting_name == 'disabled_engines': + self.engines.parse_cookie((input_data.get('disabled_engines', ''), + input_data.get('enabled_engines', ''))) + elif user_setting_name == 'disabled_plugins': + self.plugins.parse_cookie((input_data.get('disabled_plugins', ''), + input_data.get('enabled_plugins', ''))) + + def parse_form(self, input_data): + disabled_engines = [] + enabled_categories = [] + disabled_plugins = [] + for user_setting_name, user_setting in input_data.iteritems(): + if user_setting_name in self.key_value_settings: + self.key_value_settings[user_setting_name].parse(user_setting) + elif user_setting_name.startswith('engine_'): + disabled_engines.append(user_setting_name) + elif user_setting_name.startswith('category_'): + enabled_categories.append(user_setting_name[len('category_'):]) + elif user_setting_name.startswith('plugin_'): + disabled_plugins.append(user_setting_name) + self.key_value_settings['categories'].parse_form(enabled_categories) + self.engines.parse_form(disabled_engines) + self.plugins.parse_form(disabled_plugins) + + # cannot be used in case of engines or plugins + def get_value(self, user_setting_name): + if user_setting_name in self.key_value_settings: + return self.key_value_settings[user_setting_name].get_value() + + def save(self, resp): + for user_setting_name, user_setting in self.key_value_settings.iteritems(): + user_setting.save(user_setting_name, resp) + self.engines.save(resp) + self.plugins.save(resp) + return resp diff --git a/sources/searx/query.py 
b/sources/searx/query.py index e79e760..3d617ab 100644 --- a/sources/searx/query.py +++ b/sources/searx/query.py @@ -28,12 +28,12 @@ import re class Query(object): """parse query""" - def __init__(self, query, blocked_engines): + def __init__(self, query, disabled_engines): self.query = query - self.blocked_engines = [] + self.disabled_engines = [] - if blocked_engines: - self.blocked_engines = blocked_engines + if disabled_engines: + self.disabled_engines = disabled_engines self.query_parts = [] self.engines = [] @@ -107,7 +107,7 @@ class Query(object): self.engines.extend({'category': prefix, 'name': engine.name} for engine in categories[prefix] - if (engine.name, prefix) not in self.blocked_engines) + if (engine.name, prefix) not in self.disabled_engines) if query_part[0] == '!': self.specific = True diff --git a/sources/searx/results.py b/sources/searx/results.py index bc656f2..c3040b3 100644 --- a/sources/searx/results.py +++ b/sources/searx/results.py @@ -37,7 +37,7 @@ def merge_two_infoboxes(infobox1, infobox2): urls1 = infobox1.get('urls', None) if urls1 is None: urls1 = [] - infobox1.set('urls', urls1) + infobox1['urls'] = urls1 urlSet = set() for url in infobox1.get('urls', []): @@ -47,11 +47,17 @@ def merge_two_infoboxes(infobox1, infobox2): if url.get('url', None) not in urlSet: urls1.append(url) + if 'img_src' in infobox2: + img1 = infobox1.get('img_src', None) + img2 = infobox2.get('img_src') + if img1 is None: + infobox1['img_src'] = img2 + if 'attributes' in infobox2: attributes1 = infobox1.get('attributes', None) if attributes1 is None: attributes1 = [] - infobox1.set('attributes', attributes1) + infobox1['attributes'] = attributes1 attributeSet = set() for attribute in infobox1.get('attributes', []): @@ -68,7 +74,7 @@ def merge_two_infoboxes(infobox1, infobox2): if result_content_len(content2) > result_content_len(content1): infobox1['content'] = content2 else: - infobox1.set('content', content2) + infobox1['content'] = content2 def result_score(result): @@ -116,6 +122,10 @@ class ResultContainer(object): self.results[engine_name].extend(results) for i, result in enumerate(results): + try: + result['url'] = result['url'].decode('utf-8') + except: + pass position = i + 1 self._merge_result(result, position) @@ -138,6 +148,7 @@ class ResultContainer(object): # if the result has no scheme, use http as default if not result['parsed_url'].scheme: result['parsed_url'] = result['parsed_url']._replace(scheme="http") + result['url'] = result['parsed_url'].geturl() result['host'] = result['parsed_url'].netloc diff --git a/sources/searx/search.py b/sources/searx/search.py index 655b780..a408016 100644 --- a/sources/searx/search.py +++ b/sources/searx/search.py @@ -23,7 +23,7 @@ from searx.engines import ( categories, engines ) from searx.languages import language_codes -from searx.utils import gen_useragent, get_blocked_engines +from searx.utils import gen_useragent from searx.query import Query from searx.results import ResultContainer from searx import logger @@ -34,16 +34,23 @@ number_of_searches = 0 def search_request_wrapper(fn, url, engine_name, **kwargs): + ret = None + engine = engines[engine_name] try: - return fn(url, **kwargs) + ret = fn(url, **kwargs) + with threading.RLock(): + engine.continuous_errors = 0 + engine.suspend_end_time = 0 except: # increase errors stats with threading.RLock(): - engines[engine_name].stats['errors'] += 1 + engine.stats['errors'] += 1 + engine.continuous_errors += 1 + engine.suspend_end_time = time() + min(60, engine.continuous_errors) # 
print engine name and specific error message logger.exception('engine crash: {0}'.format(engine_name)) - return + return ret def threaded_requests(requests): @@ -133,15 +140,13 @@ class Search(object): self.lang = 'all' # set blocked engines - self.blocked_engines = get_blocked_engines(engines, request.cookies) + self.disabled_engines = request.preferences.engines.get_disabled() self.result_container = ResultContainer() self.request_data = {} # set specific language if set - if request.cookies.get('language')\ - and request.cookies['language'] in (x[0] for x in language_codes): - self.lang = request.cookies['language'] + self.lang = request.preferences.get_value('language') # set request method if request.method == 'POST': @@ -162,7 +167,7 @@ class Search(object): # parse query, if tags are set, which change # the serch engine or search-language - query_obj = Query(self.request_data['q'], self.blocked_engines) + query_obj = Query(self.request_data['q'], self.disabled_engines) query_obj.parse_query() # set query @@ -222,8 +227,7 @@ class Search(object): # using user-defined default-configuration which # (is stored in cookie) if not self.categories: - cookie_categories = request.cookies.get('categories', '') - cookie_categories = cookie_categories.split(',') + cookie_categories = request.preferences.get_value('categories') for ccateg in cookie_categories: if ccateg in categories: self.categories.append(ccateg) @@ -239,7 +243,11 @@ class Search(object): self.engines.extend({'category': categ, 'name': engine.name} for engine in categories[categ] - if (engine.name, categ) not in self.blocked_engines) + if (engine.name, categ) not in self.disabled_engines) + + # remove suspended engines + self.engines = [e for e in self.engines + if engines[e['name']].suspend_end_time <= time()] # do search-request def search(self, request): @@ -283,11 +291,8 @@ class Search(object): else: request_params['language'] = self.lang - try: - # 0 = None, 1 = Moderate, 2 = Strict - request_params['safesearch'] = int(request.cookies.get('safesearch')) - except Exception: - request_params['safesearch'] = settings['search']['safe_search'] + # 0 = None, 1 = Moderate, 2 = Strict + request_params['safesearch'] = request.preferences.get_value('safesearch') # update request parameters dependent on # search-engine (contained in engines folder) diff --git a/sources/searx/settings.yml b/sources/searx/settings.yml index c7f659e..ff85684 100644 --- a/sources/searx/settings.yml +++ b/sources/searx/settings.yml @@ -1,5 +1,6 @@ general: debug : False # Debug mode, only for development + instance_name : "searx" # displayed name search: safe_search : 0 # Filter results. 
0: None, 1: Moderate, 2: Strict @@ -33,11 +34,18 @@ outgoing: # communication with search engines # - 1.1.1.2 engines: + - name : arch linux wiki + engine : archlinux + shortcut : al + + - name : base + engine : base + shortcut : bs + - name : wikipedia - engine : mediawiki + engine : wikipedia shortcut : wp base_url : 'https://{language}.wikipedia.org/' - number_of_results : 1 - name : bing engine : bing @@ -51,6 +59,18 @@ engines: engine : bing_news shortcut : bin + - name : bitbucket + engine : xpath + paging : True + search_url : https://bitbucket.org/repo/all/{pageno}?name={query} + url_xpath : //article[@class="repo-summary"]//a[@class="repo-link"]/@href + title_xpath : //article[@class="repo-summary"]//a[@class="repo-link"] + content_xpath : //article[@class="repo-summary"]/p + categories : it + timeout : 4.0 + disabled : True + shortcut : bb + - name : btdigg engine : btdigg shortcut : bt @@ -72,11 +92,25 @@ engines: - name : ddg definitions engine : duckduckgo_definitions shortcut : ddd + disabled : True - name : digg engine : digg shortcut : dg + - name : erowid + engine : xpath + paging : True + first_page_num : 0 + page_size : 30 + search_url : https://www.erowid.org/search.php?q={query}&s={pageno} + url_xpath : //dl[@class="results-list"]/dt[@class="result-title"]/a/@href + title_xpath : //dl[@class="results-list"]/dt[@class="result-title"]/a/text() + content_xpath : //dl[@class="results-list"]/dd[@class="result-details"] + categories : general + shortcut : ew + disabled : True + - name : wikidata engine : wikidata shortcut : wd @@ -100,6 +134,11 @@ engines: shortcut : 1x disabled : True + - name : fdroid + engine : fdroid + shortcut : fd + disabled : True + - name : flickr categories : images shortcut : fl @@ -110,11 +149,28 @@ engines: # Or you can use the html non-stable engine, activated by default engine : flickr_noapi + - name : frinkiac + engine : frinkiac + shortcut : frk + disabled : True + - name : gigablast engine : gigablast shortcut : gb disabled: True + - name : gitlab + engine : xpath + paging : True + search_url : https://gitlab.com/search?page={pageno}&search={query} + url_xpath : //li[@class="project-row"]//a[@class="project"]/@href + title_xpath : //li[@class="project-row"]//span[contains(@class, "project-full-name")] + content_xpath : //li[@class="project-row"]//div[@class="description"]/p + categories : it + shortcut : gl + timeout : 5.0 + disabled : True + - name : github engine : github shortcut : gh @@ -161,10 +217,39 @@ engines: shortcut : gps disabled : True + - name : geektimes + engine : xpath + paging : True + search_url : https://geektimes.ru/search/page{pageno}/?q={query} + url_xpath : //div[@class="search_results"]//a[@class="post_title"]/@href + title_xpath : //div[@class="search_results"]//a[@class="post_title"] + content_xpath : //div[@class="search_results"]//div[contains(@class, "content")] + categories : it + timeout : 4.0 + disabled : True + shortcut : gt + + - name : habrahabr + engine : xpath + paging : True + search_url : https://habrahabr.ru/search/page{pageno}/?q={query} + url_xpath : //div[@class="search_results"]//a[@class="post_title"]/@href + title_xpath : //div[@class="search_results"]//a[@class="post_title"] + content_xpath : //div[@class="search_results"]//div[contains(@class, "content")] + categories : it + timeout : 4.0 + disabled : True + shortcut : habr + - name : mixcloud engine : mixcloud shortcut : mc + - name : nyaa + engine : nyaa + shortcut : nt + disabled : True + - name : openstreetmap engine : openstreetmap 
shortcut : osm @@ -199,6 +284,13 @@ engines: shortcut : qws categories : social media + - name : reddit + engine : reddit + shortcut : re + page_size : 25 + timeout : 10.0 + disabled : True + - name : kickass engine : kickass shortcut : ka @@ -250,6 +342,17 @@ engines: shortcut : sw disabled : True + - name : tokyotoshokan + engine : tokyotoshokan + shortcut : tt + timeout : 6.0 + disabled : True + + - name : torrentz + engine : torrentz + timeout : 5.0 + shortcut : to + - name : twitter engine : twitter shortcut : tw @@ -300,6 +403,16 @@ engines: engine : vimeo shortcut : vm + - name : wolframalpha + shortcut : wa + # You can use the engine using the official stable API, but you need an API key + # See : http://products.wolframalpha.com/api/ + # engine : wolframalpha_api + # api_key: '' # required! + engine : wolframalpha_noapi + timeout: 6.0 + categories : science + #The blekko technology and team have joined IBM Watson! -> https://blekko.com/ # - name : blekko images # engine : blekko_images @@ -313,18 +426,29 @@ engines: # number_of_results : 5 # timeout : 3.0 +# Doku engine lets you access to any Doku wiki instance: +# A public one or a privete/corporate one. +# - name : ubuntuwiki +# engine : doku +# shortcut : uw +# base_url : 'http://doc.ubuntu-fr.org' + locales: en : English - de : Deutsch - he : עברית - hu : Magyar - fr : Français - es : Español - it : Italiano - nl : Nederlands + bg : Български (Bulgarian) + de : Deutsch (German) + el_GR : Ελληνικά (Greek_Greece) + eo : Esperanto (Esperanto) + es : Español (Spanish) + fr : Français (French) + he : עברית (Hebrew) + hu : Magyar (Hungarian) + it : Italiano (Italian) ja : 日本語 (Japanese) - tr : Türkçe - pt : Português - ru : Russian - ro : Romanian + nl : Nederlands (Dutch) + pt : Português (Portuguese) + pt_BR : Português (Portuguese_Brazil) + ro : Română (Romanian) + ru : Русский (Russian) + tr : Türkçe (Turkish) zh : 中文 (Chinese) diff --git a/sources/searx/settings_robot.yml b/sources/searx/settings_robot.yml index f14443c..7c7c4ee 100644 --- a/sources/searx/settings_robot.yml +++ b/sources/searx/settings_robot.yml @@ -1,9 +1,10 @@ general: debug : False + instance_name : "searx_test" search: safe_search : 0 - autocomplete : 0 + autocomplete : "" server: port : 11111 @@ -25,10 +26,12 @@ engines: - name : general_dummy engine : dummy categories : general + shortcut : gd - name : dummy_dummy engine : dummy categories : dummy + shortcut : dd locales: en : English diff --git a/sources/searx/static/plugins/js/open_results_on_new_tab.js b/sources/searx/static/plugins/js/open_results_on_new_tab.js new file mode 100644 index 0000000..99ef382 --- /dev/null +++ b/sources/searx/static/plugins/js/open_results_on_new_tab.js @@ -0,0 +1,3 @@ +$(document).ready(function() { + $('.result_header > a').attr('target', '_blank'); +}); diff --git a/sources/searx/static/js/search_on_category_select.js b/sources/searx/static/plugins/js/search_on_category_select.js similarity index 100% rename from sources/searx/static/js/search_on_category_select.js rename to sources/searx/static/plugins/js/search_on_category_select.js diff --git a/sources/searx/static/themes/default/css/style.css b/sources/searx/static/themes/default/css/style.css index 5be452e..71422bc 100644 --- a/sources/searx/static/themes/default/css/style.css +++ b/sources/searx/static/themes/default/css/style.css @@ -1 +1 @@ -.highlight .hll{background-color:#ffc}.highlight{background:#f8f8f8}.highlight .c{color:#408080;font-style:italic}.highlight .err{border:1px solid #f00}.highlight 
.k{color:#008000;font-weight:bold}.highlight .o{color:#666}.highlight .cm{color:#408080;font-style:italic}.highlight .cp{color:#bc7a00}.highlight .c1{color:#408080;font-style:italic}.highlight .cs{color:#408080;font-style:italic}.highlight .gd{color:#a00000}.highlight .ge{font-style:italic}.highlight .gr{color:#f00}.highlight .gh{color:#000080;font-weight:bold}.highlight .gi{color:#00a000}.highlight .go{color:#888}.highlight .gp{color:#000080;font-weight:bold}.highlight .gs{font-weight:bold}.highlight .gu{color:#800080;font-weight:bold}.highlight .gt{color:#04d}.highlight .kc{color:#008000;font-weight:bold}.highlight .kd{color:#008000;font-weight:bold}.highlight .kn{color:#008000;font-weight:bold}.highlight .kp{color:#008000}.highlight .kr{color:#008000;font-weight:bold}.highlight .kt{color:#b00040}.highlight .m{color:#666}.highlight .s{color:#ba2121}.highlight .na{color:#7d9029}.highlight .nb{color:#008000}.highlight .nc{color:#00f;font-weight:bold}.highlight .no{color:#800}.highlight .nd{color:#a2f}.highlight .ni{color:#999;font-weight:bold}.highlight .ne{color:#d2413a;font-weight:bold}.highlight .nf{color:#00f}.highlight .nl{color:#a0a000}.highlight .nn{color:#00f;font-weight:bold}.highlight .nt{color:#008000;font-weight:bold}.highlight .nv{color:#19177c}.highlight .ow{color:#a2f;font-weight:bold}.highlight .w{color:#bbb}.highlight .mf{color:#666}.highlight .mh{color:#666}.highlight .mi{color:#666}.highlight .mo{color:#666}.highlight .sb{color:#ba2121}.highlight .sc{color:#ba2121}.highlight .sd{color:#ba2121;font-style:italic}.highlight .s2{color:#ba2121}.highlight .se{color:#b62;font-weight:bold}.highlight .sh{color:#ba2121}.highlight .si{color:#b68;font-weight:bold}.highlight .sx{color:#008000}.highlight .sr{color:#b68}.highlight .s1{color:#ba2121}.highlight .ss{color:#19177c}.highlight .bp{color:#008000}.highlight .vc{color:#19177c}.highlight .vg{color:#19177c}.highlight .vi{color:#19177c}.highlight .il{color:#666}.highlight pre{overflow:auto}.highlight .lineno{-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;cursor:default}.highlight .lineno::selection{background:transparent}.highlight .lineno::-moz-selection{background:transparent}html{font-family:sans-serif;font-size:.9em;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%;-moz-text-size-adjust:100%;text-size-adjust:100%;color:#444;padding:0;margin:0}body,#container{padding:0;margin:0}#container{width:100%;position:absolute;top:0}.search{padding:0;margin:0}.search .checkbox_container label{font-size:.9em;border-bottom:2px solid #e8e7e6}.search .checkbox_container label:hover{border-bottom:2px solid #3498db}.search .checkbox_container input[type="checkbox"]:checked+label{border-bottom:2px solid #2980b9}#search_wrapper{position:relative;width:50em;padding:10px}.center #search_wrapper{margin-left:auto;margin-right:auto}.q{background:none repeat scroll 0 0 #fff;border:1px solid #3498db;color:#222;font-size:16px;height:28px;margin:0;outline:medium none;padding:2px;padding-left:8px;padding-right:0 !important;width:100%;z-index:2}#search_submit{position:absolute;top:13px;right:1px;padding:0;border:0;background:url('../img/search-icon.png') no-repeat;background-size:24px 24px;opacity:.8;width:24px;height:30px;font-size:0}@media screen and (max-width:50em){#search_wrapper{width:90%;clear:both;overflow:hidden}}ul.autocompleter-choices{position:absolute;margin:0;padding:0;list-style:none;border:1px solid 
#3498db;border-left-color:#3498db;border-right-color:#3498db;border-bottom-color:#3498db;text-align:left;font-family:Verdana,Geneva,Arial,Helvetica,sans-serif;z-index:50;background-color:#fff;color:#444}ul.autocompleter-choices li{position:relative;margin:-2px 0 0 0;padding:.2em 1.5em .2em 1em;display:block;float:none !important;cursor:pointer;font-weight:normal;white-space:nowrap;font-size:1em;line-height:1.5em}ul.autocompleter-choices li.autocompleter-selected{background-color:#444;color:#fff}ul.autocompleter-choices li.autocompleter-selected span.autocompleter-queried{color:#9fcfff}ul.autocompleter-choices span.autocompleter-queried{display:inline;float:none;font-weight:bold;margin:0;padding:0}.row{max-width:800px;margin:20px auto;text-align:justify}.row h1{font-size:3em;margin-top:50px}.row p{padding:0 10px;max-width:700px}.row h3,.row ul{margin:4px 8px}.hmarg{margin:0 20px;border:1px solid #3498db;padding:4px 10px}a:link.hmarg{color:#3498db}a:visited.hmarg{color:#3498db}a:active.hmarg{color:#3498db}a:hover.hmarg{color:#3498db}.top_margin{margin-top:60px}.center{text-align:center}h1{font-size:5em}div.title{background:url('../img/searx.png') no-repeat;width:100%;min-height:80px;background-position:center}div.title h1{visibility:hidden}input[type="submit"]{padding:2px 6px;margin:2px 4px;display:inline-block;background:#3498db;color:#fff;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;border:0;cursor:pointer}input[type="checkbox"]{visibility:hidden}fieldset{margin:8px;border:1px solid #3498db}#categories{margin:0 10px;-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.checkbox_container{display:inline-block;position:relative;margin:0 3px;padding:0}.checkbox_container input{display:none}.checkbox_container label,.engine_checkbox label{cursor:pointer;padding:4px 10px;margin:0;display:block;text-transform:capitalize;-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.checkbox_container input[type="checkbox"]:checked+label{background:#3498db;color:#fff}.engine_checkbox{padding:4px}label.allow{background:#e74c3c;padding:4px 8px;color:#fff;display:none}label.deny{background:#2ecc71;padding:4px 8px;color:#444;display:inline}.engine_checkbox input[type="checkbox"]:checked+label:nth-child(2)+label{display:none}.engine_checkbox input[type="checkbox"]:checked+label.allow{display:inline}a{text-decoration:none;color:#1a11be}a:visited{color:#8e44ad}.result{margin:19px 0 18px 0;padding:0;clear:both}.result_title{margin-bottom:0}.result_title a{color:#2980b9;font-weight:normal;font-size:1.1em}.result_title a:hover{text-decoration:underline}.result_title a:visited{color:#8e44ad}.cache_link{font-size:10px !important}.result h3{font-size:1em;word-wrap:break-word;margin:5px 0 1px 0;padding:0}.result .content{font-size:.8em;margin:0;padding:0;max-width:54em;word-wrap:break-word;line-height:1.24}.result .content img{float:left;margin-right:5px;max-width:200px;max-height:100px}.result .content br.last{clear:both}.result .url{font-size:.8em;margin:0 0 3px 0;padding:0;max-width:54em;word-wrap:break-word;color:#c0392b}.result .published_date{font-size:.8em;color:#888;Margin:5px 20px}.result .thumbnail{width:400px}.engines{color:#888}.small_font{font-size:.8em}.small p{margin:2px 0}.right{float:right}.invisible{display:none}.left{float:left}.highlight{color:#094089}.content 
.highlight{color:#000}.image_result{display:inline-block;margin:10px 10px;position:relative;max-height:160px}.image_result img{border:0;max-height:160px}.image_result p{margin:0;padding:0}.image_result p span a{display:none;color:#fff}.image_result p:hover span a{display:block;position:absolute;bottom:0;right:0;padding:4px;background-color:rgba(0,0,0,0.6);font-size:.7em}.torrent_result{border-left:10px solid lightgray;padding-left:3px}.torrent_result p{margin:3px;font-size:.8em}.torrent_result a{color:#2980b9}.torrent_result a:hover{text-decoration:underline}.torrent_result a:visited{color:#8e44ad}.definition_result{border-left:10px solid gray;padding-left:3px}.percentage{position:relative;width:300px}.percentage div{background:#444}table{width:100%}td{padding:0 4px}tr:hover{background:#ddd}#results{margin:auto;padding:0;width:50em;margin-bottom:20px}#sidebar{position:fixed;bottom:10px;left:10px;margin:0 2px 5px 5px;padding:0 2px 2px 2px;width:14em}#sidebar input{padding:0;margin:3px;font-size:.8em;display:inline-block;background:transparent;color:#444;cursor:pointer}#sidebar input[type="submit"]{text-decoration:underline}#suggestions form{display:inline}#suggestions,#answers{margin-top:20px;max-width:45em}#suggestions input,#answers input,#infoboxes input{padding:0;margin:3px;font-size:.8em;display:inline-block;background:transparent;color:#444;cursor:pointer}#suggestions input[type="submit"],#answers input[type="submit"],#infoboxes input[type="submit"]{text-decoration:underline}#suggestions-title{color:#888}#answers{border:2px solid #2980b9;padding:20px}#answers form,#infoboxes form{min-width:210px}#infoboxes{position:absolute;top:100px;right:20px;margin:0 2px 5px 5px;padding:0 2px 2px;max-width:21em}#infoboxes .infobox{margin:10px 0 10px;border:1px solid #ddd;padding:5px;font-size:.8em}#infoboxes .infobox img{max-width:20em;max-heigt:12em;display:block;margin:5px;padding:5px}#infoboxes .infobox h2{margin:0}#infoboxes .infobox table{width:auto}#infoboxes .infobox table td{vertical-align:top}#infoboxes .infobox input{font-size:1em}#infoboxes .infobox br{clear:both}#search_url{margin-top:8px}#search_url input{border:1px solid #888;padding:4px;color:#444;width:14em;display:block;margin:4px;font-size:.8em}#preferences{top:10px;padding:0;border:0;background:url('../img/preference-icon.png') no-repeat;background-size:28px 28px;opacity:.8;width:28px;height:30px;display:block}#preferences *{display:none}#pagination{clear:both}#pagination br{clear:both}#apis{margin-top:8px;clear:both}#categories_container{position:relative}@media screen and (max-width:50em){#results{margin:auto;padding:0;width:90%}.github{display:none}.checkbox_container{display:block;width:90%}.checkbox_container label{border-bottom:0}.preferences_container{display:none;postion:fixed !important;top:100px;right:0}}@media screen and (max-width:75em){div.title h1{font-size:1em}html.touch #categories{width:95%;height:30px;text-align:left;overflow-x:scroll;overflow-y:hidden;-webkit-overflow-scrolling:touch}html.touch #categories #categories_container{width:1000px;width:-moz-max-content;width:-webkit-max-content;width:max-content}html.touch #categories #categories_container .checkbox_container{display:inline-block;width:auto}#categories{font-size:90%;clear:both}#categories .checkbox_container{margin-top:2px;margin:auto}#suggestions,#answers{margin-top:5px}#infoboxes{position:inherit;max-width:inherit}#infoboxes .infobox{clear:both}#infoboxes .infobox img{float:left;max-width:10em}#categories{font-size:90%;clear:both}#categories 
.checkbox_container{margin-top:2px;margin:auto}#sidebar{position:static;max-width:50em;margin:0 0 2px 0;padding:0;float:none;border:none;width:auto}#sidebar input{border:0}#apis{display:none}#search_url{display:none}.result{border-top:1px solid #e8e7e6;margin:8px 0 8px 0}.result .thumbnail{max-width:98%}.image_result{max-width:98%}.image_result img{max-width:98%}}.favicon{float:left;margin-right:4px;margin-top:2px}.preferences_back{background:none repeat scroll 0 0 #3498db;border:0 none;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;cursor:pointer;display:inline-block;margin:2px 4px;padding:4px 6px}.preferences_back a{color:#fff}.hidden{opacity:0;overflow:hidden;font-size:.8em;position:absolute;bottom:-20px;width:100%;text-position:center;background:white;transition:opacity 1s ease}#categories_container:hover .hidden{transition:opacity 1s ease;opacity:.8} \ No newline at end of file +.highlight .hll{background-color:#ffc}.highlight{background:#f8f8f8}.highlight .c{color:#408080;font-style:italic}.highlight .err{border:1px solid #f00}.highlight .k{color:#008000;font-weight:bold}.highlight .o{color:#666}.highlight .cm{color:#408080;font-style:italic}.highlight .cp{color:#bc7a00}.highlight .c1{color:#408080;font-style:italic}.highlight .cs{color:#408080;font-style:italic}.highlight .gd{color:#a00000}.highlight .ge{font-style:italic}.highlight .gr{color:#f00}.highlight .gh{color:#000080;font-weight:bold}.highlight .gi{color:#00a000}.highlight .go{color:#888}.highlight .gp{color:#000080;font-weight:bold}.highlight .gs{font-weight:bold}.highlight .gu{color:#800080;font-weight:bold}.highlight .gt{color:#04d}.highlight .kc{color:#008000;font-weight:bold}.highlight .kd{color:#008000;font-weight:bold}.highlight .kn{color:#008000;font-weight:bold}.highlight .kp{color:#008000}.highlight .kr{color:#008000;font-weight:bold}.highlight .kt{color:#b00040}.highlight .m{color:#666}.highlight .s{color:#ba2121}.highlight .na{color:#7d9029}.highlight .nb{color:#008000}.highlight .nc{color:#00f;font-weight:bold}.highlight .no{color:#800}.highlight .nd{color:#a2f}.highlight .ni{color:#999;font-weight:bold}.highlight .ne{color:#d2413a;font-weight:bold}.highlight .nf{color:#00f}.highlight .nl{color:#a0a000}.highlight .nn{color:#00f;font-weight:bold}.highlight .nt{color:#008000;font-weight:bold}.highlight .nv{color:#19177c}.highlight .ow{color:#a2f;font-weight:bold}.highlight .w{color:#bbb}.highlight .mf{color:#666}.highlight .mh{color:#666}.highlight .mi{color:#666}.highlight .mo{color:#666}.highlight .sb{color:#ba2121}.highlight .sc{color:#ba2121}.highlight .sd{color:#ba2121;font-style:italic}.highlight .s2{color:#ba2121}.highlight .se{color:#b62;font-weight:bold}.highlight .sh{color:#ba2121}.highlight .si{color:#b68;font-weight:bold}.highlight .sx{color:#008000}.highlight .sr{color:#b68}.highlight .s1{color:#ba2121}.highlight .ss{color:#19177c}.highlight .bp{color:#008000}.highlight .vc{color:#19177c}.highlight .vg{color:#19177c}.highlight .vi{color:#19177c}.highlight .il{color:#666}.highlight pre{overflow:auto}.highlight .lineno{-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;cursor:default}.highlight .lineno::selection{background:transparent}.highlight 
.lineno::-moz-selection{background:transparent}html{font-family:sans-serif;font-size:.9em;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%;-moz-text-size-adjust:100%;text-size-adjust:100%;color:#444;padding:0;margin:0}body,#container{padding:0;margin:0}#container{width:100%;position:absolute;top:0}.search{padding:0;margin:0}.search .checkbox_container label{font-size:.9em;border-bottom:2px solid #e8e7e6}.search .checkbox_container label:hover{border-bottom:2px solid #3498db}.search .checkbox_container input[type="checkbox"]:checked+label{border-bottom:2px solid #2980b9}#search_wrapper{position:relative;width:50em;padding:10px}.center #search_wrapper{margin-left:auto;margin-right:auto}.q{background:none repeat scroll 0 0 #fff;border:1px solid #3498db;color:#222;font-size:16px;height:28px;margin:0;outline:medium none;padding:2px;padding-left:8px;padding-right:0 !important;width:100%;z-index:2}#search_submit{position:absolute;top:13px;right:1px;padding:0;border:0;background:url('../img/search-icon.png') no-repeat;background-size:24px 24px;opacity:.8;width:24px;height:30px;font-size:0}@media screen and (max-width:50em){#search_wrapper{width:90%;clear:both;overflow:hidden}}ul.autocompleter-choices{position:absolute;margin:0;padding:0;list-style:none;border:1px solid #3498db;border-left-color:#3498db;border-right-color:#3498db;border-bottom-color:#3498db;text-align:left;font-family:Verdana,Geneva,Arial,Helvetica,sans-serif;z-index:50;background-color:#fff;color:#444}ul.autocompleter-choices li{position:relative;margin:-2px 0 0 0;padding:.2em 1.5em .2em 1em;display:block;float:none !important;cursor:pointer;font-weight:normal;white-space:nowrap;font-size:1em;line-height:1.5em}ul.autocompleter-choices li.autocompleter-selected{background-color:#444;color:#fff}ul.autocompleter-choices li.autocompleter-selected span.autocompleter-queried{color:#9fcfff}ul.autocompleter-choices span.autocompleter-queried{display:inline;float:none;font-weight:bold;margin:0;padding:0}.row{max-width:800px;margin:20px auto;text-align:justify}.row h1{font-size:3em;margin-top:50px}.row p{padding:0 10px;max-width:700px}.row h3,.row ul{margin:4px 8px}.hmarg{margin:0 20px;border:1px solid #3498db;padding:4px 10px}a:link.hmarg{color:#3498db}a:visited.hmarg{color:#3498db}a:active.hmarg{color:#3498db}a:hover.hmarg{color:#3498db}.top_margin{margin-top:60px}.center{text-align:center}h1{font-size:5em}div.title{background:url('../img/searx.png') no-repeat;width:100%;min-height:80px;background-position:center}div.title h1{visibility:hidden}input[type="submit"]{padding:2px 6px;margin:2px 4px;display:inline-block;background:#3498db;color:#fff;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;border:0;cursor:pointer}input[type="checkbox"]{visibility:hidden}fieldset{margin:8px;border:1px solid #3498db}#categories{margin:0 10px;-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.checkbox_container{display:inline-block;position:relative;margin:0 3px;padding:0}.checkbox_container input{display:none}.checkbox_container label,.engine_checkbox label{cursor:pointer;padding:4px 10px;margin:0;display:block;text-transform:capitalize;-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.checkbox_container input[type="checkbox"]:checked+label{background:#3498db;color:#fff}.engine_checkbox{padding:4px}label.allow{background:#e74c3c;padding:4px 
8px;color:#fff;display:none}label.deny{background:#2ecc71;padding:4px 8px;color:#444;display:inline}.engine_checkbox input[type="checkbox"]:checked+label:nth-child(2)+label{display:none}.engine_checkbox input[type="checkbox"]:checked+label.allow{display:inline}a{text-decoration:none;color:#1a11be}a:visited{color:#8e44ad}.result{margin:19px 0 18px 0;padding:0;clear:both}.result_title{margin-bottom:0}.result_title a{color:#2980b9;font-weight:normal;font-size:1.1em}.result_title a:hover{text-decoration:underline}.result_title a:visited{color:#8e44ad}.cache_link{font-size:10px !important}.result h3{font-size:1em;word-wrap:break-word;margin:5px 0 1px 0;padding:0}.result .content{font-size:.8em;margin:0;padding:0;max-width:54em;word-wrap:break-word;line-height:1.24}.result .content img{float:left;margin-right:5px;max-width:200px;max-height:100px}.result .content br.last{clear:both}.result .url{font-size:.8em;margin:0 0 3px 0;padding:0;max-width:54em;word-wrap:break-word;color:#c0392b}.result .published_date{font-size:.8em;color:#888;Margin:5px 20px}.result .thumbnail{width:400px}.engines{color:#888}.small_font{font-size:.8em}.small p{margin:2px 0}.right{float:right}.invisible{display:none}.left{float:left}.highlight{color:#094089}.content .highlight{color:#000}.image_result{display:inline-block;margin:10px 10px;position:relative;max-height:160px}.image_result img{border:0;max-height:160px}.image_result p{margin:0;padding:0}.image_result p span a{display:none;color:#fff}.image_result p:hover span a{display:block;position:absolute;bottom:0;right:0;padding:4px;background-color:rgba(0,0,0,0.6);font-size:.7em}.torrent_result{border-left:10px solid lightgray;padding-left:3px}.torrent_result p{margin:3px;font-size:.8em}.torrent_result a{color:#2980b9}.torrent_result a:hover{text-decoration:underline}.torrent_result a:visited{color:#8e44ad}.definition_result{border-left:10px solid gray;padding-left:3px}.percentage{position:relative;width:300px}.percentage div{background:#444}table{width:100%}td{padding:0 4px}tr:hover{background:#ddd}#results{margin:auto;padding:0;width:50em;margin-bottom:20px}#sidebar{position:fixed;bottom:10px;left:10px;margin:0 2px 5px 5px;padding:0 2px 2px 2px;width:14em}#sidebar input{padding:0;margin:3px;font-size:.8em;display:inline-block;background:transparent;color:#444;cursor:pointer}#sidebar input[type="submit"]{text-decoration:underline}#suggestions form{display:inline}#suggestions,#answers{margin-top:20px;max-width:45em}#suggestions input,#answers input,#infoboxes input{padding:0;margin:3px;font-size:.8em;display:inline-block;background:transparent;color:#444;cursor:pointer}#suggestions input[type="submit"],#answers input[type="submit"],#infoboxes input[type="submit"]{text-decoration:underline}#suggestions-title{color:#888}#answers{border:2px solid #2980b9;padding:20px}#answers form,#infoboxes form{min-width:210px}#infoboxes{position:absolute;top:100px;right:20px;margin:0 2px 5px 5px;padding:0 2px 2px;max-width:21em;word-wrap:break-word;}#infoboxes .infobox{margin:10px 0 10px;border:1px solid #ddd;padding:5px;font-size:.8em}#infoboxes .infobox img{max-width:90%;max-heigt:12em;display:block;margin:5px;padding:5px}#infoboxes .infobox h2{margin:0}#infoboxes .infobox table{table-layout:fixed;}#infoboxes .infobox table td{vertical-align:top}#infoboxes .infobox input{font-size:1em}#infoboxes .infobox br{clear:both}#search_url{margin-top:8px}#search_url input{border:1px solid 
#888;padding:4px;color:#444;width:14em;display:block;margin:4px;font-size:.8em}#preferences{top:10px;padding:0;border:0;background:url('../img/preference-icon.png') no-repeat;background-size:28px 28px;opacity:.8;width:28px;height:30px;display:block}#preferences *{display:none}#pagination{clear:both}#pagination br{clear:both}#apis{margin-top:8px;clear:both}#categories_container{position:relative}@media screen and (max-width:50em){#results{margin:auto;padding:0;width:90%}.github{display:none}.checkbox_container{display:block;width:90%}.checkbox_container label{border-bottom:0}.preferences_container{display:none;postion:fixed !important;top:100px;right:0}}@media screen and (max-width:75em){div.title h1{font-size:1em}html.touch #categories{width:95%;height:30px;text-align:left;overflow-x:scroll;overflow-y:hidden;-webkit-overflow-scrolling:touch}html.touch #categories #categories_container{width:1000px;width:-moz-max-content;width:-webkit-max-content;width:max-content}html.touch #categories #categories_container .checkbox_container{display:inline-block;width:auto}#categories{font-size:90%;clear:both}#categories .checkbox_container{margin-top:2px;margin:auto}#suggestions,#answers{margin-top:5px}#infoboxes{position:inherit;max-width:inherit}#infoboxes .infobox{clear:both}#infoboxes .infobox img{float:left;max-width:10em}#categories{font-size:90%;clear:both}#categories .checkbox_container{margin-top:2px;margin:auto}#sidebar{position:static;max-width:50em;margin:0 0 2px 0;padding:0;float:none;border:none;width:auto}#sidebar input{border:0}#apis{display:none}#search_url{display:none}.result{border-top:1px solid #e8e7e6;margin:8px 0 8px 0}.result .thumbnail{max-width:98%}.image_result{max-width:98%}.image_result img{max-width:98%}}.favicon{float:left;margin-right:4px;margin-top:2px}.preferences_back{background:none repeat scroll 0 0 #3498db;border:0 none;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;cursor:pointer;display:inline-block;margin:2px 4px;padding:4px 6px}.preferences_back a{color:#fff}.hidden{opacity:0;overflow:hidden;font-size:.8em;position:absolute;bottom:-20px;width:100%;text-position:center;background:white;transition:opacity 1s ease}#categories_container:hover .hidden{transition:opacity 1s ease;opacity:.8} diff --git a/sources/searx/static/themes/default/less/style.less b/sources/searx/static/themes/default/less/style.less index 575bc22..4374f7d 100644 --- a/sources/searx/static/themes/default/less/style.less +++ b/sources/searx/static/themes/default/less/style.less @@ -476,6 +476,7 @@ color: @color-font-light; margin: 0px 2px 5px 5px; padding: 0px 2px 2px; max-width: 21em; + word-wrap: break-word; .infobox { margin: 10px 0 10px; @@ -485,7 +486,7 @@ color: @color-font-light; /* box-shadow: 0px 0px 5px #CCC; */ img { - max-width: 20em; + max-width: 90%; max-heigt: 12em; display: block; margin: 5px; @@ -497,7 +498,7 @@ color: @color-font-light; } table { - width: auto; + table-layout: fixed; td { vertical-align: top; diff --git a/sources/searx/static/themes/oscar/css/oscar.min.css b/sources/searx/static/themes/oscar/css/oscar.min.css index f7aba2b..60b5c37 100644 --- a/sources/searx/static/themes/oscar/css/oscar.min.css +++ b/sources/searx/static/themes/oscar/css/oscar.min.css @@ -17,7 +17,7 @@ input[type=checkbox]:not(:checked)+.label_hide_if_not_checked,input[type=checkbo .result_download{margin-right:5px} #pagination{margin-top:30px;padding-bottom:50px} .label-default{color:#aaa;background:#fff} -.infobox .infobox_part{margin-bottom:20px;word-wrap:break-word} 
+.infobox .infobox_part{margin-bottom:20px;word-wrap:break-word;table-layout:fixed} .infobox .infobox_part:last-child{margin-bottom:0} .search_categories{margin:10px 0;text-transform:capitalize} .cursor-text{cursor:text !important} diff --git a/sources/searx/static/themes/oscar/less/oscar/infobox.less b/sources/searx/static/themes/oscar/less/oscar/infobox.less index d8f6f92..41375f2 100644 --- a/sources/searx/static/themes/oscar/less/oscar/infobox.less +++ b/sources/searx/static/themes/oscar/less/oscar/infobox.less @@ -1,7 +1,8 @@ .infobox { .infobox_part { margin-bottom: 20px; - word-wrap: break-word; + word-wrap: break-word; + table-layout: fixed; } .infobox_part:last-child { diff --git a/sources/searx/templates/courgette/opensearch.xml b/sources/searx/templates/courgette/opensearch.xml index b85c3a7..15d3eb7 100644 --- a/sources/searx/templates/courgette/opensearch.xml +++ b/sources/searx/templates/courgette/opensearch.xml @@ -1,9 +1,9 @@ - searx + {{ instance_name }} a privacy-respecting, hackable metasearch engine UTF-8 - {{ host }}{{ url_for('static', filename='img/favicon.png') | replace("/", "", 1) }} + {{ urljoin(host, url_for('static', filename='img/favicon.png')) }} searx metasearch {% if opensearch_method == 'get' %} diff --git a/sources/searx/templates/courgette/preferences.html b/sources/searx/templates/courgette/preferences.html index f89915d..ba4d0c6 100644 --- a/sources/searx/templates/courgette/preferences.html +++ b/sources/searx/templates/courgette/preferences.html @@ -109,7 +109,7 @@ {{ search_engine.name }} ({{ shortcuts[search_engine.name] }})‎ {{ _(categ) }} - + diff --git a/sources/searx/templates/default/infobox.html b/sources/searx/templates/default/infobox.html index 1733f77..178a27e 100644 --- a/sources/searx/templates/default/infobox.html +++ b/sources/searx/templates/default/infobox.html @@ -7,7 +7,14 @@
{% for attribute in infobox.attributes %} - + + + {% if attribute.image %} + + {% else %} + + {% endif %} + {% endfor %}
{{ attribute.label }}{{ attribute.value }}
{{ attribute.label }}{{ attribute.image.alt }}{{ attribute.value }}
diff --git a/sources/searx/templates/default/opensearch.xml b/sources/searx/templates/default/opensearch.xml index b85c3a7..15d3eb7 100644 --- a/sources/searx/templates/default/opensearch.xml +++ b/sources/searx/templates/default/opensearch.xml @@ -1,9 +1,9 @@ - searx + {{ instance_name }} a privacy-respecting, hackable metasearch engine UTF-8 - {{ host }}{{ url_for('static', filename='img/favicon.png') | replace("/", "", 1) }} + {{ urljoin(host, url_for('static', filename='img/favicon.png')) }} searx metasearch {% if opensearch_method == 'get' %} diff --git a/sources/searx/templates/default/preferences.html b/sources/searx/templates/default/preferences.html index 90006c0..a47dba4 100644 --- a/sources/searx/templates/default/preferences.html +++ b/sources/searx/templates/default/preferences.html @@ -97,7 +97,7 @@ {{ search_engine.name }} ({{ shortcuts[search_engine.name] }})‎ {{ _(categ) }} - + diff --git a/sources/searx/templates/oscar/base.html b/sources/searx/templates/oscar/base.html index a799376..f63025e 100644 --- a/sources/searx/templates/oscar/base.html +++ b/sources/searx/templates/oscar/base.html @@ -9,7 +9,7 @@ {% block meta %}{% endblock %} - {% block title %}{% endblock %}searx + {% block title %}{% endblock %}{{ instance_name }} @@ -31,7 +31,7 @@ {% block head %} {% endblock %} - + @@ -86,5 +86,8 @@ {% for script in scripts %} {% endfor %} + diff --git a/sources/searx/templates/oscar/infobox.html b/sources/searx/templates/oscar/infobox.html index 2abdbf0..c72cfb6 100644 --- a/sources/searx/templates/oscar/infobox.html +++ b/sources/searx/templates/oscar/infobox.html @@ -1,8 +1,9 @@
-

{{ infobox.infobox }}

+

{{ infobox.infobox }}

+ {% if infobox.img_src %}{{ infobox.infobox }}{% endif %} {% if infobox.content %}

{{ infobox.content }}

{% endif %} @@ -11,7 +12,11 @@ {% for attribute in infobox.attributes %} {{ attribute.label }} + {% if attribute.image %} + {{ attribute.image.alt }} + {% else %} {{ attribute.value }} + {% endif %} {% endfor %} @@ -24,5 +29,6 @@ {% endfor %}
{% endif %} +
diff --git a/sources/searx/templates/oscar/navbar.html b/sources/searx/templates/oscar/navbar.html index 0c92b09..c59bcda 100644 --- a/sources/searx/templates/oscar/navbar.html +++ b/sources/searx/templates/oscar/navbar.html @@ -16,7 +16,7 @@ - searx + {{ instance_name }} {% else %} +

This

- -
  • +
  • +

    This is

    - -
  • +
  • + +

    suggestion title diff --git a/sources/searx/tests/engines/test_google_images.py b/sources/tests/unit/engines/test_google_images.py similarity index 92% rename from sources/searx/tests/engines/test_google_images.py rename to sources/tests/unit/engines/test_google_images.py index 876d0af..5f184e0 100644 --- a/sources/searx/tests/engines/test_google_images.py +++ b/sources/tests/unit/engines/test_google_images.py @@ -41,7 +41,7 @@ class TestGoogleImagesEngine(SearxTestCase):

    - {"id":"bQWQ9wz9loJmjM:","isu":"clker.com","ity":"png","md":"/search?tbs\u003dsbi:AMhZZit7u1mHyop9pQisu-5idR-8W_1Itvwc3afChmsjQYPx_1yYMzBvUZgtkcGoojqekKZ-6n_1rjX9ySH0OWA_1eO5OijFY6BBDw_1GApr6xxb1bXJcBcj-DiguMoXWW7cZSG7MRQbwnI5SoDZNXcv_1xGszy886I7NVb_1oRKSliTHtzqbXAxhvYreM","msu":"/search?q\u003dsouth\u0026biw\u003d1364\u0026bih\u003d235\u0026tbm\u003disch\u0026tbs\u003dsimg:CAQSEgltBZD3DP2WgiG-U42R4G0RFw","oh":598,"os":"13KB","ow":504,"pt":"South Arrow Clip Art at Clker.com - vector clip art online ...","rid":"vlONkeBtERfDuM","s":"Download this image as:","sc":1,"si":"/search?q\u003dsouth\u0026biw\u003d1364\u0026bih\u003d235\u0026tbm\u003disch\u0026tbs\u003dsimg:CAESEgltBZD3DP2WgiG-U42R4G0RFw","th":245,"tu":"https://thumbnail.url/","tw":206} + {"id":"bQWQ9wz9loJmjM:","isu":"clker.com","ity":"png","md":"/search?tbs\u003dsbi:AMhZZit7u1mHyop9pQisu-5idR-8W_1Itvwc3afChmsjQYPx_1yYMzBvUZgtkcGoojqekKZ-6n_1rjX9ySH0OWA_1eO5OijFY6BBDw_1GApr6xxb1bXJcBcj-DiguMoXWW7cZSG7MRQbwnI5SoDZNXcv_1xGszy886I7NVb_1oRKSliTHtzqbXAxhvYreM","msu":"/search?q\u003dsouth\u0026biw\u003d1364\u0026bih\u003d235\u0026tbm\u003disch\u0026tbs\u003dsimg:CAQSEgltBZD3DP2WgiG-U42R4G0RFw","oh":598,"os":"13KB","ow":504,"pt":"South Arrow Clip Art at Clker.com - vector clip art online ...","rid":"vlONkeBtERfDuM","s":"Download this image as:","sc":1,"si":"/search?q\u003dsouth\u0026biw\u003d1364\u0026bih\u003d235\u0026tbm\u003disch\u0026tbs\u003dsimg:CAESEgltBZD3DP2WgiG-U42R4G0RFw","th":245,"tu":"https://thumbnail.url/","tw":206,"ru":"a","ou":"b"}
    @@ -52,7 +52,7 @@ class TestGoogleImagesEngine(SearxTestCase): self.assertEqual(type(results), list) self.assertEqual(len(results), 1) self.assertEqual(results[0]['title'], u'South Arrow Clip Art at Clker.com - vector clip art online ...') - self.assertEqual(results[0]['url'], 'http://www.clker.com/clipart-south-arrow.html') + self.assertEqual(results[0]['url'], 'a') self.assertEqual(results[0]['thumbnail_src'], 'https://thumbnail.url/') - self.assertEqual(results[0]['img_src'], 'http://www.clker.com/cliparts/H/X/l/b/0/0/south-arrow-hi.png') + self.assertEqual(results[0]['img_src'], 'b') self.assertEqual(results[0]['content'], 'Download this image as:') diff --git a/sources/searx/tests/engines/test_google_news.py b/sources/tests/unit/engines/test_google_news.py similarity index 100% rename from sources/searx/tests/engines/test_google_news.py rename to sources/tests/unit/engines/test_google_news.py diff --git a/sources/searx/tests/engines/test_kickass.py b/sources/tests/unit/engines/test_kickass.py similarity index 100% rename from sources/searx/tests/engines/test_kickass.py rename to sources/tests/unit/engines/test_kickass.py diff --git a/sources/searx/tests/engines/test_mediawiki.py b/sources/tests/unit/engines/test_mediawiki.py similarity index 100% rename from sources/searx/tests/engines/test_mediawiki.py rename to sources/tests/unit/engines/test_mediawiki.py diff --git a/sources/searx/tests/engines/test_mixcloud.py b/sources/tests/unit/engines/test_mixcloud.py similarity index 100% rename from sources/searx/tests/engines/test_mixcloud.py rename to sources/tests/unit/engines/test_mixcloud.py diff --git a/sources/tests/unit/engines/test_nyaa.py b/sources/tests/unit/engines/test_nyaa.py new file mode 100644 index 0000000..db412e1 --- /dev/null +++ b/sources/tests/unit/engines/test_nyaa.py @@ -0,0 +1,66 @@ +from collections import defaultdict +import mock +from searx.engines import nyaa +from searx.testing import SearxTestCase + + +class TestNyaaEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dic = defaultdict(dict) + dic['pageno'] = 1 + params = nyaa.request(query, dic) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('nyaa.se' in params['url']) + + def test_response(self): + resp = mock.Mock(text='') + self.assertEqual(nyaa.response(resp), []) + + html = """ + + + + + + + + + + + + + +
    + + English-translated Anime + + + + Sample torrent title + + + + DL + + 10 MiB136660
    + """ + + resp = mock.Mock(text=html) + results = nyaa.response(resp) + + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + + r = results[0] + self.assertTrue(r['url'].find('www.nyaa.se/?page3') >= 0) + self.assertTrue(r['torrentfile'].find('www.nyaa.se/?page_dl') >= 0) + self.assertTrue(r['content'].find('English-translated Anime') >= 0) + self.assertTrue(r['content'].find('Downloaded 666 times.') >= 0) + + self.assertEqual(r['title'], 'Sample torrent title') + self.assertEqual(r['seed'], 1) + self.assertEqual(r['leech'], 3) + self.assertEqual(r['filesize'], 10 * 1024 * 1024) diff --git a/sources/searx/tests/engines/test_openstreetmap.py b/sources/tests/unit/engines/test_openstreetmap.py similarity index 100% rename from sources/searx/tests/engines/test_openstreetmap.py rename to sources/tests/unit/engines/test_openstreetmap.py diff --git a/sources/searx/tests/engines/test_photon.py b/sources/tests/unit/engines/test_photon.py similarity index 100% rename from sources/searx/tests/engines/test_photon.py rename to sources/tests/unit/engines/test_photon.py diff --git a/sources/searx/tests/engines/test_piratebay.py b/sources/tests/unit/engines/test_piratebay.py similarity index 100% rename from sources/searx/tests/engines/test_piratebay.py rename to sources/tests/unit/engines/test_piratebay.py diff --git a/sources/searx/tests/engines/test_qwant.py b/sources/tests/unit/engines/test_qwant.py similarity index 100% rename from sources/searx/tests/engines/test_qwant.py rename to sources/tests/unit/engines/test_qwant.py diff --git a/sources/tests/unit/engines/test_reddit.py b/sources/tests/unit/engines/test_reddit.py new file mode 100644 index 0000000..9c94f4e --- /dev/null +++ b/sources/tests/unit/engines/test_reddit.py @@ -0,0 +1,71 @@ +from collections import defaultdict +import mock +from searx.engines import reddit +from searx.testing import SearxTestCase +from datetime import datetime + + +class TestRedditEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dic = defaultdict(dict) + params = reddit.request(query, dic) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('reddit.com' in params['url']) + + def test_response(self): + resp = mock.Mock(text='{}') + self.assertEqual(reddit.response(resp), []) + + json = """ + { + "kind": "Listing", + "data": { + "children": [{ + "data": { + "url": "http://google2.com/", + "permalink": "http://google.com/", + "title": "Title number one", + "selftext": "Sample", + "created_utc": 1401219957.0, + "thumbnail": "http://image.com/picture.jpg" + } + }, { + "data": { + "url": "https://reddit2.com/", + "permalink": "https://reddit.com/", + "title": "Title number two", + "selftext": "Dominus vobiscum", + "created_utc": 1438792533.0, + "thumbnail": "self" + } + }] + } + } + """ + + resp = mock.Mock(text=json) + results = reddit.response(resp) + + self.assertEqual(len(results), 2) + self.assertEqual(type(results), list) + + # testing first result (picture) + r = results[0] + self.assertEqual(r['url'], 'http://google.com/') + self.assertEqual(r['title'], 'Title number one') + self.assertEqual(r['template'], 'images.html') + self.assertEqual(r['img_src'], 'http://google2.com/') + self.assertEqual(r['thumbnail_src'], 'http://image.com/picture.jpg') + + # testing second result (self-post) + r = results[1] + self.assertEqual(r['url'], 'https://reddit.com/') + self.assertEqual(r['title'], 'Title number two') + self.assertEqual(r['content'], 'Dominus vobiscum') + created = 
datetime.fromtimestamp(1438792533.0) + self.assertEqual(r['publishedDate'], created) + self.assertTrue('thumbnail_src' not in r) + self.assertTrue('img_src' not in r) diff --git a/sources/searx/tests/engines/test_searchcode_code.py b/sources/tests/unit/engines/test_searchcode_code.py similarity index 100% rename from sources/searx/tests/engines/test_searchcode_code.py rename to sources/tests/unit/engines/test_searchcode_code.py diff --git a/sources/searx/tests/engines/test_searchcode_doc.py b/sources/tests/unit/engines/test_searchcode_doc.py similarity index 100% rename from sources/searx/tests/engines/test_searchcode_doc.py rename to sources/tests/unit/engines/test_searchcode_doc.py diff --git a/sources/searx/tests/engines/test_soundcloud.py b/sources/tests/unit/engines/test_soundcloud.py similarity index 100% rename from sources/searx/tests/engines/test_soundcloud.py rename to sources/tests/unit/engines/test_soundcloud.py diff --git a/sources/searx/tests/engines/test_spotify.py b/sources/tests/unit/engines/test_spotify.py similarity index 100% rename from sources/searx/tests/engines/test_spotify.py rename to sources/tests/unit/engines/test_spotify.py diff --git a/sources/searx/tests/engines/test_stackoverflow.py b/sources/tests/unit/engines/test_stackoverflow.py similarity index 100% rename from sources/searx/tests/engines/test_stackoverflow.py rename to sources/tests/unit/engines/test_stackoverflow.py diff --git a/sources/searx/tests/engines/test_startpage.py b/sources/tests/unit/engines/test_startpage.py similarity index 100% rename from sources/searx/tests/engines/test_startpage.py rename to sources/tests/unit/engines/test_startpage.py diff --git a/sources/searx/tests/engines/test_subtitleseeker.py b/sources/tests/unit/engines/test_subtitleseeker.py similarity index 100% rename from sources/searx/tests/engines/test_subtitleseeker.py rename to sources/tests/unit/engines/test_subtitleseeker.py diff --git a/sources/searx/tests/engines/test_swisscows.py b/sources/tests/unit/engines/test_swisscows.py similarity index 100% rename from sources/searx/tests/engines/test_swisscows.py rename to sources/tests/unit/engines/test_swisscows.py diff --git a/sources/tests/unit/engines/test_tokyotoshokan.py b/sources/tests/unit/engines/test_tokyotoshokan.py new file mode 100644 index 0000000..efe7dbf --- /dev/null +++ b/sources/tests/unit/engines/test_tokyotoshokan.py @@ -0,0 +1,110 @@ +import mock +from collections import defaultdict +from searx.engines import tokyotoshokan +from searx.testing import SearxTestCase +from datetime import datetime + + +class TestTokyotoshokanEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dic = defaultdict(dict) + dic['pageno'] = 1 + params = tokyotoshokan.request(query, dic) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('tokyotosho.info' in params['url']) + + def test_response(self): + resp = mock.Mock(text='') + self.assertEqual(tokyotoshokan.response(resp), []) + + html = """ + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + Koyomimonogatari + + Details
    + Authorized: Yes + Submitter: Ohys | + Size: 10.5MB | + Date: 2016-03-26 16:41 UTC | + Comment: sample comment + + S: 53 + L: 18 + C: 0 + ID: 975700 +
    + + + + Owarimonogatari + + Details
    + Submitter: Ohys | + Size: 932.84EB | + Date: QWERTY-03-26 16:41 UTC + + S: 0 +
    + """ + + resp = mock.Mock(text=html) + results = tokyotoshokan.response(resp) + + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + + # testing the first result, which has correct format + # and should have all information fields filled + r = results[0] + self.assertEqual(r['url'], 'http://www.nyaa.se/f') + self.assertEqual(r['title'], 'Koyomimonogatari') + self.assertEqual(r['magnetlink'], 'magnet:?xt=urn:btih:4c19eb46b5113685fbd2288ed2531b0b') + self.assertEqual(r['filesize'], int(1024 * 1024 * 10.5)) + self.assertEqual(r['publishedDate'], datetime(2016, 03, 26, 16, 41)) + self.assertEqual(r['content'], 'Comment: sample comment') + self.assertEqual(r['seed'], 53) + self.assertEqual(r['leech'], 18) + + # testing the second result, which does not include magnet link, + # seed & leech info, and has incorrect size & creation date + r = results[1] + self.assertEqual(r['url'], 'http://google.com/q') + self.assertEqual(r['title'], 'Owarimonogatari') + + self.assertFalse('magnetlink' in r) + self.assertFalse('filesize' in r) + self.assertFalse('content' in r) + self.assertFalse('publishedDate' in r) + self.assertFalse('seed' in r) + self.assertFalse('leech' in r) diff --git a/sources/tests/unit/engines/test_torrentz.py b/sources/tests/unit/engines/test_torrentz.py new file mode 100644 index 0000000..2f836f7 --- /dev/null +++ b/sources/tests/unit/engines/test_torrentz.py @@ -0,0 +1,91 @@ +import mock +from collections import defaultdict +from searx.engines import torrentz +from searx.testing import SearxTestCase +from datetime import datetime + + +class TestTorrentzEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dic = defaultdict(dict) + dic['pageno'] = 1 + params = torrentz.request(query, dic) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('torrentz.eu' in params['url']) + + def test_response(self): + resp = mock.Mock(text='') + self.assertEqual(torrentz.response(resp), []) + + html = """ +
    +
    +
    + + Completely valid info + + books ebooks +
    +
    + 1 + + 4 months + + 30 MB + 14 + 1 +
    +
    + +
    +
    + + Invalid hash and date and filesize + + books ebooks +
    +
    + 1 + + 4 months + + 30MB + 5,555 + 1,234,567 +
    +
    +
    + """ + + resp = mock.Mock(text=html) + results = torrentz.response(resp) + + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + + # testing against the first result + r = results[0] + self.assertEqual(r['url'], 'https://torrentz.eu/4362e08b1d80e1820fb2550b752f9f3126fe76d6') + self.assertEqual(r['title'], 'Completely valid info books ebooks') + # 22 Nov 2015 03:01:42 + self.assertEqual(r['publishedDate'], datetime(2015, 11, 22, 3, 1, 42)) + self.assertEqual(r['seed'], 14) + self.assertEqual(r['leech'], 1) + self.assertEqual(r['filesize'], 30 * 1024 * 1024) + self.assertEqual(r['magnetlink'], 'magnet:?xt=urn:btih:4362e08b1d80e1820fb2550b752f9f3126fe76d6') + + # testing against the second result + r = results[1] + self.assertEqual(r['url'], 'https://torrentz.eu/poaskdpokaspod') + self.assertEqual(r['title'], 'Invalid hash and date and filesize books ebooks') + self.assertEqual(r['seed'], 5555) + self.assertEqual(r['leech'], 1234567) + + # in the second result we have invalid hash, creation date & torrent size, + # so these tests should fail + self.assertFalse('magnetlink' in r) + self.assertFalse('filesize' in r) + self.assertFalse('publishedDate' in r) diff --git a/sources/searx/tests/engines/test_twitter.py b/sources/tests/unit/engines/test_twitter.py similarity index 100% rename from sources/searx/tests/engines/test_twitter.py rename to sources/tests/unit/engines/test_twitter.py diff --git a/sources/searx/tests/engines/test_vimeo.py b/sources/tests/unit/engines/test_vimeo.py similarity index 100% rename from sources/searx/tests/engines/test_vimeo.py rename to sources/tests/unit/engines/test_vimeo.py diff --git a/sources/tests/unit/engines/test_wikipedia.py b/sources/tests/unit/engines/test_wikipedia.py new file mode 100644 index 0000000..d1c4403 --- /dev/null +++ b/sources/tests/unit/engines/test_wikipedia.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import wikipedia +from searx.testing import SearxTestCase + + +class TestWikipediaEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['language'] = 'fr_FR' + params = wikipedia.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('test_query', params['url']) + self.assertIn('Test_Query', params['url']) + self.assertIn('fr.wikipedia.org', params['url']) + + query = 'Test_Query' + params = wikipedia.request(query, dicto) + self.assertIn('Test_Query', params['url']) + self.assertNotIn('test_query', params['url']) + + dicto['language'] = 'all' + params = wikipedia.request(query, dicto) + self.assertIn('en', params['url']) + + def test_response(self): + dicto = defaultdict(dict) + dicto['language'] = 'fr' + + self.assertRaises(AttributeError, wikipedia.response, None) + self.assertRaises(AttributeError, wikipedia.response, []) + self.assertRaises(AttributeError, wikipedia.response, '') + self.assertRaises(AttributeError, wikipedia.response, '[]') + + # page not found + json = """ + { + "batchcomplete": "", + "query": { + "normalized": [], + "pages": { + "-1": { + "ns": 0, + "title": "", + "missing": "" + } + } + } + }""" + response = mock.Mock(content=json, search_params=dicto) + self.assertEqual(wikipedia.response(response), []) + + # normal case + json = """ + { + "batchcomplete": "", + "query": { + "normalized": [], + "pages": { + "12345": { + "pageid": 12345, + "ns": 0, + "title": "The Title", + "extract": "The Title is...", + 
"thumbnail": { + "source": "img_src.jpg" + }, + "pageimage": "img_name.jpg" + } + } + } + }""" + response = mock.Mock(content=json, search_params=dicto) + results = wikipedia.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertEqual(results[0]['title'], u'The Title') + self.assertIn('fr.wikipedia.org/wiki/The_Title', results[0]['url']) + self.assertEqual(results[1]['infobox'], u'The Title') + self.assertIn('fr.wikipedia.org/wiki/The_Title', results[1]['id']) + self.assertIn('The Title is...', results[1]['content']) + self.assertEqual(results[1]['img_src'], 'img_src.jpg') + + # disambiguation page + json = """ + { + "batchcomplete": "", + "query": { + "normalized": [], + "pages": { + "12345": { + "pageid": 12345, + "ns": 0, + "title": "The Title", + "extract": "The Title can be:\\nThe Title 1\\nThe Title 2\\nThe Title 3\\nThe Title 4......................................................................................................................................." """ # noqa + json += """ + } + } + } + }""" + response = mock.Mock(content=json, search_params=dicto) + results = wikipedia.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + # no image + json = """ + { + "batchcomplete": "", + "query": { + "normalized": [], + "pages": { + "12345": { + "pageid": 12345, + "ns": 0, + "title": "The Title", + "extract": "The Title is......................................................................................................................................................................................." """ # noqa + json += """ + } + } + } + }""" + response = mock.Mock(content=json, search_params=dicto) + results = wikipedia.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertIn('The Title is...', results[1]['content']) + self.assertEqual(results[1]['img_src'], None) + + # title not in first paragraph + json = u""" + { + "batchcomplete": "", + "query": { + "normalized": [], + "pages": { + "12345": { + "pageid": 12345, + "ns": 0, + "title": "披頭四樂隊", + "extract": "披头士乐队....................................................................................................................................................................................................\\n披頭四樂隊...", """ # noqa + json += """ + "thumbnail": { + "source": "img_src.jpg" + }, + "pageimage": "img_name.jpg" + } + } + } + }""" + response = mock.Mock(content=json, search_params=dicto) + results = wikipedia.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertEqual(results[1]['infobox'], u'披頭四樂隊') + self.assertIn(u'披头士乐队...', results[1]['content']) diff --git a/sources/tests/unit/engines/test_wolframalpha_api.py b/sources/tests/unit/engines/test_wolframalpha_api.py new file mode 100644 index 0000000..76404e1 --- /dev/null +++ b/sources/tests/unit/engines/test_wolframalpha_api.py @@ -0,0 +1,164 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from requests import Request +from searx.engines import wolframalpha_api +from searx.testing import SearxTestCase + + +class TestWolframAlphaAPIEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + params = wolframalpha_api.request(query, dicto) + + # TODO: test api_key + self.assertIn('url', params) + self.assertIn('https://api.wolframalpha.com/v2/query?', params['url']) + self.assertIn(query, 
params['url']) + self.assertEqual('https://www.wolframalpha.com/input/?i=test_query', params['headers']['Referer']) + + def test_replace_pua_chars(self): + self.assertEqual('i', wolframalpha_api.replace_pua_chars(u'\uf74e')) + + def test_response(self): + self.assertRaises(AttributeError, wolframalpha_api.response, None) + self.assertRaises(AttributeError, wolframalpha_api.response, []) + self.assertRaises(AttributeError, wolframalpha_api.response, '') + self.assertRaises(AttributeError, wolframalpha_api.response, '[]') + + referer_url = 'referer_url' + request = Request(headers={'Referer': referer_url}) + + # test failure + xml = ''' + + ''' + response = mock.Mock(content=xml) + self.assertEqual(wolframalpha_api.response(response), []) + + # test basic case + xml = """ + + + + input_img_alt + input_plaintext</plaintext> + </subpod> + </pod> + <pod title='Result' + scanner='Simplification' + id='Result' + numsubpods='1' + primary='true'> + <subpod title=''> + <img src='result_img_src.gif' + alt='result_img_alt' + title='result_img_title' /> + <plaintext>result_plaintext</plaintext> + </subpod> + </pod> + <pod title='Manipulatives illustration' + scanner='Arithmetic' + id='Illustration' + numsubpods='1'> + <subpod title=''> + <img src='illustration_img_src.gif' + alt='illustration_img_alt' /> + <plaintext>illustration_plaintext</plaintext> + </subpod> + </pod> + </queryresult> + """ + response = mock.Mock(content=xml, request=request) + results = wolframalpha_api.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertEqual('input_plaintext', results[0]['infobox']) + + self.assertEqual(len(results[0]['attributes']), 3) + self.assertEqual('Input', results[0]['attributes'][0]['label']) + self.assertEqual('input_plaintext', results[0]['attributes'][0]['value']) + self.assertEqual('Result', results[0]['attributes'][1]['label']) + self.assertEqual('result_plaintext', results[0]['attributes'][1]['value']) + self.assertEqual('Manipulatives illustration', results[0]['attributes'][2]['label']) + self.assertEqual('illustration_img_src.gif', results[0]['attributes'][2]['image']['src']) + self.assertEqual('illustration_img_alt', results[0]['attributes'][2]['image']['alt']) + + self.assertEqual(len(results[0]['urls']), 1) + + self.assertEqual(referer_url, results[0]['urls'][0]['url']) + self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) + self.assertEqual(referer_url, results[1]['url']) + self.assertEqual('Wolfram|Alpha', results[1]['title']) + + # test calc + xml = """<?xml version='1.0' encoding='UTF-8'?> + <queryresult success='true' + error='false' + numpods='2' + datatypes='' + parsetimedout='false' + id='queryresult_id' + host='http://www5b.wolframalpha.com' + related='related_url' + version='2.6' > + <pod title='Indefinite integral' + scanner='Integral' + id='IndefiniteIntegral' + error='false' + numsubpods='1' + primary='true'> + <subpod title=''> + <img src='integral_image.gif' + alt='integral_img_alt' + title='integral_img_title' /> + <plaintext>integral_plaintext</plaintext> + </subpod> + </pod> + <pod title='Plot of the integral' + scanner='Integral' + id='Plot' + error='false' + numsubpods='1'> + <subpod title=''> + <img src='plot.gif' + alt='plot_alt' + title='' /> + <plaintext></plaintext> + </subpod> + </pod> + </queryresult> + """ + response = mock.Mock(content=xml, request=request) + results = wolframalpha_api.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + 
self.assertEqual('integral_plaintext', results[0]['infobox']) + + self.assertEqual(len(results[0]['attributes']), 2) + self.assertEqual('Indefinite integral', results[0]['attributes'][0]['label']) + self.assertEqual('integral_plaintext', results[0]['attributes'][0]['value']) + self.assertEqual('Plot of the integral', results[0]['attributes'][1]['label']) + self.assertEqual('plot.gif', results[0]['attributes'][1]['image']['src']) + self.assertEqual('plot_alt', results[0]['attributes'][1]['image']['alt']) + + self.assertEqual(len(results[0]['urls']), 1) + + self.assertEqual(referer_url, results[0]['urls'][0]['url']) + self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) + self.assertEqual(referer_url, results[1]['url']) + self.assertEqual('Wolfram|Alpha', results[1]['title']) diff --git a/sources/tests/unit/engines/test_wolframalpha_noapi.py b/sources/tests/unit/engines/test_wolframalpha_noapi.py new file mode 100644 index 0000000..068c1be --- /dev/null +++ b/sources/tests/unit/engines/test_wolframalpha_noapi.py @@ -0,0 +1,222 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from requests import Request +from searx.engines import wolframalpha_noapi +from searx.testing import SearxTestCase + + +class TestWolframAlphaNoAPIEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + params = wolframalpha_noapi.request(query, dicto) + + self.assertIn('url', params) + self.assertIn('https://www.wolframalpha.com/input/json.jsp', params['url']) + self.assertIn(query, params['url']) + self.assertEqual('https://www.wolframalpha.com/input/?i=test_query', params['headers']['Referer']) + + def test_response(self): + self.assertRaises(AttributeError, wolframalpha_noapi.response, None) + self.assertRaises(AttributeError, wolframalpha_noapi.response, []) + self.assertRaises(AttributeError, wolframalpha_noapi.response, '') + self.assertRaises(AttributeError, wolframalpha_noapi.response, '[]') + + referer_url = 'referer_url' + request = Request(headers={'Referer': referer_url}) + + # test failure + json = ''' + {"queryresult" : { + "success" : false, + "error" : false, + "numpods" : 0, + "id" : "", + "host" : "https:\/\/www5a.wolframalpha.com", + "didyoumeans" : {} + }} + ''' + response = mock.Mock(text=json, request=request) + self.assertEqual(wolframalpha_noapi.response(response), []) + + # test basic case + json = ''' + {"queryresult" : { + "success" : true, + "error" : false, + "numpods" : 6, + "datatypes" : "Math", + "id" : "queryresult_id", + "host" : "https:\/\/www5b.wolframalpha.com", + "related" : "related_url", + "version" : "2.6", + "pods" : [ + { + "title" : "Input", + "scanners" : [ + "Identity" + ], + "id" : "Input", + "error" : false, + "numsubpods" : 1, + "subpods" : [ + { + "title" : "", + "img" : { + "src" : "input_img_src.gif", + "alt" : "input_img_alt", + "title" : "input_img_title" + }, + "plaintext" : "input_plaintext", + "minput" : "input_minput" + } + ] + }, + { + "title" : "Result", + "scanners" : [ + "Simplification" + ], + "id" : "Result", + "error" : false, + "numsubpods" : 1, + "primary" : true, + "subpods" : [ + { + "title" : "", + "img" : { + "src" : "result_img_src.gif", + "alt" : "result_img_alt", + "title" : "result_img_title" + }, + "plaintext" : "result_plaintext", + "moutput" : "result_moutput" + } + ] + }, + { + "title" : "Manipulatives illustration", + "scanners" : [ + "Arithmetic" + ], + "id" : "Illustration", + "error" : false, + "numsubpods" : 1, + "subpods" : [ + { + "title" : "", 
+ "CDFcontent" : "Resizeable", + "img" : { + "src" : "illustration_img_src.gif", + "alt" : "illustration_img_alt", + "title" : "illustration_img_title" + }, + "plaintext" : "illustration_img_plaintext" + } + ] + } + ] + }} + ''' + response = mock.Mock(text=json, request=request) + results = wolframalpha_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertEqual('input_plaintext', results[0]['infobox']) + + self.assertEqual(len(results[0]['attributes']), 3) + self.assertEqual('Input', results[0]['attributes'][0]['label']) + self.assertEqual('input_plaintext', results[0]['attributes'][0]['value']) + self.assertEqual('Result', results[0]['attributes'][1]['label']) + self.assertEqual('result_plaintext', results[0]['attributes'][1]['value']) + self.assertEqual('Manipulatives illustration', results[0]['attributes'][2]['label']) + self.assertEqual('illustration_img_src.gif', results[0]['attributes'][2]['image']['src']) + self.assertEqual('illustration_img_alt', results[0]['attributes'][2]['image']['alt']) + + self.assertEqual(len(results[0]['urls']), 1) + + self.assertEqual(referer_url, results[0]['urls'][0]['url']) + self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) + self.assertEqual(referer_url, results[1]['url']) + self.assertEqual('Wolfram|Alpha', results[1]['title']) + + # test calc + json = """ + {"queryresult" : { + "success" : true, + "error" : false, + "numpods" : 2, + "datatypes" : "", + "id" : "queryresult_id", + "host" : "https:\/\/www4b.wolframalpha.com", + "related" : "related_url", + "version" : "2.6", + "pods" : [ + { + "title" : "Indefinite integral", + "scanners" : [ + "Integral" + ], + "id" : "IndefiniteIntegral", + "error" : false, + "numsubpods" : 1, + "primary" : true, + "subpods" : [ + { + "title" : "", + "img" : { + "src" : "integral_img_src.gif", + "alt" : "integral_img_alt", + "title" : "integral_img_title" + }, + "plaintext" : "integral_plaintext", + "minput" : "integral_minput", + "moutput" : "integral_moutput" + } + ] + }, + { + "title" : "Plot of the integral", + "scanners" : [ + "Integral" + ], + "id" : "Plot", + "error" : false, + "numsubpods" : 1, + "subpods" : [ + { + "title" : "", + "img" : { + "src" : "plot.gif", + "alt" : "plot_alt", + "title" : "plot_title" + }, + "plaintext" : "", + "minput" : "plot_minput" + } + ] + } + ] + }} + """ + response = mock.Mock(text=json, request=request) + results = wolframalpha_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertEqual('integral_plaintext', results[0]['infobox']) + + self.assertEqual(len(results[0]['attributes']), 2) + self.assertEqual('Indefinite integral', results[0]['attributes'][0]['label']) + self.assertEqual('integral_plaintext', results[0]['attributes'][0]['value']) + self.assertEqual('Plot of the integral', results[0]['attributes'][1]['label']) + self.assertEqual('plot.gif', results[0]['attributes'][1]['image']['src']) + self.assertEqual('plot_alt', results[0]['attributes'][1]['image']['alt']) + + self.assertEqual(len(results[0]['urls']), 1) + + self.assertEqual(referer_url, results[0]['urls'][0]['url']) + self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) + self.assertEqual(referer_url, results[1]['url']) + self.assertEqual('Wolfram|Alpha', results[1]['title']) diff --git a/sources/searx/tests/engines/test_www1x.py b/sources/tests/unit/engines/test_www1x.py similarity index 100% rename from sources/searx/tests/engines/test_www1x.py rename to 
sources/tests/unit/engines/test_www1x.py diff --git a/sources/searx/tests/engines/test_www500px.py b/sources/tests/unit/engines/test_www500px.py similarity index 100% rename from sources/searx/tests/engines/test_www500px.py rename to sources/tests/unit/engines/test_www500px.py diff --git a/sources/searx/tests/engines/test_yacy.py b/sources/tests/unit/engines/test_yacy.py similarity index 100% rename from sources/searx/tests/engines/test_yacy.py rename to sources/tests/unit/engines/test_yacy.py diff --git a/sources/searx/tests/engines/test_yahoo.py b/sources/tests/unit/engines/test_yahoo.py similarity index 100% rename from sources/searx/tests/engines/test_yahoo.py rename to sources/tests/unit/engines/test_yahoo.py diff --git a/sources/searx/tests/engines/test_yahoo_news.py b/sources/tests/unit/engines/test_yahoo_news.py similarity index 100% rename from sources/searx/tests/engines/test_yahoo_news.py rename to sources/tests/unit/engines/test_yahoo_news.py diff --git a/sources/searx/tests/engines/test_youtube_api.py b/sources/tests/unit/engines/test_youtube_api.py similarity index 100% rename from sources/searx/tests/engines/test_youtube_api.py rename to sources/tests/unit/engines/test_youtube_api.py diff --git a/sources/searx/tests/engines/test_youtube_noapi.py b/sources/tests/unit/engines/test_youtube_noapi.py similarity index 100% rename from sources/searx/tests/engines/test_youtube_noapi.py rename to sources/tests/unit/engines/test_youtube_noapi.py diff --git a/sources/searx/tests/test_plugins.py b/sources/tests/unit/test_plugins.py similarity index 100% rename from sources/searx/tests/test_plugins.py rename to sources/tests/unit/test_plugins.py diff --git a/sources/tests/unit/test_preferences.py b/sources/tests/unit/test_preferences.py new file mode 100644 index 0000000..e418c0a --- /dev/null +++ b/sources/tests/unit/test_preferences.py @@ -0,0 +1,101 @@ +from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException, + MultipleChoiceSetting, PluginsSetting, ValidationException) +from searx.testing import SearxTestCase + + +class PluginStub(object): + def __init__(self, id, default_on): + self.id = id + self.default_on = default_on + + +class TestSettings(SearxTestCase): + # map settings + def test_map_setting_invalid_initialization(self): + with self.assertRaises(MissingArgumentException): + setting = MapSetting(3, wrong_argument={'0': 0}) + + def test_map_setting_invalid_default_value(self): + with self.assertRaises(ValidationException): + setting = MapSetting(3, map={'dog': 1, 'bat': 2}) + + def test_map_setting_invalid_choice(self): + setting = MapSetting(2, map={'dog': 1, 'bat': 2}) + with self.assertRaises(ValidationException): + setting.parse('cat') + + def test_map_setting_valid_default(self): + setting = MapSetting(3, map={'dog': 1, 'bat': 2, 'cat': 3}) + self.assertEquals(setting.get_value(), 3) + + def test_map_setting_valid_choice(self): + setting = MapSetting(3, map={'dog': 1, 'bat': 2, 'cat': 3}) + self.assertEquals(setting.get_value(), 3) + setting.parse('bat') + self.assertEquals(setting.get_value(), 2) + + def test_enum_setting_invalid_initialization(self): + with self.assertRaises(MissingArgumentException): + setting = EnumStringSetting('cat', wrong_argument=[0, 1, 2]) + + # enum settings + def test_enum_setting_invalid_initialization(self): + with self.assertRaises(MissingArgumentException): + setting = EnumStringSetting('cat', wrong_argument=[0, 1, 2]) + + def test_enum_setting_invalid_default_value(self): + with 
self.assertRaises(ValidationException): + setting = EnumStringSetting(3, choices=[0, 1, 2]) + + def test_enum_setting_invalid_choice(self): + setting = EnumStringSetting(0, choices=[0, 1, 2]) + with self.assertRaises(ValidationException): + setting.parse(3) + + def test_enum_setting_valid_default(self): + setting = EnumStringSetting(3, choices=[1, 2, 3]) + self.assertEquals(setting.get_value(), 3) + + def test_enum_setting_valid_choice(self): + setting = EnumStringSetting(3, choices=[1, 2, 3]) + self.assertEquals(setting.get_value(), 3) + setting.parse(2) + self.assertEquals(setting.get_value(), 2) + + # multiple choice settings + def test_multiple_setting_invalid_initialization(self): + with self.assertRaises(MissingArgumentException): + setting = MultipleChoiceSetting(['2'], wrong_argument=['0', '1', '2']) + + def test_multiple_setting_invalid_default_value(self): + with self.assertRaises(ValidationException): + setting = MultipleChoiceSetting(['3', '4'], choices=['0', '1', '2']) + + def test_multiple_setting_invalid_choice(self): + setting = MultipleChoiceSetting(['1', '2'], choices=['0', '1', '2']) + with self.assertRaises(ValidationException): + setting.parse('4, 3') + + def test_multiple_setting_valid_default(self): + setting = MultipleChoiceSetting(['3'], choices=['1', '2', '3']) + self.assertEquals(setting.get_value(), ['3']) + + def test_multiple_setting_valid_choice(self): + setting = MultipleChoiceSetting(['3'], choices=['1', '2', '3']) + self.assertEquals(setting.get_value(), ['3']) + setting.parse('2') + self.assertEquals(setting.get_value(), ['2']) + + # plugins settings + def test_plugins_setting_all_default_enabled(self): + plugin1 = PluginStub('plugin1', True) + plugin2 = PluginStub('plugin2', True) + setting = PluginsSetting(['3'], choices=[plugin1, plugin2]) + self.assertEquals(setting.get_enabled(), set(['plugin1', 'plugin2'])) + + def test_plugins_setting_few_default_enabled(self): + plugin1 = PluginStub('plugin1', True) + plugin2 = PluginStub('plugin2', False) + plugin3 = PluginStub('plugin3', True) + setting = PluginsSetting('name', choices=[plugin1, plugin2, plugin3]) + self.assertEquals(setting.get_enabled(), set(['plugin1', 'plugin3'])) diff --git a/sources/searx/tests/test_results.py b/sources/tests/unit/test_results.py similarity index 100% rename from sources/searx/tests/test_results.py rename to sources/tests/unit/test_results.py diff --git a/sources/searx/tests/test_search.py b/sources/tests/unit/test_search.py similarity index 100% rename from sources/searx/tests/test_search.py rename to sources/tests/unit/test_search.py diff --git a/sources/searx/tests/test_utils.py b/sources/tests/unit/test_utils.py similarity index 100% rename from sources/searx/tests/test_utils.py rename to sources/tests/unit/test_utils.py diff --git a/sources/searx/tests/test_webapp.py b/sources/tests/unit/test_webapp.py similarity index 97% rename from sources/searx/tests/test_webapp.py rename to sources/tests/unit/test_webapp.py index 071c01d..5697017 100644 --- a/sources/searx/tests/test_webapp.py +++ b/sources/tests/unit/test_webapp.py @@ -12,7 +12,6 @@ class ViewsTestCase(SearxTestCase): def setUp(self): webapp.app.config['TESTING'] = True # to get better error messages self.app = webapp.app.test_client() - webapp.default_theme = 'default' # set some defaults self.test_results = [ @@ -43,6 +42,11 @@ class ViewsTestCase(SearxTestCase): webapp.Search.search = search_mock + def get_current_theme_name_mock(override=None): + return 'default' + + webapp.get_current_theme_name = 
get_current_theme_name_mock + self.maxDiff = None # to see full diffs def test_index_empty(self): diff --git a/sources/versions.cfg b/sources/versions.cfg deleted file mode 100644 index 6aad7a7..0000000 --- a/sources/versions.cfg +++ /dev/null @@ -1,122 +0,0 @@ -[versions] -Babel = 1.3 -Flask = 0.10.1 -Flask-Babel = 0.9 -Jinja2 = 2.7.3 -MarkupSafe = 0.23 -Pygments = 2.0.2 -WebOb = 1.4.1 -WebTest = 2.0.18 -Werkzeug = 0.10.4 -collective.recipe.omelette = 0.16 -coverage = 3.7.1 -decorator = 3.4.2 -docutils = 0.12 -flake8 = 2.4.1 -itsdangerous = 0.24 -mccabe = 0.3.1 -mock = 1.0.1 -pep8 = 1.5.7 -plone.testing = 4.0.13 -pyflakes = 0.8.1 -pytz = 2015.4 -pyyaml = 3.11 -requests = 2.7.0 -robotframework-debuglibrary = 0.3 -robotframework-httplibrary = 0.4.2 -robotframework-selenium2library = 1.7.1 -robotsuite = 1.6.1 -selenium = 2.46.0 -speaklater = 1.3 -unittest2 = 1.0.1 -waitress = 0.8.9 -zc.recipe.testrunner = 2.0.0 -pyopenssl = 0.15.1 -ndg-httpsclient = 0.4.0 -pyasn1 = 0.1.8 -pyasn1-modules = 0.0.6 -certifi = 2015.04.28 - -cffi = 1.1.2 -cryptography = 0.9.1 - -# Required by: -# robotsuite==1.6.1 -# searx==0.7.0 -lxml = 3.4.4 - -# Required by: -# searx==0.7.0 -python-dateutil = 2.4.2 - -# Required by: -# searx==0.7.0 -# zope.exceptions==4.0.7 -# zope.interface==4.1.2 -# zope.testrunner==4.4.9 -setuptools = 18.0.1 - -# Required by: -# WebTest==2.0.18 -beautifulsoup4 = 4.3.2 - -# Required by: -# cryptography==0.9.1 -enum34 = 1.0.4 - -# Required by: -# cryptography==0.9.1 -idna = 2.0 - -# Required by: -# cryptography==0.9.1 -ipaddress = 1.0.7 - -# Required by: -# robotframework-httplibrary==0.4.2 -jsonpatch = 1.11 - -# Required by: -# robotframework-httplibrary==0.4.2 -jsonpointer = 1.9 - -# Required by: -# traceback2==1.4.0 -linecache2 = 1.0.0 - -# Required by: -# cffi==1.1.2 -pycparser = 2.12 - -# Required by: -# robotframework-httplibrary==0.4.2 -robotframework = 2.8.7 - -# Required by: -# robotsuite==1.6.1 -# zope.testrunner==4.4.9 -six = 1.9.0 - -# Required by: -# unittest2==1.0.1 -traceback2 = 1.4.0 - -# Required by: -# collective.recipe.omelette==0.16 -zc.recipe.egg = 2.0.1 - -# Required by: -# zope.testrunner==4.4.9 -zope.exceptions = 4.0.7 - -# Required by: -# zope.testrunner==4.4.9 -zope.interface = 4.1.2 - -# Required by: -# plone.testing==4.0.13 -zope.testing = 4.2.0 - -# Required by: -# zc.recipe.testrunner==2.0.0 -zope.testrunner = 4.4.9