Mirror of https://github.com/YunoHost-Apps/searx_ynh.git
Commit fc207de1ab (parent 5a0f8811d5): add support noroot url

10 changed files with 278 additions and 36 deletions
conf/settings.yml-noroot (new file, 156 lines)
@@ -0,0 +1,156 @@
server:
    port : 8888
    secret_key : "ultrasecretkey" # change this!
    debug : True
    request_timeout : 2.0 # seconds
    base_url : True

engines:
  - name : wikipedia
    engine : wikipedia
    number_of_results : 1
    paging : False
    shortcut : wp

  - name : bing
    engine : bing
    locale : en-US
    shortcut : bi

  - name : bing news
    engine : bing_news
    locale : en-US
    shortcut : bin

  - name : currency
    engine : currency_convert
    categories : general
    shortcut : cc

  - name : deviantart
    engine : deviantart
    categories : images
    shortcut : da
    timeout: 3.0

  - name : ddg definitions
    engine : duckduckgo_definitions
    shortcut : ddd

  - name : duckduckgo
    engine : duckduckgo
    locale : en-us
    shortcut : ddg

  - name : filecrop
    engine : filecrop
    categories : files
    shortcut : fc

  - name : flickr
    engine : flickr
    categories : images
    shortcut : fl
    timeout: 3.0

  - name : github
    engine : github
    categories : it
    shortcut : gh

  - name : google
    engine : google
    shortcut : go

  - name : google images
    engine : google_images
    shortcut : goi

  - name : google news
    engine : google_news
    shortcut : gon

  - name : piratebay
    engine : piratebay
    categories : videos, music, files
    shortcut : tpb

  - name : soundcloud
    engine : soundcloud
    categories : music
    shortcut : sc

  - name : stackoverflow
    engine : stackoverflow
    categories : it
    shortcut : st

  - name : startpage
    engine : startpage
    base_url : 'https://startpage.com/'
    search_url : 'https://startpage.com/do/search'
    shortcut : sp

# +30% page load time
#  - name : ixquick
#    engine : startpage
#    base_url : 'https://www.ixquick.com/'
#    search_url : 'https://www.ixquick.com/do/search'

  - name : twitter
    engine : twitter
    categories : social media
    shortcut : tw

# maybe in a fun category
#  - name : uncyclopedia
#    engine : mediawiki
#    categories : general
#    shortcut : unc
#    url : https://uncyclopedia.wikia.com/

# tmp suspended - too slow, too many errors
#  - name : urbandictionary
#    engine : xpath
#    search_url : http://www.urbandictionary.com/define.php?term={query}
#    url_xpath : //div[@class="word"]//a/@href
#    title_xpath : //div[@class="word"]//a
#    content_xpath : //div[@class="definition"]
#    shortcut : ud

  - name : yahoo
    engine : yahoo
    shortcut : yh

  - name : yahoo news
    engine : yahoo_news
    shortcut : yhn

  - name : youtube
    engine : youtube
    categories : videos
    shortcut : yt

  - name : dailymotion
    engine : dailymotion
    locale : en_US
    categories : videos
    shortcut : dm

  - name : vimeo
    engine : vimeo
    categories : videos
    results_xpath : //div[@id="browse_content"]/ol/li
    url_xpath : ./a/@href
    title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
    content_xpath : ./a/img/@src
    shortcut : vm

locales:
    en : English
    de : Deutsch
    hu : Magyar
    fr : Français
    es : Español
    it : Italiano
    nl : Nederlands
manifest.json
@@ -26,7 +26,16 @@
     },
     "example": "/searx",
     "default": "/searx"
+},
+{
+    "name": "public_site",
+    "ask": {
+        "en": "Is it a public Searx ?"
+    },
+    "choices": ["Yes", "No"],
+    "default": "Yes"
 }
 ]
 }
 }
scripts/install
@@ -41,7 +41,12 @@ then
 fi

 #Configuration Searx
-sudo cp ../conf/settings.yml /opt/searx/searx/
+if [ $path != "/" ];
+then
+    sudo cp ../conf/settings.yml-noroot /opt/searx/searx/settings.yml
+else
+    sudo cp ../conf/settings.yml /opt/searx/searx/
+fi
 sudo sed -i -e "s/ultrasecretkey/`openssl rand -hex 16`/g" /opt/searx/searx/settings.yml

 # Set permissions to searx directory
@@ -62,13 +67,15 @@ else
 fi

 # Fix permission
-#sudo chmod 755 /etc/searx/
-#sudo find /opt/yunohost/searx/ -type d -exec chmod 2755 {} \;
-#sudo find /opt/yunohost/searx/ -type f -exec chmod g+r,o+r {} \;
-#sudo chmod 644 /etc/searx/*
+#sudo find /opt/searx/ -type d -exec chmod 2755 {} \;
+#sudo find /opt/searx/ -type f -exec chmod g+r,o+r {} \;

 ## Reload Nginx and regenerate SSOwat conf
 sudo service nginx reload
 sudo service uwsgi restart
-#sudo yunohost app setting searx skipped_uris -v "/"
+
+if [ $is_public = "Yes" ];
+then
+    sudo yunohost app setting searx skipped_uris -v "/"
+fi
 sudo yunohost app ssowatconf
sources/AUTHORS.rst (new file, 26 lines)
@@ -0,0 +1,26 @@
Searx was created and is maintained by Adam Tauber.

Major contributing authors:

- Adam Tauber <asciimoo@gmail.com> `@asciimoo <https://github.com/asciimoo>`_
- Matej Cotman
- Thomas Pointhuber
- Alexandre Flament

People who have submitted patches/translates, reported bugs, consulted features or
generally made searx better:

- Laszlo Hammerl
- Stefan Marsiske
- Gabor Nagy
- @pw3t
- @rhapsodhy
- András Veres-Szentkirályi
- Benjamin Sonntag
- @HLFH
- @TheRadialActive
- @Okhin
- André Koot
- Alejandro León Aznar
- rike
- dp
sources/searx/search.py
@@ -154,16 +154,24 @@ def score_results(results):
     # deduplication + scoring
     for i, res in enumerate(flat_res):
         res['parsed_url'] = urlparse(res['url'])
+        res['host'] = res['parsed_url'].netloc
+
+        if res['host'].startswith('www.'):
+            res['host'] = res['host'].replace('www.', '', 1)
+
         res['engines'] = [res['engine']]
         weight = 1.0
+
         if hasattr(engines[res['engine']], 'weight'):
             weight = float(engines[res['engine']].weight)
+
         score = int((flat_len - i) / engines_len) * weight + 1
         duplicated = False
+
         for new_res in results:
             p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
             p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa
-            if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\
+            if res['host'] == new_res['host'] and\
                p1 == p2 and\
                res['parsed_url'].query == new_res['parsed_url'].query and\
                res.get('template') == new_res.get('template'):
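The scoring change above deduplicates on a normalized host instead of the raw netloc, so http://example.com/foo and http://www.example.com/foo now collapse into a single result. A minimal standalone sketch of that normalization (Python 2, matching the urlparse import searx uses here; the helper name is my own):

from urlparse import urlparse  # Python 2 stdlib, as in searx at the time


def normalized_host(url):
    # strip one leading 'www.' so mirrored hosts compare equal
    host = urlparse(url).netloc
    if host.startswith('www.'):
        host = host.replace('www.', '', 1)
    return host


assert normalized_host('http://www.example.com/foo') == \
    normalized_host('http://example.com/foo')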
sources/searx/engines/flickr.py
@@ -1,35 +1,52 @@
 #!/usr/bin/env python

 from urllib import urlencode
-from lxml import html
+#from json import loads
 from urlparse import urljoin
+from lxml import html
+from time import time

 categories = ['images']

 url = 'https://secure.flickr.com/'
 search_url = url+'search/?{query}&page={page}'
-results_xpath = '//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]'  # noqa
+results_xpath = '//div[@class="view display-item-tile"]/figure/div'

 paging = True


 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}),
+    params['url'] = search_url.format(query=urlencode({'text': query}),
                                       page=params['pageno'])
+    time_string = str(int(time())-3)
+    params['cookies']['BX'] = '3oqjr6d9nmpgl&b=3&s=dh'
+    params['cookies']['xb'] = '421409'
+    params['cookies']['localization'] = 'en-us'
+    params['cookies']['flrbp'] = time_string +\
+        '-3a8cdb85a427a33efda421fbda347b2eaf765a54'
+    params['cookies']['flrbs'] = time_string +\
+        '-ed142ae8765ee62c9ec92a9513665e0ee1ba6776'
+    params['cookies']['flrb'] = '9'
     return params


 def response(resp):
-    global base_url
     results = []
     dom = html.fromstring(resp.text)
     for result in dom.xpath(results_xpath):
-        href = urljoin(url, result.attrib.get('href'))
-        img = result.xpath('.//img')[0]
-        title = img.attrib.get('alt', '')
-        img_src = img.attrib.get('data-defer-src')
+        img = result.xpath('.//img')
+
+        if not img:
+            continue
+
+        img = img[0]
+        img_src = 'https:'+img.attrib.get('src')
+
         if not img_src:
             continue
+
+        href = urljoin(url, result.xpath('.//a')[0].attrib.get('href'))
+        title = img.attrib.get('alt', '')
         results.append({'url': href,
                         'title': title,
                         'img_src': img_src,
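The rewritten response() above now tolerates result tiles that carry no <img> child: it checks the node list before indexing into it instead of assuming [0] exists. A stripped-down illustration of the same guard over an lxml tree (the sample HTML is invented):

from lxml import html

page = html.fromstring(
    '<div>'
    '<figure><div><a href="/photos/1">'
    '<img src="//c1.example.org/1.jpg" alt="one"/></a></div></figure>'
    '<figure><div><a href="/photos/2">no thumbnail</a></div></figure>'
    '</div>')

for result in page.xpath('//figure/div'):
    img = result.xpath('.//img')
    if not img:  # tile without a thumbnail: skip it, as in the patch
        continue
    img = img[0]
    print('https:' + img.attrib.get('src'))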
sources/searx/engines/piratebay.py
@@ -2,6 +2,7 @@ from urlparse import urljoin
 from cgi import escape
 from urllib import quote
 from lxml import html
+from operator import itemgetter

 categories = ['videos', 'music']

@@ -29,14 +30,27 @@ def response(resp):
     results = []
     dom = html.fromstring(resp.text)
     search_res = dom.xpath('//table[@id="searchResult"]//tr')

     if not search_res:
         return results

     for result in search_res[1:]:
         link = result.xpath('.//div[@class="detName"]//a')[0]
         href = urljoin(url, link.attrib.get('href'))
         title = ' '.join(link.xpath('.//text()'))
         content = escape(' '.join(result.xpath(content_xpath)))
         seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
+
+        if seed.isdigit():
+            seed = int(seed)
+        else:
+            seed = 0
+
+        if leech.isdigit():
+            leech = int(leech)
+        else:
+            leech = 0
+
         magnetlink = result.xpath(magnet_xpath)[0]
         results.append({'url': href,
                         'title': title,
@@ -45,4 +59,5 @@ def response(resp):
                         'leech': leech,
                         'magnetlink': magnetlink.attrib['href'],
                         'template': 'torrent.html'})
-    return results
+
+    return sorted(results, key=itemgetter('seed'), reverse=True)
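Two things change in the piratebay engine: the scraped seeder/leecher cells are coerced defensively (a bare int() on a non-numeric cell would raise, while the isdigit() check lets malformed rows fall back to 0), and the final list is ordered by seeders via operator.itemgetter. A self-contained sketch of both (the sample rows are invented):

from operator import itemgetter


def to_count(cell_text):
    # scraped table cell; fall back to 0 when it is not a plain number
    return int(cell_text) if cell_text.isdigit() else 0


results = [{'title': 'a', 'seed': to_count('12')},
           {'title': 'b', 'seed': to_count('N/A')},
           {'title': 'c', 'seed': to_count('97')}]

# most-seeded torrents first, as in the patched response()
print(sorted(results, key=itemgetter('seed'), reverse=True))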
sources/searx/tests/test_webapp.py
@@ -51,7 +51,7 @@ class ViewsTestCase(SearxTestCase):
             result.data
         )
         self.assertIn(
-            '<p class="content">first <span class="highlight">test</span> content<br /></p>',
+            '<p class="content">first <span class="highlight">test</span> content<br /></p>',  # noqa
             result.data
         )

sources/searx/utils.py
@@ -7,7 +7,9 @@ import re
 from random import choice

 ua_versions = ('26.0', '27.0', '28.0')
-ua_os = ('Windows NT 6.3; WOW64', 'X11; Linux x86_64; rv:26.0')
+ua_os = ('Windows NT 6.3; WOW64',
+         'X11; Linux x86_64',
+         'X11; Linux x86')
 ua = "Mozilla/5.0 ({os}) Gecko/20100101 Firefox/{version}"


@@ -28,7 +30,8 @@ def highlight_content(content, query):
     query = query.decode('utf-8')
     if content.lower().find(query.lower()) > -1:
         query_regex = u'({0})'.format(re.escape(query))
-        content = re.sub(query_regex, '<span class="highlight">\\1</span>', content, flags=re.I | re.U)
+        content = re.sub(query_regex, '<span class="highlight">\\1</span>',
+                         content, flags=re.I | re.U)
     else:
         regex_parts = []
         for chunk in query.split():
@@ -37,7 +40,8 @@ def highlight_content(content, query):
         else:
             regex_parts.append(u'{0}'.format(re.escape(chunk)))
         query_regex = u'({0})'.format('|'.join(regex_parts))
-        content = re.sub(query_regex, '<span class="highlight">\\1</span>', content, flags=re.I | re.U)
+        content = re.sub(query_regex, '<span class="highlight">\\1</span>',
+                         content, flags=re.I | re.U)

     return content

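The two re.sub calls rewrapped above are what produce the <span class="highlight"> markup asserted in the test change earlier: the query is regex-escaped and matched case-insensitively. A minimal reproduction of that substitution (the sample strings are mine):

import re

query = 'test'
content = 'first test content'
query_regex = u'({0})'.format(re.escape(query))
print(re.sub(query_regex, '<span class="highlight">\\1</span>',
             content, flags=re.I | re.U))
# -> first <span class="highlight">test</span> content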