# searx_ynh/sources/searx/engines/generalfile.py

"""
 General Files (Files)

 @website     http://www.general-files.org
 @provide-api no (nothing found)

 @using-api   no (because nothing found)
 @results     HTML (using search portal)
 @stable      no (HTML can change)
 @parse       url, title, content

 @todo        detect torrents?
"""

from lxml import html

# engine configuration: category list and paging flag
categories = ['files']
paging = True

# search URL
# NOTE(review): the module docstring names general-files.org as the
# website while requests are built against general-file.com -- confirm
# which host is current.
base_url = 'http://www.general-file.com'
# request() fills in the {letter}, {query} and {pageno} placeholders
search_url = '%s/files-{letter}/{query}/{pageno}' % base_url

# XPath selectors used by response()
# one node per search hit
result_xpath = '//table[@class="block-file"]'
# the remaining expressions are evaluated relative to a single hit
url_xpath = './/h2/a/@href'
title_xpath = './/h2/a//text()'
content_xpath = './/p//text()'


# do search-request
def request(query, params):
    """Store the search URL for ``query`` in ``params['url']``.

    The URL is built from ``search_url``: the ``letter`` placeholder gets
    the first character of the query, ``query`` gets the query as given,
    and ``pageno`` gets ``params['pageno']``.

    :param query: search terms; inserted into the URL path unchanged.
    :param params: request parameters; ``params['pageno']`` is read and
        ``params['url']`` is set.
    :returns: the same ``params`` object that was passed in.
    """
    # slice rather than index: an empty query gives an empty letter
    # instead of raising IndexError
    params['url'] = search_url.format(query=query,
                                      letter=query[:1],
                                      pageno=params['pageno'])

    return params


# get response from search-request
def response(resp):
    """Extract search results from a result page.

    ``resp.text`` is parsed as HTML and every node matched by
    ``result_xpath`` is examined. Nodes without a link, and nodes whose
    link does not start with ``/``, are skipped.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of dicts with the keys ``url`` (``base_url`` plus the
        link), ``title`` and ``content`` (joined text nodes).
    """
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(result_xpath):
        links = result.xpath(url_xpath)

        # a block without a title link has no target; skip it rather
        # than raise IndexError and lose every other result on the page
        if not links:
            continue

        url = links[0]

        # skip fast download links
        if not url.startswith('/'):
            continue

        # append result
        results.append({'url': base_url + url,
                        'title': ''.join(result.xpath(title_xpath)),
                        'content': ''.join(result.xpath(content_xpath))})

    # return results
    return results
update version 0.8 2015-09-08 23:05:37 +02:00			`"""`
			`General Files (Files)`

			`@website http://www.general-files.org`
			`@provide-api no (nothing found)`

			`@using-api no (because nothing found)`
			`@results HTML (using search portal)`
			`@stable no (HTML can change)`
			`@parse url, title, content`

			`@todo detect torrents?`
			`"""`
update searx 2014-12-01 12:26:38 +01:00
			`from lxml import html`

			`# engine dependent config`
			`categories = ['files']`
			`paging = True`

			`# search-url`
			`base_url = 'http://www.general-file.com'`
			`search_url = base_url + '/files-{letter}/{query}/{pageno}'`

			`# specific xpath variables`
			`result_xpath = '//table[@class="block-file"]'`
			`title_xpath = './/h2/a//text()'`
			`url_xpath = './/h2/a/@href'`
			`content_xpath = './/p//text()'`


			`# do search-request`
			`def request(query, params):`

			`params['url'] = search_url.format(query=query,`
			`letter=query[0],`
			`pageno=params['pageno'])`

			`return params`


			`# get response from search-request`
			`def response(resp):`
			`results = []`

			`dom = html.fromstring(resp.text)`

			`# parse results`
			`for result in dom.xpath(result_xpath):`
			`url = result.xpath(url_xpath)[0]`

			`# skip fast download links`
			`if not url.startswith('/'):`
			`continue`

			`# append result`
			`results.append({'url': base_url + url,`
			`'title': ''.join(result.xpath(title_xpath)),`
			`'content': ''.join(result.xpath(content_xpath))})`

			`# return results`
			`return results`