searx_ynh/sources/searx/engines/kickass.py

"""
 Kickass Torrent (Videos, Music, Files)

 @website     https://kickass.so
 @provide-api no (nothing found)

 @using-api   no
 @results     HTML (using search portal)
 @stable      yes (HTML can change)
 @parse       url, title, content, seed, leech, filesize, files, magnetlink, torrentfile
"""

from urlparse import urljoin
from cgi import escape
from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text

# engine dependent config
# (module-level settings; presumably read by the searx engine loader --
# TODO confirm against the loader)
categories = ['videos', 'music', 'files']
# request() substitutes params['pageno'] into the search URL
paging = True

# search-url
# `url` is also the base that response() joins relative result links onto
url = 'https://kickass.to/'
# template filled in by request(): {search_term} is the percent-encoded
# query, {pageno} the requested result page
search_url = url + 'search/{search_term}/{pageno}/'

# specific xpath variables
# (all three are evaluated relative to a single result row in response())
# anchor whose href is the magnet link
magnet_xpath = './/a[@title="Torrent magnet link"]'
# anchor whose href is the .torrent file download
torrent_xpath = './/a[@title="Download torrent file"]'
# span whose text is used as the result's content
content_xpath = './/span[@class="font11px lightgrey block"]'


# build the search request
def request(query, params):
    """Set ``params['url']`` to the search page for ``query`` and return params.

    The query is percent-encoded with ``quote`` and substituted, together
    with ``params['pageno']``, into the module-level ``search_url`` template.
    ``params`` is modified in place and the same object is returned.
    """
    encoded_query = quote(query)
    page_number = params['pageno']

    params['url'] = search_url.format(search_term=encoded_query,
                                      pageno=page_number)
    return params


# get response from search-request
def response(resp):
    """Parse a search result page into a list of torrent results.

    ``resp.text`` is parsed as HTML and every row of the ``data`` table
    except the first is turned into one result dict with the keys
    ``url``, ``title``, ``content``, ``seed``, ``leech``, ``filesize``,
    ``files``, ``magnetlink``, ``torrentfile`` and ``template``.

    Field conversions:
      * ``seed`` / ``leech``: int, or 0 when the cell text is not numeric.
      * ``filesize``: size in bytes as int for the units TB/GB/MB/KB;
        the plain float value when the unit is not one of those;
        ``None`` when the number cannot be converted.
      * ``files``: int, or ``None`` when the cell text is not numeric.

    Returns the results sorted by ``seed`` in descending order, or an
    empty list when the page contains no matching table rows.

    Raises ``IndexError`` when a row lacks one of the expected elements
    (main link, seed/leech/size/files cells, magnet or torrent link).
    """
    results = []

    dom = html.fromstring(resp.text)

    search_res = dom.xpath('//table[@class="data"]//tr')

    # return empty array if nothing is found
    if not search_res:
        return []

    # size unit -> number of bytes per unit
    unit_factors = {'TB': 1024 ** 4,
                    'GB': 1024 ** 3,
                    'MB': 1024 ** 2,
                    'KB': 1024}

    # parse results; the first row is skipped (presumably the table header)
    for result in search_res[1:]:
        link = result.xpath('.//a[@class="cellMainLink"]')[0]
        href = urljoin(url, link.attrib['href'])
        title = extract_text(link)
        content = escape(extract_text(result.xpath(content_xpath)))
        seed = result.xpath('.//td[contains(@class, "green")]/text()')[0]
        leech = result.xpath('.//td[contains(@class, "red")]/text()')[0]
        filesize = result.xpath('.//td[contains(@class, "nobr")]/text()')[0]
        filesize_multiplier = result.xpath('.//td[contains(@class, "nobr")]//span/text()')[0]
        files = result.xpath('.//td[contains(@class, "center")][2]/text()')[0]

        # convert seed to int if possible
        seed = int(seed) if seed.isdigit() else 0

        # convert leech to int if possible
        leech = int(leech) if leech.isdigit() else 0

        # convert filesize to byte if possible
        try:
            filesize = float(filesize)

            # an unknown unit leaves the plain float value untouched
            factor = unit_factors.get(filesize_multiplier)
            if factor is not None:
                filesize = int(filesize * factor)
        except (ValueError, OverflowError):
            # ValueError: text is not a number (or is NaN);
            # OverflowError: value is infinite.  Only these are swallowed so
            # that KeyboardInterrupt/SystemExit still propagate.
            filesize = None

        # convert files to int if possible
        files = int(files) if files.isdigit() else None

        magnetlink = result.xpath(magnet_xpath)[0].attrib['href']

        torrentfile = result.xpath(torrent_xpath)[0].attrib['href']
        # percent-encode the link while leaving URL delimiters and already
        # encoded sequences ('%') as they are
        torrentfileurl = quote(torrentfile, safe="%/:=&?~#+!$,;'@()*")

        # append result
        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'seed': seed,
                        'leech': leech,
                        'filesize': filesize,
                        'files': files,
                        'magnetlink': magnetlink,
                        'torrentfile': torrentfileurl,
                        'template': 'torrent.html'})

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)
update version 0.8 2015-09-08 23:05:37 +02:00			`"""`
			`Kickass Torrent (Videos, Music, Files)`

			`@website https://kickass.so`
			`@provide-api no (nothing found)`

			`@using-api no`
			`@results HTML (using search portal)`
			`@stable yes (HTML can change)`
			`@parse url, title, content, seed, leech, magnetlink`
			`"""`
update version 0.6 2015-01-13 16:40:30 +01:00
			`from urlparse import urljoin`
			`from cgi import escape`
			`from urllib import quote`
			`from lxml import html`
			`from operator import itemgetter`
update searx 0.7 2015-02-09 13:30:16 +01:00			`from searx.engines.xpath import extract_text`
update version 0.6 2015-01-13 16:40:30 +01:00
			`# engine dependent config`
			`categories = ['videos', 'music', 'files']`
			`paging = True`

			`# search-url`
update sources 2015-02-17 12:45:54 +01:00			`url = 'https://kickass.to/'`
update version 0.6 2015-01-13 16:40:30 +01:00			`search_url = url + 'search/{search_term}/{pageno}/'`

			`# specific xpath variables`
			`magnet_xpath = './/a[@title="Torrent magnet link"]'`
update version 2015-01-13 17:13:08 +01:00			`torrent_xpath = './/a[@title="Download torrent file"]'`
			`content_xpath = './/span[@class="font11px lightgrey block"]'`
update version 0.6 2015-01-13 16:40:30 +01:00

			`# do search-request`
			`def request(query, params):`
			`params['url'] = search_url.format(search_term=quote(query),`
			`pageno=params['pageno'])`

			`return params`


			`# get response from search-request`
			`def response(resp):`
			`results = []`

			`dom = html.fromstring(resp.text)`

			`search_res = dom.xpath('//table[@class="data"]//tr')`

			`# return empty array if nothing is found`
			`if not search_res:`
			`return []`

			`# parse results`
			`for result in search_res[1:]:`
			`link = result.xpath('.//a[@class="cellMainLink"]')[0]`
			`href = urljoin(url, link.attrib['href'])`
update searx 0.7 2015-02-09 13:30:16 +01:00			`title = extract_text(link)`
			`content = escape(extract_text(result.xpath(content_xpath)))`
update version 0.6 2015-01-13 16:40:30 +01:00			`seed = result.xpath('.//td[contains(@class, "green")]/text()')[0]`
			`leech = result.xpath('.//td[contains(@class, "red")]/text()')[0]`
update version 2015-01-13 17:13:08 +01:00			`filesize = result.xpath('.//td[contains(@class, "nobr")]/text()')[0]`
			`filesize_multiplier = result.xpath('.//td[contains(@class, "nobr")]//span/text()')[0]`
			`files = result.xpath('.//td[contains(@class, "center")][2]/text()')[0]`
update version 0.6 2015-01-13 16:40:30 +01:00
			`# convert seed to int if possible`
			`if seed.isdigit():`
			`seed = int(seed)`
			`else:`
			`seed = 0`

			`# convert leech to int if possible`
			`if leech.isdigit():`
			`leech = int(leech)`
			`else:`
			`leech = 0`

update version 2015-01-13 17:13:08 +01:00			`# convert filesize to byte if possible`
			`try:`
			`filesize = float(filesize)`

			`# convert filesize to byte`
			`if filesize_multiplier == 'TB':`
			`filesize = int(filesize * 1024 * 1024 * 1024 * 1024)`
			`elif filesize_multiplier == 'GB':`
			`filesize = int(filesize * 1024 * 1024 * 1024)`
			`elif filesize_multiplier == 'MB':`
			`filesize = int(filesize * 1024 * 1024)`
update searx 0.7 2015-02-09 13:30:16 +01:00			`elif filesize_multiplier == 'KB':`
update version 2015-01-13 17:13:08 +01:00			`filesize = int(filesize * 1024)`
			`except:`
			`filesize = None`

			`# convert files to int if possible`
			`if files.isdigit():`
			`files = int(files)`
			`else:`
			`files = None`

update version 0.6 2015-01-13 16:40:30 +01:00			`magnetlink = result.xpath(magnet_xpath)[0].attrib['href']`

update version 2015-01-13 17:13:08 +01:00			`torrentfile = result.xpath(torrent_xpath)[0].attrib['href']`
			`torrentfileurl = quote(torrentfile, safe="%/:=&?~#+!$,;'@()*")`

update version 0.6 2015-01-13 16:40:30 +01:00			`# append result`
			`results.append({'url': href,`
			`'title': title,`
			`'content': content,`
			`'seed': seed,`
			`'leech': leech,`
update version 2015-01-13 17:13:08 +01:00			`'filesize': filesize,`
			`'files': files,`
update version 0.6 2015-01-13 16:40:30 +01:00			`'magnetlink': magnetlink,`
update version 2015-01-13 17:13:08 +01:00			`'torrentfile': torrentfileurl,`
update version 0.6 2015-01-13 16:40:30 +01:00			`'template': 'torrent.html'})`

			`# return results sorted by seeder`
			`return sorted(results, key=itemgetter('seed'), reverse=True)`