Add Nyaa.se search engine
This commit is contained in:
		
							parent
							
								
									d748b8419a
								
							
						
					
					
						commit
						e5677ae6b6
					
				
							
								
								
									
										115
									
								
								searx/engines/nyaa.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								searx/engines/nyaa.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,115 @@ | ||||
| """ | ||||
|  Nyaa.se (Anime Bittorrent tracker) | ||||
| 
 | ||||
|  @website      http://www.nyaa.se/ | ||||
|  @provide-api  no | ||||
|  @using-api    no | ||||
|  @results      HTML | ||||
|  @stable       no (HTML can change) | ||||
|  @parse        url, title, content, seed, leech, torrentfile | ||||
| """ | ||||
| 
 | ||||
| from cgi import escape | ||||
| from urllib import urlencode | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['files', 'images', 'videos', 'music'] | ||||
| paging = True | ||||
| 
 | ||||
| # search-url | ||||
| base_url = 'http://www.nyaa.se/' | ||||
| search_url = base_url + '?page=search&{query}&offset={offset}' | ||||
| 
 | ||||
| # xpath queries | ||||
| xpath_results = '//table[@class="tlist"]//tr[contains(@class, "tlistrow")]' | ||||
| xpath_category = './/td[@class="tlisticon"]/a' | ||||
| xpath_title = './/td[@class="tlistname"]/a' | ||||
| xpath_torrent_file = './/td[@class="tlistdownload"]/a' | ||||
| xpath_filesize = './/td[@class="tlistsize"]/text()' | ||||
| xpath_seeds = './/td[@class="tlistsn"]/text()' | ||||
| xpath_leeches = './/td[@class="tlistln"]/text()' | ||||
| xpath_downloads = './/td[@class="tlistdn"]/text()' | ||||
| 
 | ||||
| 
 | ||||
| # convert a variable to integer or return 0 if it's not a number | ||||
def int_or_zero(num):
    """Convert a scraped value to a non-negative integer, or return 0.

    Args:
        num: a string, or a list whose first element is a string (the shape
            returned by an XPath ``text()`` query). Only the first list
            element is considered.

    Returns:
        The integer value when the text consists solely of digits
        (surrounding whitespace is ignored), otherwise 0. An empty list or
        a value without string methods (e.g. ``None``) also yields 0.
    """
    if isinstance(num, list):
        if not num:
            return 0
        num = num[0]

    try:
        # table cells frequently carry padding/newlines around the number
        text = num.strip()
    except AttributeError:
        # not a string-like value, so by definition "not a number"
        return 0

    if not text.isdigit():
        return 0

    try:
        return int(text)
    except ValueError:
        # some unicode digits (e.g. superscripts) pass isdigit() but are
        # rejected by int()
        return 0
| 
 | ||||
| 
 | ||||
| # do search-request | ||||
def request(query, params):
    """Fill in the search URL for the given query and page.

    The query is url-encoded as the ``term`` parameter and the requested
    page number (``params['pageno']``) is passed as the ``offset`` value of
    ``search_url``. The same ``params`` dict is updated in place and
    returned.
    """
    encoded_term = urlencode({'term': query})
    page_number = params['pageno']

    params['url'] = search_url.format(query=encoded_term, offset=page_number)
    return params
| 
 | ||||
| 
 | ||||
| # get response from search-request | ||||
def response(resp):
    """Parse a Nyaa.se search result page into a list of torrent results.

    Args:
        resp: response object whose ``text`` attribute holds the HTML page.

    Returns:
        A list with one dict per usable result row, containing the keys
        ``url``, ``title``, ``content``, ``seed``, ``leech``, ``filesize``,
        ``torrentfile`` and ``template`` (always ``'torrent.html'``).
        ``filesize`` is the size in bytes, or ``None`` when the size cell
        is missing or cannot be parsed. Rows lacking the category, title or
        download anchor are skipped.
    """
    results = []

    # unit suffixes of the size column (upper-cased) mapped to byte counts;
    # built once instead of once per row
    size_multipliers = {
        'KIB': 1024,
        'MIB': 1024 ** 2,
        'GIB': 1024 ** 3,
        'TIB': 1024 ** 4,
    }

    dom = html.fromstring(resp.text)

    for result in dom.xpath(xpath_results):
        category_links = result.xpath(xpath_category)
        title_links = result.xpath(xpath_title)
        torrent_links = result.xpath(xpath_torrent_file)

        # a row missing any of these anchors cannot be turned into a torrent
        # result; skip it rather than let an IndexError discard the whole page
        if not (category_links and title_links and torrent_links):
            continue

        # category in which our torrent belongs
        category = category_links[0].attrib.get('title')

        # torrent title
        page_a = title_links[0]
        title = escape(extract_text(page_a))

        # link to the page
        href = page_a.attrib.get('href')

        # link to the torrent file
        torrent_link = torrent_links[0].attrib.get('href')

        # torrent size, converted to bytes; any missing cell, unexpected
        # layout, unknown unit or non-numeric value leaves it undefined
        try:
            file_size, suffix = result.xpath(xpath_filesize)[0].split(' ')
            multiplier = size_multipliers[suffix.upper()]
            file_size = int(float(file_size) * multiplier)
        except (IndexError, KeyError, ValueError, OverflowError):
            file_size = None

        # seed count
        seed = int_or_zero(result.xpath(xpath_seeds))

        # leech count
        leech = int_or_zero(result.xpath(xpath_leeches))

        # torrent downloads count
        downloads = int_or_zero(result.xpath(xpath_downloads))

        # content string contains all information not included into template
        content = 'Category: "{category}". Downloaded {downloads} times.'
        content = content.format(category=category, downloads=downloads)
        content = escape(content)

        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'seed': seed,
                        'leech': leech,
                        'filesize': file_size,
                        'torrentfile': torrent_link,
                        'template': 'torrent.html'})

    return results
| @ -175,6 +175,10 @@ engines: | ||||
|     engine : mixcloud | ||||
|     shortcut : mc | ||||
| 
 | ||||
|   - name : nyaa | ||||
|     engine : nyaa | ||||
|     shortcut : nt | ||||
| 
 | ||||
|   - name : openstreetmap | ||||
|     engine : openstreetmap | ||||
|     shortcut : osm | ||||
|  | ||||
							
								
								
									
										66
									
								
								tests/unit/engines/test_nyaa.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								tests/unit/engines/test_nyaa.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,66 @@ | ||||
| from collections import defaultdict | ||||
| import mock | ||||
| from searx.engines import nyaa | ||||
| from searx.testing import SearxTestCase | ||||
| 
 | ||||
| 
 | ||||
class TestNyaaEngine(SearxTestCase):
    """Unit tests for the Nyaa.se engine's request and response handlers."""

    def test_request(self):
        """The built URL contains the query and points at nyaa.se."""
        query = 'test_query'
        request_params = defaultdict(dict)
        request_params['pageno'] = 1

        params = nyaa.request(query, request_params)

        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('nyaa.se', params['url'])

    def test_response(self):
        """An empty page yields no results; a sample row is parsed fully."""
        empty_resp = mock.Mock(text='<html></html>')
        self.assertEqual(nyaa.response(empty_resp), [])

        page = """
        <table class="tlist">
          <tbody>
            <tr class="trusted tlistrow">
              <td class="tlisticon">
                <a href="//www.nyaa.se" title="English-translated Anime">
                   <img src="//files.nyaa.se" alt="English-translated Anime">
                </a>
              </td>
              <td class="tlistname">
                <a href="//www.nyaa.se/?page3">
                  Sample torrent title
                </a>
              </td>
              <td class="tlistdownload">
                <a href="//www.nyaa.se/?page_dl" title="Download">
                  <img src="//files.nyaa.se/www-dl.png" alt="DL">
                </a>
              </td>
              <td class="tlistsize">10 MiB</td>
              <td class="tlistsn">1</td>
              <td class="tlistln">3</td>
              <td class="tlistdn">666</td>
              <td class="tlistmn">0</td>
            </tr>
          </tbody>
        </table>
        """

        results = nyaa.response(mock.Mock(text=page))

        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 1)

        first = results[0]

        # links and free-text content
        self.assertIn('www.nyaa.se/?page3', first['url'])
        self.assertIn('www.nyaa.se/?page_dl', first['torrentfile'])
        self.assertIn('English-translated Anime', first['content'])
        self.assertIn('Downloaded 666 times.', first['content'])

        # exact field values
        self.assertEqual(first['title'], 'Sample torrent title')
        self.assertEqual(first['seed'], 1)
        self.assertEqual(first['leech'], 3)
        self.assertEqual(first['filesize'], 10 * 1024 * 1024)
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Kirill Isakov
						Kirill Isakov