Ticket #3438: tracker_icons.py

File tracker_icons.py, 19.2 KB (added by Bader Bohamad, 4 years ago)

file related issue

Line 
1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2010 John Garland <johnnybg+deluge@gmail.com>
4#
5# This file is part of Deluge and is licensed under GNU General Public License 3.0, or later, with
6# the additional special exception to link portions of this program with the OpenSSL library.
7# See LICENSE for more details.
8#
9
10from __future__ import unicode_literals
11
12import logging
13import os
14from tempfile import mkstemp
15
16from twisted.internet import defer, threads
17from twisted.web.error import PageRedirect
18from twisted.web.resource import ForbiddenResource, NoResource
19
20from deluge.component import Component
21from deluge.configmanager import get_config_dir
22from deluge.decorators import proxy
23from deluge.httpdownloader import download_file
24
25try:
26 from html.parser import HTMLParser
27 from urllib.parse import urljoin, urlparse
28except ImportError:
29 # PY2 fallback
30 from HTMLParser import HTMLParser
31 from urlparse import urljoin, urlparse # pylint: disable=ungrouped-imports
32
33try:
34 from PIL import Image
35except ImportError:
36 Image = None
37
38log = logging.getLogger(__name__)
39
40
class TrackerIcon(object):
    """
    Represents a tracker's icon
    """

    def __init__(self, filename):
        """
        Initialises a new TrackerIcon object

        :param filename: the filename of the icon
        :type filename: string
        :raises KeyError: if the filename's extension is not a known image type
        """
        self.filename = os.path.abspath(filename)
        self.mimetype = extension_to_mimetype(self.filename.rpartition('.')[2])
        # Image bytes; read lazily from disk by get_data()
        self.data = None
        # Opaque value managed through set_cached_icon()/get_cached_icon()
        self.icon_cache = None

    def __eq__(self, other):
        """
        Compares this TrackerIcon with another to determine if they're equal

        Two icons are equal when they refer to the same file on disk, or when
        both their mimetype and their image data match.

        :param other: the TrackerIcon to compare to
        :type other: TrackerIcon
        :returns: whether or not they're equal (NotImplemented if other is
            not a TrackerIcon, so Python falls back to its default comparison)
        :rtype: boolean
        """
        if not isinstance(other, TrackerIcon):
            return NotImplemented

        try:
            if os.path.samefile(self.filename, other.filename):
                return True
        except OSError:
            # samefile() stats both paths and fails if either is missing;
            # the icons can still be compared by content below.
            pass

        return (
            self.get_mimetype() == other.get_mimetype()
            and self.get_data() == other.get_data()
        )

    def get_mimetype(self):
        """
        Returns the mimetype of this TrackerIcon's image

        :returns: the mimetype of the image
        :rtype: string
        """
        return self.mimetype

    def get_data(self):
        """
        Returns the TrackerIcon's image data as a string

        The file is read on first use and the result is kept in self.data.

        :returns: the image data
        :rtype: string
        """
        if not self.data:
            with open(self.filename, 'rb') as _file:
                self.data = _file.read()
        return self.data

    def get_filename(self, full=True):
        """
        Returns the TrackerIcon image's filename

        :param full: an (optional) arg to indicate whether or not to
            return the full path
        :type full: boolean
        :returns: the path of the TrackerIcon's image
        :rtype: string
        """
        return self.filename if full else os.path.basename(self.filename)

    def set_cached_icon(self, data):
        """
        Set the cached icon data.

        :param data: the value to keep in the icon cache slot
        """
        self.icon_cache = data

    def get_cached_icon(self):
        """
        Returns the cached icon data.

        :returns: whatever was last passed to set_cached_icon, or None
        """
        return self.icon_cache
119
120
class TrackerIcons(Component):
    """
    A TrackerIcon factory class
    """

    def __init__(self, icon_dir=None, no_icon=None):
        """
        Initialises a new TrackerIcons object

        :param icon_dir: the (optional) directory of where to store the icons
        :type icon_dir: string
        :param no_icon: the (optional) path name of the icon to show when no icon
            can be fetched
        :type no_icon: string
        """
        Component.__init__(self, 'TrackerIcons')
        if not icon_dir:
            icon_dir = get_config_dir('icons')
        self.dir = icon_dir
        if not os.path.isdir(self.dir):
            os.makedirs(self.dir)

        # Maps host name -> TrackerIcon, pre-populated from the icon directory
        self.icons = {}
        for icon in os.listdir(self.dir):
            if icon != no_icon:
                host = icon_name_to_host(icon)
                try:
                    self.icons[host] = TrackerIcon(os.path.join(self.dir, icon))
                except KeyError:
                    # Raised for files whose extension is not a known image type
                    log.warning('invalid icon %s', icon)
        if no_icon:
            self.icons[None] = TrackerIcon(no_icon)
        else:
            self.icons[None] = None
        # The empty host shares the fallback icon
        self.icons[''] = self.icons[None]

        # Maps host name -> list of Deferreds waiting on an in-progress fetch
        self.pending = {}
        # Maps host name -> host it was redirected to
        self.redirects = {}

    def has(self, host):
        """
        Returns whether the tracker icon for the given host exists.

        :param host: the host for the TrackerIcon
        :type host: string
        :returns: True or False
        :rtype: bool
        """
        return host.lower() in self.icons

    def get(self, host):
        """
        Returns a TrackerIcon for the given tracker's host
        from the icon cache.

        :param host: the host for the TrackerIcon
        :type host: string
        :returns: the TrackerIcon for the host, or None if it is not cached
        :rtype: TrackerIcon
        """
        return self.icons.get(host.lower())

    def fetch(self, host):
        """
        Fetches (downloads) the icon for the given host.
        When the icon is downloaded a callback is fired
        on the queue of callers to this function.

        :param host: the host to obtain the TrackerIcon for
        :type host: string
        :returns: a Deferred which fires with the TrackerIcon for the given host
        :rtype: Deferred
        """
        host = host.lower()
        if host in self.icons:
            # We already have it, so let's return it
            d = defer.succeed(self.icons[host])
        elif host in self.pending:
            # We're in the middle of getting it
            # Add ourselves to the waiting list
            d = defer.Deferred()
            self.pending[host].append(d)
        else:
            # We need to fetch it
            self.pending[host] = []
            # Start callback chain
            d = self.download_page(host)
            d.addCallbacks(
                self.on_download_page_complete,
                self.on_download_page_fail,
                errbackArgs=(host,),
            )
            d.addCallback(self.parse_html_page)
            d.addCallbacks(
                self.on_parse_complete, self.on_parse_fail, callbackArgs=(host,)
            )
            d.addCallback(self.download_icon, host)
            d.addCallbacks(
                self.on_download_icon_complete,
                self.on_download_icon_fail,
                callbackArgs=(host,),
                errbackArgs=(host,),
            )
            d.addCallback(self.resize_icon)
            d.addCallback(self.store_icon, host)
        return d

    def download_page(self, host, url=None):
        """
        Downloads a tracker host's page
        If no url is provided, it bases the url on the host

        :param host: the tracker host
        :type host: string
        :param url: the (optional) url of the host
        :type url: string
        :returns: the filename of the tracker host's page
        :rtype: Deferred
        """
        if not url:
            url = self.host_to_url(host)
        log.debug('Downloading %s %s', host, url)
        tmp_fd, tmp_file = mkstemp(prefix='deluge_ticon.')
        os.close(tmp_fd)

        def remove_tmp_file(failure):
            # Nothing downstream consumes the temp file when the download
            # fails (including redirects, which start a fresh download), so
            # delete it here instead of leaking it.
            try:
                os.remove(tmp_file)
            except OSError as ex:
                log.debug('Could not remove temp file: %s', ex)
            return failure

        d = download_file(url, tmp_file, force_filename=True, handle_redirects=False)
        d.addErrback(remove_tmp_file)
        return d

    def on_download_page_complete(self, page):
        """
        Runs any download clean up functions

        :param page: the page that finished downloading
        :type page: string
        :returns: the page that finished downloading
        :rtype: string
        """
        log.debug('Finished downloading %s', page)
        return page

    def on_download_page_fail(self, f, host):
        """
        Recovers from download error

        :param f: the failure that occurred
        :type f: Failure
        :param host: the name of the host whose page failed to download
        :type host: string
        :returns: a Deferred if recovery was possible
            else the original failure
        :rtype: Deferred or Failure
        """
        error_msg = f.getErrorMessage()
        log.debug('Error downloading page: %s', error_msg)
        d = f
        if f.check(PageRedirect):
            # Handle redirect errors
            # The redirect target is taken from the text after ' to ' in the
            # error message.
            location = urljoin(self.host_to_url(host), error_msg.split(' to ')[1])
            self.redirects[host] = url_to_host(location)
            d = self.download_page(host, url=location)
            d.addCallbacks(
                self.on_download_page_complete,
                self.on_download_page_fail,
                errbackArgs=(host,),
            )

        return d

    @proxy(threads.deferToThread)
    def parse_html_page(self, page):
        """
        Parses the html page for favicons

        The temporary page file is removed afterwards, even when reading or
        parsing fails.

        :param page: the page to parse
        :type page: string
        :returns: a Deferred which callbacks a list of available favicons (url, type)
        :rtype: Deferred
        """
        # Local import: io.open takes encoding/errors on both PY2 and PY3.
        import io

        parser = FaviconParser()
        try:
            # Undecodable bytes are replaced rather than raising
            # UnicodeDecodeError; the markup being searched for is plain ASCII.
            with io.open(page, 'r', encoding='utf-8', errors='replace') as _file:
                for line in _file:
                    parser.feed(line)
                    if parser.left_head:
                        # Favicons are only looked for up to </head>
                        break
                parser.close()
        finally:
            try:
                os.remove(page)
            except OSError as ex:
                log.warning('Could not remove temp file: %s', ex)

        return parser.get_icons()

    def on_parse_complete(self, icons, host):
        """
        Runs any parse clean up functions

        :param icons: the icons that were extracted from the page
        :type icons: list
        :param host: the host the icons are for
        :type host: string
        :returns: the icons that were extracted from the page, with their
            urls made absolute against the host's url
        :rtype: list
        """
        log.debug('Parse Complete, got icons for %s: %s', host, icons)
        url = self.host_to_url(host)
        icons = [(urljoin(url, icon), mimetype) for icon, mimetype in icons]
        log.debug('Icon urls from %s: %s', host, icons)
        return icons

    def on_parse_fail(self, f):
        """
        Recovers from a parse error

        :param f: the failure that occurred
        :type f: Failure
        :returns: a Deferred if recovery was possible
            else the original failure
        :rtype: Deferred or Failure
        """
        log.debug('Error parsing page: %s', f.getErrorMessage())
        return f

    def download_icon(self, icons, host):
        """
        Downloads the first available icon from icons

        The first entry is popped off the given list; the remaining entries
        are handed to on_download_icon_fail as fallbacks.

        :param icons: a list of icons
        :type icons: list
        :param host: the tracker's host name
        :type host: string
        :returns: a Deferred which fires with the downloaded icon's filename
        :rtype: Deferred
        :raises NoIconsError: if icons is empty
        """
        if not icons:
            raise NoIconsError('empty icons list')
        (url, mimetype) = icons.pop(0)
        d = download_file(
            url,
            os.path.join(self.dir, host_to_icon_name(host, mimetype)),
            force_filename=True,
        )
        d.addCallback(self.check_icon_is_valid)
        if icons:
            d.addErrback(self.on_download_icon_fail, host, icons)
        return d

    @proxy(threads.deferToThread)
    def check_icon_is_valid(self, icon_name):
        """
        Performs a sanity check on icon_name

        :param icon_name: the name of the icon to check
        :type icon_name: string
        :returns: the name of the validated icon
        :rtype: string
        :raises: InvalidIconError
        """

        if Image:
            try:
                with Image.open(icon_name):
                    pass
            except IOError as ex:
                raise InvalidIconError(ex)
        else:
            # Without Pillow the only check possible is that the file is not empty
            if not os.path.getsize(icon_name):
                raise InvalidIconError('empty icon')

        return icon_name

    def on_download_icon_complete(self, icon_name, host):
        """
        Runs any download cleanup functions

        :param icon_name: the filename of the icon that finished downloading
        :type icon_name: string
        :param host: the host the icon completed to download for
        :type host: string
        :returns: the icon that finished downloading
        :rtype: TrackerIcon
        """
        log.debug('Successfully downloaded from %s: %s', host, icon_name)
        return TrackerIcon(icon_name)

    def on_download_icon_fail(self, f, host, icons=None):
        """
        Recovers from a download error

        :param f: the failure that occurred
        :type f: Failure
        :param host: the host the icon failed to download for
        :type host: string
        :param icons: the (optional) list of remaining icons
        :type icons: list
        :returns: a Deferred if recovery was possible,
            else the fallback "no icon" TrackerIcon (which may be None)
        :rtype: Deferred or TrackerIcon or None
        """
        if not icons:
            icons = []
        error_msg = f.getErrorMessage()
        log.debug('Error downloading icon from %s: %s', host, error_msg)
        d = f
        if f.check(PageRedirect):
            # Handle redirect errors
            location = urljoin(self.host_to_url(host), error_msg.split(' to ')[1])
            d = self.download_icon(
                [(location, extension_to_mimetype(location.rpartition('.')[2]))]
                + icons,
                host,
            )
            if not icons:
                # With no fallbacks left download_icon attaches no errback of
                # its own, so attach the completion/failure handlers here.
                d.addCallbacks(
                    self.on_download_icon_complete,
                    self.on_download_icon_fail,
                    callbackArgs=(host,),
                    errbackArgs=(host,),
                )
        elif f.check(NoResource, ForbiddenResource) and icons:
            d = self.download_icon(icons, host)
        elif f.check(NoIconsError):
            # No icons, try favicon.ico as an act of desperation
            d = self.download_icon(
                [
                    (
                        urljoin(self.host_to_url(host), 'favicon.ico'),
                        extension_to_mimetype('ico'),
                    )
                ],
                host,
            )
            d.addCallbacks(
                self.on_download_icon_complete,
                self.on_download_icon_fail,
                callbackArgs=(host,),
                errbackArgs=(host,),
            )
        else:
            # No icons :(
            # Return the None Icon
            d = self.icons[None]

        return d

    @proxy(threads.deferToThread)
    def resize_icon(self, icon):
        """
        Resizes the given icon to be 16x16 pixels

        Icons larger than 16 pixels in either dimension are resized and saved
        as PNG; the original file is removed if its name differs.

        :param icon: the icon to resize
        :type icon: TrackerIcon
        :returns: the resized icon
        :rtype: TrackerIcon
        """
        # Requires Pillow(PIL) to resize.
        if not (icon and Image):
            return icon

        filename = icon.get_filename()
        new_filename = None
        with Image.open(filename) as img:
            # Check each dimension: comparing the size tuples directly is
            # lexicographic and would skip e.g. an 8x32 image.
            if max(img.size) > 16:
                new_filename = filename.rpartition('.')[0] + '.png'
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
                # same filter under its current name.
                resample = getattr(Image, 'LANCZOS', None)
                if resample is None:
                    resample = Image.ANTIALIAS
                img.resize((16, 16), resample).save(new_filename)

        # Remove the original only after it has been closed.
        if new_filename and new_filename != filename:
            os.remove(filename)
            icon = TrackerIcon(new_filename)
        return icon

    def store_icon(self, icon, host):
        """
        Stores the icon for the given host
        Callbacks any pending deferreds waiting on this icon

        :param icon: the icon to store
        :type icon: TrackerIcon or None
        :param host: the host to store it for
        :type host: string
        :returns: the stored icon
        :rtype: TrackerIcon or None
        """
        self.icons[host] = icon
        # pop() removes the waiting list before firing it and tolerates a
        # host that has no pending entry.
        for d in self.pending.pop(host, []):
            d.callback(icon)
        return icon

    def host_to_url(self, host):
        """
        Given a host, returns the URL to fetch

        :param host: the tracker host
        :type host: string
        :returns: the url of the tracker
        :rtype: string
        """
        # Follow a previously recorded redirect for this host, if any
        if host in self.redirects:
            host = self.redirects[host]
        return 'http://%s/' % host
520
521
522# ------- HELPER CLASSES ------
523
524
class FaviconParser(HTMLParser):
    """
    A HTMLParser which extracts favicons from a HTML page
    """

    def __init__(self):
        # List of (href, mimetype) tuples collected from <link> tags
        self.icons = []
        # Set once the closing </head> tag has been seen
        self.left_head = False
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """
        Records the icon referenced by a <link rel="icon"> or
        <link rel="shortcut icon"> tag

        :param tag: the (lower-cased) tag name
        :type tag: string
        :param attrs: the tag's attributes as (name, value) pairs
        :type attrs: list
        """
        # Only <link> tags can declare a favicon. The tag check must apply to
        # both rel values, not just the first one.
        if tag != 'link':
            return
        # rel values are compared case-insensitively; valueless attributes
        # arrive with a value of None and are skipped.
        rels = [value.lower() for attr, value in attrs if attr == 'rel' and value]
        if 'icon' not in rels and 'shortcut icon' not in rels:
            return

        href = None
        icon_type = None
        for attr, value in attrs:
            if attr == 'href':
                href = value
            elif attr == 'type':
                icon_type = value
        if href:
            try:
                mimetype = extension_to_mimetype(href.rpartition('.')[2])
            except KeyError:
                # Unknown extension: rely on the tag's type attribute, if any
                pass
            else:
                # A recognised extension takes precedence over the type attribute
                icon_type = mimetype
            if icon_type:
                self.icons.append((href, icon_type))

    def handle_endtag(self, tag):
        """
        Notes when the document's head section has ended

        :param tag: the (lower-cased) tag name
        :type tag: string
        """
        if tag == 'head':
            self.left_head = True

    def get_icons(self):
        """
        Returns a list of favicons extracted from the HTML page

        :returns: a list of favicons
        :rtype: list
        """
        return self.icons
570
571
572# ------ HELPER FUNCTIONS ------
573
574
def url_to_host(url):
    """
    Extracts the host name from a URL

    :param url: the URL to inspect
    :type url: string
    :returns: the host part of the URL, as reported by urlparse
    :rtype: string
    """
    parsed_url = urlparse(url)
    return parsed_url.hostname
585
586
def host_to_icon_name(host, mimetype):
    """
    Builds the icon filename used for a host

    :param host: the host the icon belongs to
    :type host: string
    :param mimetype: the mimetype of the icon, which selects the extension
    :type mimetype: string
    :returns: the icon's filename, in the form ``<host>.<extension>``
    :rtype: string

    """
    stem = host + '.'
    return stem + mimetype_to_extension(mimetype)
600
601
def icon_name_to_host(icon):
    """
    Recovers the host name from an icon's filename

    :param icon: the icon filename
    :type icon: string
    :returns: everything before the last '.', or an empty string when
        the name contains no '.'
    :rtype: string
    """
    host, _separator, _extension = icon.rpartition('.')
    return host
612
613
# Two-way lookup table: mimetypes map to filename extensions and filename
# extensions map to mimetypes. Both directions share one dict.
MIME_MAP = dict(
    [
        # mimetype -> extension
        ('image/gif', 'gif'),
        ('image/jpeg', 'jpg'),
        ('image/png', 'png'),
        ('image/vnd.microsoft.icon', 'ico'),
        ('image/x-icon', 'ico'),
        # extension -> mimetype
        ('gif', 'image/gif'),
        ('jpg', 'image/jpeg'),
        ('jpeg', 'image/jpeg'),
        ('png', 'image/png'),
        ('ico', 'image/vnd.microsoft.icon'),
    ]
)
626
627
def mimetype_to_extension(mimetype):
    """
    Looks up the filename extension for a mimetype

    The lookup is case-insensitive.

    :param mimetype: the mimetype to look up
    :type mimetype: string
    :returns: the filename extension for the given mimetype
    :rtype: string
    :raises KeyError: if given an invalid mimetype
    """
    key = mimetype.lower()
    return MIME_MAP[key]
639
640
def extension_to_mimetype(extension):
    """
    Looks up the mimetype for a filename extension

    The lookup is case-insensitive.

    :param extension: the filename extension to look up
    :type extension: string
    :returns: the mimetype for the given filename extension
    :rtype: string
    :raises KeyError: if given an invalid filename extension
    """
    key = extension.lower()
    return MIME_MAP[key]
652
653
654# ------ EXCEPTIONS ------
655
656
class NoIconsError(Exception):
    """Raised when there are no icons to download."""
659
660
class InvalidIconError(Exception):
    """Raised when a downloaded icon fails validation."""