Ticket #3438: tracker_icons.py

File tracker_icons.py, 19.2 KB (added by bader77uk, 3 years ago)

file related issue

Line 
1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2010 John Garland <johnnybg+deluge@gmail.com>
4#
5# This file is part of Deluge and is licensed under GNU General Public License 3.0, or later, with
6# the additional special exception to link portions of this program with the OpenSSL library.
7# See LICENSE for more details.
8#
9
10from __future__ import unicode_literals
11
12import logging
13import os
14from tempfile import mkstemp
15
16from twisted.internet import defer, threads
17from twisted.web.error import PageRedirect
18from twisted.web.resource import ForbiddenResource, NoResource
19
20from deluge.component import Component
21from deluge.configmanager import get_config_dir
22from deluge.decorators import proxy
23from deluge.httpdownloader import download_file
24
25try:
26    from html.parser import HTMLParser
27    from urllib.parse import urljoin, urlparse
28except ImportError:
29    # PY2 fallback
30    from HTMLParser import HTMLParser
31    from urlparse import urljoin, urlparse  # pylint: disable=ungrouped-imports
32
33try:
34    from PIL import Image
35except ImportError:
36    Image = None
37
38log = logging.getLogger(__name__)
39
40
class TrackerIcon(object):
    """
    Represents a tracker's icon
    """

    def __init__(self, filename):
        """
        Initialises a new TrackerIcon object

        :param filename: the filename of the icon
        :type filename: string
        :raises KeyError: if the filename's extension is not a known image type
        """
        self.filename = os.path.abspath(filename)
        self.mimetype = extension_to_mimetype(self.filename.rpartition('.')[2])
        # Image bytes, loaded lazily by get_data()
        self.data = None
        # Opaque slot for callers to stash a rendered version of the icon
        self.icon_cache = None

    def __eq__(self, other):
        """
        Compares this TrackerIcon with another to determine if they're equal

        Two icons are equal when they refer to the same file on disk, or when
        both their mimetype and their image data match.

        :param other: the TrackerIcon to compare to
        :type other: TrackerIcon
        :returns: whether or not they're equal (NotImplemented for other types)
        :rtype: boolean
        """
        if not isinstance(other, TrackerIcon):
            # Let Python fall back to its default handling so that e.g.
            # ``icon == None`` is simply False instead of an AttributeError.
            return NotImplemented
        try:
            if os.path.samefile(self.filename, other.filename):
                return True
        except OSError:
            # One of the files cannot be stat'ed (e.g. it was removed);
            # fall through and compare by content instead.
            pass
        return (
            self.get_mimetype() == other.get_mimetype()
            and self.get_data() == other.get_data()
        )

    def __ne__(self, other):
        """
        Inverse of __eq__ (Python 2 does not derive this automatically)

        :param other: the TrackerIcon to compare to
        :type other: TrackerIcon
        :returns: whether or not they differ (NotImplemented for other types)
        :rtype: boolean
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def get_mimetype(self):
        """
        Returns the mimetype of this TrackerIcon's image

        :returns: the mimetype of the image
        :rtype: string
        """
        return self.mimetype

    def get_data(self):
        """
        Returns the TrackerIcon's image data as a string

        The file is read on first use and the bytes are kept for later calls.

        :returns: the image data
        :rtype: string
        """
        # ``is None`` rather than truthiness so an empty file is not re-read
        # on every call.
        if self.data is None:
            with open(self.filename, 'rb') as _file:
                self.data = _file.read()
        return self.data

    def get_filename(self, full=True):
        """
        Returns the TrackerIcon image's filename

        :param full: an (optional) arg to indicate whether or not to
                     return the full path
        :type full: boolean
        :returns: the path of the TrackerIcon's image
        :rtype: string
        """
        return self.filename if full else os.path.basename(self.filename)

    def set_cached_icon(self, data):
        """
        Set the cached icon data.

        :param data: the value to remember; stored as-is
        """
        self.icon_cache = data

    def get_cached_icon(self):
        """
        Returns the cached icon data.

        :returns: whatever was last passed to set_cached_icon, or None
        """
        return self.icon_cache
119
120
class TrackerIcons(Component):
    """
    A TrackerIcon factory class

    Keeps a host -> TrackerIcon map that is seeded from the icon directory
    and filled on demand by downloading the tracker's favicon.
    """

    def __init__(self, icon_dir=None, no_icon=None):
        """
        Initialises a new TrackerIcons object

        :param icon_dir: the (optional) directory of where to store the icons
        :type icon_dir: string
        :param no_icon: the (optional) path name of the icon to show when no icon
                       can be fetched
        :type no_icon: string
        """
        Component.__init__(self, 'TrackerIcons')
        if not icon_dir:
            icon_dir = get_config_dir('icons')
        self.dir = icon_dir
        if not os.path.isdir(self.dir):
            os.makedirs(self.dir)

        # host -> TrackerIcon, pre-populated from the files already on disk
        self.icons = {}
        for icon in os.listdir(self.dir):
            if icon != no_icon:
                host = icon_name_to_host(icon)
                try:
                    self.icons[host] = TrackerIcon(os.path.join(self.dir, icon))
                except KeyError:
                    # TrackerIcon raises KeyError for unknown file extensions
                    log.warning('invalid icon %s', icon)
        if no_icon:
            self.icons[None] = TrackerIcon(no_icon)
        else:
            self.icons[None] = None
        # An empty host resolves to the same fallback as "no icon"
        self.icons[''] = self.icons[None]

        # host -> list of Deferreds waiting for an in-progress fetch
        self.pending = {}
        # host -> host it redirected to
        self.redirects = {}

    def has(self, host):
        """
        Returns True or False if the tracker icon for the given host exists or not.

        :param host: the host for the TrackerIcon
        :type host: string
        :returns: True or False
        :rtype: bool
        """
        return host.lower() in self.icons

    def get(self, host):
        """
        Returns a TrackerIcon for the given tracker's host
        from the icon cache.

        :param host: the host for the TrackerIcon
        :type host: string
        :returns: the TrackerIcon for the host, or None if it is not cached
        :rtype: TrackerIcon
        """
        return self.icons.get(host.lower())

    def fetch(self, host):
        """
        Fetches (downloads) the icon for the given host.
        When the icon is downloaded a callback is fired
        on the queue of callers to this function.

        :param host: the host to obtain the TrackerIcon for
        :type host: string
        :returns: a Deferred which fires with the TrackerIcon for the given host
        :rtype: Deferred
        """
        host = host.lower()
        if host in self.icons:
            # We already have it, so let's return it
            d = defer.succeed(self.icons[host])
        elif host in self.pending:
            # We're in the middle of getting it
            # Add ourselves to the waiting list
            d = defer.Deferred()
            self.pending[host].append(d)
        else:
            # We need to fetch it
            self.pending[host] = []
            # Start callback chain
            d = self.download_page(host)
            d.addCallbacks(
                self.on_download_page_complete,
                self.on_download_page_fail,
                errbackArgs=(host,),
            )
            d.addCallback(self.parse_html_page)
            d.addCallbacks(
                self.on_parse_complete, self.on_parse_fail, callbackArgs=(host,)
            )
            d.addCallback(self.download_icon, host)
            d.addCallbacks(
                self.on_download_icon_complete,
                self.on_download_icon_fail,
                callbackArgs=(host,),
                errbackArgs=(host,),
            )
            d.addCallback(self.resize_icon)
            d.addCallback(self.store_icon, host)
            # If anything above still ends in a failure, store_icon never ran:
            # release the waiters instead of leaving the host pending forever.
            d.addErrback(self._on_fetch_fail, host)
        return d

    def _on_fetch_fail(self, f, host):
        """
        Cleans up after a fetch whose callback chain ended in a failure

        Drops the pending entry for the host, so a later fetch can retry, and
        passes the failure on to every Deferred that was waiting on it.

        :param f: the failure that occurred
        :type f: Failure
        :param host: the host whose icon could not be fetched
        :type host: string
        :returns: the original failure
        :rtype: Failure
        """
        log.debug('Unable to fetch icon for %s: %s', host, f.getErrorMessage())
        for d in self.pending.pop(host, []):
            d.errback(f)
        return f

    def download_page(self, host, url=None):
        """
        Downloads a tracker host's page
        If no url is provided, it bases the url on the host

        The page is written to a temporary file, which is removed again if
        the download fails.

        :param host: the tracker host
        :type host: string
        :param url: the (optional) url of the host
        :type url: string
        :returns: the filename of the tracker host's page
        :rtype: Deferred
        """
        if not url:
            url = self.host_to_url(host)
        log.debug('Downloading %s %s', host, url)
        tmp_fd, tmp_file = mkstemp(prefix='deluge_ticon.')
        os.close(tmp_fd)
        d = download_file(url, tmp_file, force_filename=True, handle_redirects=False)
        # On success parse_html_page removes the file; on failure (including
        # redirects, which start a new download) nobody else would.
        d.addErrback(self._discard_tmp_page, tmp_file)
        return d

    def _discard_tmp_page(self, f, tmp_file):
        """
        Removes the temporary file of a failed page download

        :param f: the failure that occurred
        :type f: Failure
        :param tmp_file: the temporary file the page was to be saved in
        :type tmp_file: string
        :returns: the original failure, so later errbacks still see it
        :rtype: Failure
        """
        try:
            os.remove(tmp_file)
        except OSError as ex:
            log.debug('Could not remove temp file: %s', ex)
        return f

    def on_download_page_complete(self, page):
        """
        Runs any download clean up functions

        :param page: the page that finished downloading
        :type page: string
        :returns: the page that finished downloading
        :rtype: string
        """
        log.debug('Finished downloading %s', page)
        return page

    def on_download_page_fail(self, f, host):
        """
        Recovers from download error

        :param f: the failure that occurred
        :type f: Failure
        :param host: the name of the host whose page failed to download
        :type host: string
        :returns: a Deferred if recovery was possible
                  else the original failure
        :rtype: Deferred or Failure
        """
        error_msg = f.getErrorMessage()
        log.debug('Error downloading page: %s', error_msg)
        d = f
        if f.check(PageRedirect):
            # Handle redirect errors
            # The redirect target is taken from the "... to <location>" message
            location = urljoin(self.host_to_url(host), error_msg.split(' to ')[1])
            self.redirects[host] = url_to_host(location)
            d = self.download_page(host, url=location)
            d.addCallbacks(
                self.on_download_page_complete,
                self.on_download_page_fail,
                errbackArgs=(host,),
            )

        return d

    @proxy(threads.deferToThread)
    def parse_html_page(self, page):
        """
        Parses the html page for favicons

        The page file is always removed afterwards, even if parsing fails.

        :param page: the page to parse
        :type page: string
        :returns: a Deferred which callbacks a list of available favicons (url, type)
        :rtype: Deferred
        """
        parser = FaviconParser()
        try:
            # Read bytes and decode leniently: an undecodable byte should
            # not prevent the favicon links from being found.
            with open(page, 'rb') as _file:
                for line in _file:
                    parser.feed(line.decode('utf-8', 'replace'))
                    # Favicon links live in <head>; stop once it is closed
                    if parser.left_head:
                        break
                parser.close()
        finally:
            try:
                os.remove(page)
            except OSError as ex:
                log.warning('Could not remove temp file: %s', ex)

        return parser.get_icons()

    def on_parse_complete(self, icons, host):
        """
        Runs any parse clean up functions

        :param icons: the icons that were extracted from the page
        :type icons: list
        :param host: the host the icons are for
        :type host: string
        :returns: the icons that were extracted from the page, with their
                  urls made absolute
        :rtype: list
        """
        log.debug('Parse Complete, got icons for %s: %s', host, icons)
        url = self.host_to_url(host)
        icons = [(urljoin(url, icon), mimetype) for icon, mimetype in icons]
        log.debug('Icon urls from %s: %s', host, icons)
        return icons

    def on_parse_fail(self, f):
        """
        Recovers from a parse error

        :param f: the failure that occurred
        :type f: Failure
        :returns: a Deferred if recovery was possible
                  else the original failure
        :rtype: Deferred or Failure
        """
        log.debug('Error parsing page: %s', f.getErrorMessage())
        return f

    def download_icon(self, icons, host):
        """
        Downloads the first available icon from icons

        :param icons: a list of icons; the first entry is removed from it
        :type icons: list
        :param host: the tracker's host name
        :type host: string
        :returns: a Deferred which fires with the downloaded icon's filename
        :rtype: Deferred
        :raises NoIconsError: if icons is empty
        """
        if not icons:
            raise NoIconsError('empty icons list')
        (url, mimetype) = icons.pop(0)
        d = download_file(
            url,
            os.path.join(self.dir, host_to_icon_name(host, mimetype)),
            force_filename=True,
        )
        d.addCallback(self.check_icon_is_valid)
        if icons:
            # Remaining candidates are tried if this one fails
            d.addErrback(self.on_download_icon_fail, host, icons)
        return d

    @proxy(threads.deferToThread)
    def check_icon_is_valid(self, icon_name):
        """
        Performs a sanity check on icon_name

        :param icon_name: the name of the icon to check
        :type icon_name: string
        :returns: the name of the validated icon
        :rtype: string
        :raises: InvalidIconError
        """

        if Image:
            try:
                with Image.open(icon_name):
                    pass
            except IOError as ex:
                raise InvalidIconError(ex)
        else:
            # Without Pillow the only check possible is "not empty"
            if not os.path.getsize(icon_name):
                raise InvalidIconError('empty icon')

        return icon_name

    def on_download_icon_complete(self, icon_name, host):
        """
        Runs any download cleanup functions

        :param icon_name: the filename of the icon that finished downloading
        :type icon_name: string
        :param host: the host the icon completed to download for
        :type host: string
        :returns: the icon that finished downloading
        :rtype: TrackerIcon
        """
        log.debug('Successfully downloaded from %s: %s', host, icon_name)
        return TrackerIcon(icon_name)

    def on_download_icon_fail(self, f, host, icons=None):
        """
        Recovers from a download error

        :param f: the failure that occurred
        :type f: Failure
        :param host: the host the icon failed to download for
        :type host: string
        :param icons: the (optional) list of remaining icons
        :type icons: list
        :returns: a Deferred if recovery was possible
                  else the original failure
        :rtype: Deferred or Failure
        """
        if not icons:
            icons = []
        error_msg = f.getErrorMessage()
        log.debug('Error downloading icon from %s: %s', host, error_msg)
        d = f
        if f.check(PageRedirect):
            # Handle redirect errors
            location = urljoin(self.host_to_url(host), error_msg.split(' to ')[1])
            d = self.download_icon(
                [(location, extension_to_mimetype(location.rpartition('.')[2]))]
                + icons,
                host,
            )
            if not icons:
                d.addCallbacks(
                    self.on_download_icon_complete,
                    self.on_download_icon_fail,
                    callbackArgs=(host,),
                    errbackArgs=(host,),
                )
        elif f.check(NoResource, ForbiddenResource) and icons:
            d = self.download_icon(icons, host)
        elif f.check(NoIconsError):
            # No icons, try favicon.ico as an act of desperation
            d = self.download_icon(
                [
                    (
                        urljoin(self.host_to_url(host), 'favicon.ico'),
                        extension_to_mimetype('ico'),
                    )
                ],
                host,
            )
            d.addCallbacks(
                self.on_download_icon_complete,
                self.on_download_icon_fail,
                callbackArgs=(host,),
                errbackArgs=(host,),
            )
        else:
            # No icons :(
            # Return the None Icon
            d = self.icons[None]

        return d

    @proxy(threads.deferToThread)
    def resize_icon(self, icon):
        """
        Resizes the given icon to be 16x16 pixels

        Icons wider or taller than 16 pixels are scaled down and saved as
        PNG; anything else is returned untouched. Does nothing without Pillow.

        :param icon: the icon to resize
        :type icon: TrackerIcon
        :returns: the resized icon
        :rtype: TrackerIcon
        """
        # Requires Pillow(PIL) to resize.
        if icon and Image:
            filename = icon.get_filename()
            with Image.open(filename) as img:
                # Check each dimension: comparing the (width, height) tuple
                # against (16, 16) is lexicographic and misses e.g. 10x100.
                if max(img.size) > 16:
                    new_filename = filename.rpartition('.')[0] + '.png'
                    # ANTIALIAS was removed in Pillow 10; LANCZOS is the
                    # same filter under its current name.
                    try:
                        resample = Image.LANCZOS
                    except AttributeError:
                        resample = Image.ANTIALIAS
                    img = img.resize((16, 16), resample)
                    img.save(new_filename)
                    if new_filename != filename:
                        os.remove(filename)
                        icon = TrackerIcon(new_filename)
        return icon

    def store_icon(self, icon, host):
        """
        Stores the icon for the given host
        Callbacks any pending deferreds waiting on this icon

        :param icon: the icon to store
        :type icon: TrackerIcon or None
        :param host: the host to store it for
        :type host: string
        :returns: the stored icon
        :rtype: TrackerIcon or None
        """
        self.icons[host] = icon
        # pop() with a default: no waiters is not an error
        for d in self.pending.pop(host, []):
            d.callback(icon)
        return icon

    def host_to_url(self, host):
        """
        Given a host, returns the URL to fetch

        :param host: the tracker host
        :type host: string
        :returns: the url of the tracker
        :rtype: string
        """
        # Follow a previously seen redirect for this host
        if host in self.redirects:
            host = self.redirects[host]
        return 'http://%s/' % host
520
521
522# ------- HELPER CLASSES ------
523
524
class FaviconParser(HTMLParser):
    """
    A HTMLParser which extracts favicons from a HTML page
    """

    def __init__(self):
        # (href, mimetype) pairs in document order
        self.icons = []
        # Set once </head> has been seen, so callers can stop feeding
        self.left_head = False
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """
        Records the favicon referenced by a <link rel="icon"> tag

        :param tag: the lower-cased tag name
        :type tag: string
        :param attrs: the tag's (name, value) attribute pairs
        :type attrs: list
        """
        # Only <link> may declare a favicon, whatever rel other tags carry
        if tag != 'link':
            return

        # rel is a space separated, case-insensitive list of tokens, so
        # "icon", "shortcut icon" and "Shortcut Icon" all qualify.
        rel_tokens = set()
        for attr, value in attrs:
            if attr == 'rel' and value:
                rel_tokens.update(value.lower().split())
        if 'icon' not in rel_tokens:
            return

        href = None
        icon_type = None
        for attr, value in attrs:
            if attr == 'href':
                href = value
            elif attr == 'type':
                icon_type = value
        if href:
            # A recognised file extension wins over the declared type
            try:
                mimetype = extension_to_mimetype(href.rpartition('.')[2])
            except KeyError:
                pass
            else:
                icon_type = mimetype
            if icon_type:
                self.icons.append((href, icon_type))

    def handle_endtag(self, tag):
        """
        Notes when the document head has been closed

        :param tag: the lower-cased tag name
        :type tag: string
        """
        if tag == 'head':
            self.left_head = True

    def get_icons(self):
        """
        Returns a list of favicons extracted from the HTML page

        :returns: a list of favicons
        :rtype: list
        """
        return self.icons
570
571
572# ------ HELPER FUNCTIONS ------
573
574
def url_to_host(url):
    """
    Extracts the host name from a URL

    :param url: the URL to take apart
    :type url: string
    :returns: the host part of the URL
    :rtype: string
    """
    parsed = urlparse(url)
    return parsed.hostname
585
586
def host_to_icon_name(host, mimetype):
    """
    Builds the icon filename used for a host

    :param host: the host the icon belongs to
    :type host: string
    :param mimetype: the mimetype of the icon
    :type mimetype: string
    :returns: the host followed by the extension matching the mimetype
    :rtype: string

    """
    stem = host + '.'
    return stem + mimetype_to_extension(mimetype)
600
601
def icon_name_to_host(icon):
    """
    Recovers the host name from an icon's filename

    :param icon: the icon name
    :type icon: string
    :returns: everything before the last dot (empty if there is no dot)
    :rtype: string
    """
    host, _dot, _extension = icon.rpartition('.')
    return host
612
613
# Two-way lookup table shared by mimetype_to_extension and
# extension_to_mimetype.
MIME_MAP = {}

# mimetype -> filename extension
MIME_MAP.update(
    {
        'image/gif': 'gif',
        'image/jpeg': 'jpg',
        'image/png': 'png',
        'image/vnd.microsoft.icon': 'ico',
        'image/x-icon': 'ico',
    }
)

# filename extension -> mimetype
MIME_MAP.update(
    {
        'gif': 'image/gif',
        'jpg': 'image/jpeg',
        'jpeg': 'image/jpeg',
        'png': 'image/png',
        'ico': 'image/vnd.microsoft.icon',
    }
)
626
627
def mimetype_to_extension(mimetype):
    """
    Looks up the filename extension used for a mimetype

    :param mimetype: the mimetype (matched case-insensitively)
    :type mimetype: string
    :returns: the filename extension for the given mimetype
    :rtype: string
    :raises KeyError: if given an invalid mimetype
    """
    key = mimetype.lower()
    return MIME_MAP[key]
639
640
def extension_to_mimetype(extension):
    """
    Looks up the mimetype that belongs to a filename extension

    :param extension: the filename extension (matched case-insensitively)
    :type extension: string
    :returns: the mimetype for the given filename extension
    :rtype: string
    :raises KeyError: if given an invalid filename extension
    """
    key = extension.lower()
    return MIME_MAP[key]
652
653
654#  ------ EXCEPTIONS ------
655
656
class NoIconsError(Exception):
    """Raised when there are no icons left to try downloading."""
659
660
class InvalidIconError(Exception):
    """Raised when a downloaded icon fails the validity check."""