From 70e9b6fd47ba511c6a4658efa2d3f3cc913cf813 Mon Sep 17 00:00:00 2001 From: Nick Daniels Date: Wed, 19 Dec 2012 14:20:11 +0000 Subject: [PATCH 1/3] Fix TypeError for Python 2.7.x --- youtube_dl/FileDownloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index e8c62ce07..d46ee29db 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -454,7 +454,7 @@ class FileDownloader(object): self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.') return try: - with io.open(encodeFilename(infofn), 'w', 'utf-8') as infof: + with io.open(encodeFilename(infofn), 'wb') as infof: json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle']) json.dump(json_info_dict, infof) except (OSError, IOError): From 11cdd6a76530a30af23c0b045de67b4a628dfca6 Mon Sep 17 00:00:00 2001 From: Nick Daniels Date: Wed, 19 Dec 2012 14:21:14 +0000 Subject: [PATCH 2/3] Refactor IDParser to search for elements by any attribute not just ID --- youtube_dl/utils.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 25b67db06..e697ad4ad 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -201,10 +201,11 @@ def htmlentity_transform(matchobj): return (u'&%s;' % entity) compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix -class IDParser(compat_html_parser.HTMLParser): - """Modified HTMLParser that isolates a tag with the specified id""" - def __init__(self, id): - self.id = id +class AttrParser(compat_html_parser.HTMLParser): + """Modified HTMLParser that isolates a tag with the specified attribute""" + def __init__(self, attribute, value): + self.attribute = attribute + 
self.value = value self.result = None self.started = False self.depth = {} @@ -229,7 +230,7 @@ class IDParser(compat_html_parser.HTMLParser): attrs = dict(attrs) if self.started: self.find_startpos(None) - if 'id' in attrs and attrs['id'] == self.id: + if self.attribute in attrs and attrs[self.attribute] == self.value: self.result = [tag] self.started = True self.watch_startpos = True @@ -267,8 +268,12 @@ class IDParser(compat_html_parser.HTMLParser): return '\n'.join(lines).strip() def get_element_by_id(id, html): - """Return the content of the tag with the specified id in the passed HTML document""" - parser = IDParser(id) + """Return the content of the tag with the specified ID in the passed HTML document""" + return get_element_by_attribute("id", id, html) + +def get_element_by_attribute(attribute, value, html): + """Return the content of the tag with the specified attribute in the passed HTML document""" + parser = AttrParser(attribute, value) try: parser.loads(html) except compat_html_parser.HTMLParseError: From e516d7f2cd50bc42e50fdf5405139af189bf69c3 Mon Sep 17 00:00:00 2001 From: Nick Daniels Date: Wed, 19 Dec 2012 14:21:39 +0000 Subject: [PATCH 3/3] Update Vimeo Info Extractor to pull in the description properly --- youtube_dl/InfoExtractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 9a41dde57..cf5b51bd8 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -999,7 +999,7 @@ class VimeoIE(InfoExtractor): video_thumbnail = config["video"]["thumbnail"] # Extract video description - video_description = get_element_by_id("description", webpage) + video_description = get_element_by_attribute("itemprop", "description", webpage) if video_description: video_description = clean_html(video_description) else: video_description = ''