From caa7e0736f1469a003983a63b9b0be9fe181e286 Mon Sep 17 00:00:00 2001
From: anovicecodemonkey <novicecodemonkey1@mailinator.com>
Date: Thu, 30 Jan 2014 00:56:35 +1030
Subject: [PATCH] Add support for The Guardian website

Hi all,
This commit adds support for theguardian.com, the website of The Guardian newspaper.

As I am new to Python programming and this is my first contribution to the project, I would appreciate any and all feedback.

Regards.
---
 youtube_dl/extractor/theguardian.py | 31 +++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 youtube_dl/extractor/theguardian.py

diff --git a/youtube_dl/extractor/theguardian.py b/youtube_dl/extractor/theguardian.py
new file mode 100644
index 000000000..b3f64244a
--- /dev/null
+++ b/youtube_dl/extractor/theguardian.py
@@ -0,0 +1,31 @@
+import re
+from .common import InfoExtractor
+class TheGuardianIE(InfoExtractor):
+    """Extract the MP4 video embedded in a theguardian.com video page."""
+
+    # At least four path segments precede the slug; the slug (last segment,
+    # minus any trailing slash, query or fragment) is used as the video id.
+    _VALID_URL = r'https?://(?:www\.)?theguardian\.com/(?:[^/?#]+/){4,}(?P<video_id>[^/?#]+)'
+
+    _TEST = {
+        u'url': u'http://www.theguardian.com/world/video/2014/jan/29/president-barack-obama-state-union-address-video',
+        u'file': u'president-barack-obama-state-union-address-video.mp4',
+        u'md5': u'c3c4d57157bd28a20e877a0ec796a6cc',
+        u'info_dict': {
+            # \u2013 escape keeps the source ASCII-only (no coding line needed).
+            u'title': u'President Barack Obama delivers State of the Union address \u2013 video',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return a one-element list holding the info dict for ``url``."""
+        video_id = re.match(self._VALID_URL, url).group('video_id')
+        webpage = self._download_webpage(url, video_id)
+        self.report_extraction(video_id)
+        # The player options contain e.g. ``file : 'video.mp4',``; stop at the
+        # first closing quote so later quoted values on the line are ignored.
+        video_url = self._html_search_regex(
+            r'file\s*:\s*\'([^\']+)\'', webpage, u'video URL')
+
+        return [{'id': video_id, 'url': video_url, 'ext': 'mp4',
+                 'title': self._og_search_title(webpage)}]