From 1e4f123cb1ecace7a8543ce5f1086ff58aa0cff8 Mon Sep 17 00:00:00 2001
From: Mayank Juneja <mayankjuneja1@gmail.com>
Date: Thu, 24 Oct 2013 14:53:56 +0530
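Subject: [PATCH] version upgrade: adapt to the new videolectures.net
 streaming player

Read the video metadata from <url>/video/1/smil.xml instead of scraping
the lecture page, keep the largest variant, and download it over HTTP
when available, falling back to rtmpdump for RTMP streams. Bump the
script version to 0.2.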

---
 vl-download | 98 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 79 insertions(+), 19 deletions(-)

diff --git a/vl-download b/vl-download
index 6229d74..227dee6 100755
--- a/vl-download
+++ b/vl-download
@@ -14,11 +14,12 @@ import re
 import os
 import sys
 from BeautifulSoup import BeautifulSoup
-from optparse      import OptionParser
+from optparse import OptionParser
+from operator import itemgetter
 
 # Script Description
 desc = "videolectures.net video downloader"
-vers = "0.1"
+vers = "0.2"
 # Initialize Option Parser
 parser = OptionParser(description=desc,version = vers)
 
@@ -34,7 +35,6 @@ parser.add_option("-o","--outfile",
                 help="Output file Name",
                 metavar="outfile")
 
-
 # Parse arguments
 (options,args) = parser.parse_args()
 
@@ -42,7 +42,6 @@ parser.add_option("-o","--outfile",
 if options.url is None:
         # No URL specified
         parser.error("--url option is required")
-
         sys.exit(1)
 if options.outfile is None :
         # Output file not specified
@@ -61,27 +60,88 @@ request_headers =  {
         'Cache-Control'		: 'max-age=0'
        }
 
+url = url.rstrip("/")
+metadata_url = url + "/video/1/smil.xml"
 
-req = urllib2.Request(url, None, request_headers)
+req = urllib2.Request(metadata_url, None, headers=request_headers)
 response = urllib2.urlopen(req)
 html_page = response.read()
 
 html_page = BeautifulSoup(html_page)
 
 ## Find the metadata (Author, Title, Date)
-metadata = html_page.findAll(id='vl_desc')[0]
-title  = metadata.findAll('h2')[0].contents[0].encode('ascii','ignore')
-author = metadata.findAll("div",{"class":"lec_data"})[0].findAll("a")[0].contents[0].encode('ascii','ignore')
-
-## Find the rtmp data
-rtmp_section = html_page.findAll('script')[5].contents[0]
-rtmp_path = re.findall('clip.url.*\n',rtmp_section)[0].split("=")[1][2:-3]
-rtmp_url  = re.findall('clip.netConnectionUrl.*\n',rtmp_section)[0].split("=")[1][2:-3]
-
-## Begin downloading
+# The SMIL manifest exposes title/author as <meta name=.. content=..> tags;
+# either may be absent, in which case the value stays an empty string.
+title = html_page.findAll('meta', {'name':'title'}) or ''
+author = html_page.findAll('meta', {'name':'author'}) or ''
+if title: title = title[0].get('content', '').encode('ascii','ignore')
+if author: author = author[0].get('content', '').encode('ascii','ignore')
+
+def _to_int(value):
+    """Return value as an int, or -1 when it is missing or not numeric."""
+    try:
+        return int(value)
+    except (TypeError, ValueError):
+        return -1
+
+# Build one record per <video> tag: text attributes default to '', numeric
+# ones to -1 so a malformed attribute cannot abort the whole run.
+videos = []
+for tag in html_page.findAll("video"):
+    video = dict((k, tag.get(k, '')) for k in ('proto', 'ext', 'type', 'src', 'streamer'))
+    video.update((k, _to_int(tag.get(k))) for k in ('id', 'width', 'height', 'size'))
+    videos.append(video)
+
+# Keep only the largest variant(s), indexed by protocol. max() raises
+# ValueError on an empty sequence, so a manifest without <video> tags must
+# produce an empty mapping (reported later) rather than a traceback.
+video_urls = {}
+if videos:
+    max_size = max(videos, key=itemgetter('size'))['size']
+    for video in videos:
+        if video['size'] == max_size:
+            keys = ('streamer', 'src', 'ext')
+            video_urls[video['proto']] = dict((k, video[k]) for k in keys)
 
 if(outfile==""):
-	outfile = author.replace(" ","_") + "-" + title.replace(" ","_")
-cmd = ("rtmpdump -r %s -y %s -o \"%s.flv\"")%(rtmp_url, rtmp_path, outfile)
-os.system(cmd)
-
+    outfile = title.replace(" ","_") + "-" + author.replace(" ","_")
+print "Downloading video: ", outfile
+
+# Prefer plain HTTP: it needs no external tool.
+if 'http' in video_urls:
+    v = video_urls['http']
+    if v['ext']:
+        outfile = outfile + "." + v['ext']
+    u = urllib2.urlopen(urllib2.Request(v['src'], None, headers=request_headers))
+    # Content-Length is optional; treat a missing header as "size unknown".
+    file_size = int(u.info().getheader("Content-Length") or 0)
+    file_size_dl = 0
+    f = open(outfile, 'wb')
+    try:
+        while True:
+            chunk = u.read(8192)
+            if not chunk:
+                break
+            file_size_dl += len(chunk)
+            f.write(chunk)
+            if file_size:
+                status = r"Status: [%3.2f%%]" % (file_size_dl * 100. / file_size)
+                print status + chr(8)*(len(status)+1),
+    finally:
+        f.close()
+        u.close()
+
+# Fall back to RTMP through the external rtmpdump tool.
+elif 'rtmp' in video_urls:
+    import subprocess
+    v = video_urls['rtmp']
+    # "streamer" is the connection URL (-r) and "src" the play path (-y). Passing
+    # a list bypasses the shell, so remote metadata cannot inject commands.
+    cmd = ["rtmpdump", "-r", v['streamer'], "-y", v['src'], "-o", outfile + ".flv"]
+    print " ".join(cmd)
+    try:
+        subprocess.call(cmd)
+    except OSError:
+        print "Could not run rtmpdump (is it installed?)"
+else:
+    print "Could not find a supported protocol"
\ No newline at end of file