Martin
0
Q:

Python web scraping: downloading all the videos in a playlist

# Importing libraries 
import bs4 as bs 
import sys 
import urllib.request 
from PyQt5.QtWebEngineWidgets import QWebEnginePage 
from PyQt5.QtWidgets import QApplication 
from PyQt5.QtCore import QUrl 
import pytube # library for downloading youtube videos 
  
  
class Page(QWebEnginePage):
    """Load a URL in a Qt WebEngine page and capture the resulting HTML.

    Constructing an instance blocks: it runs the Qt event loop until the
    page has finished loading and its HTML has been handed to
    :meth:`Callable`, after which the markup is available as ``self.html``.
    """

    def __init__(self, url):
        """Load *url* and block until ``self.html`` has been filled in.

        Args:
            url: Address of the page to load, as a string.
        """
        # Reuse an application object if one already exists; creating a
        # second QApplication in the same process is an error.
        self.app = QApplication.instance() or QApplication(sys.argv)
        QWebEnginePage.__init__(self)
        self.html = ''
        self.loadFinished.connect(self._on_load_finished)
        self.load(QUrl(url))
        # Blocks here until Callable() quits the event loop.
        self.app.exec_()

    def _on_load_finished(self):
        """Request the page's HTML once loading has finished."""
        # toHtml() delivers its result through the callback and returns
        # nothing, so its return value must not be stored in self.html.
        self.toHtml(self.Callable)
        print('Load finished')

    def Callable(self, html_str):
        """Store the delivered HTML and stop the event loop.

        Args:
            html_str: The page markup passed in by ``toHtml``.
        """
        self.html = html_str
        self.app.quit()
  
  
# Video URLs collected by the scraping loop below and later consumed by
# the download loop.
links = [] 
  
  
def exact_link(link):
    """Return the absolute YouTube URL for a relative watch href.

    Only the part of *link* up to its second ``=`` is considered, and
    anything from the first ``&`` onwards (playlist id, index, timestamp)
    is dropped, leaving e.g. ``/watch?v=<id>``.

    Args:
        link: Relative href such as ``/watch?v=abc&list=PL123``.

    Returns:
        ``"https://www.youtube.com"`` followed by the trimmed path.
    """
    # Keep at most the first two '='-separated pieces ("/watch?v" and the
    # video id plus whatever trails it).
    head = "=".join(link.split("=")[:2])
    # partition() returns the whole string when there is no '&', so the
    # last character of the id is no longer cut off in that case.
    path, _, _ = head.partition("&")
    return "https://www.youtube.com" + path
  
  
url = "https://www.youtube.com/watch?v=lcJzw0JGfeE&list=PLqM7alHXFySENpNgw27MzGxLzNJuC_Kdj"
# Load the playlist page through Page and extract the video links
# from the resulting HTML.
page = Page(url)

soup = bs.BeautifulSoup(page.html, 'html.parser')
thumbnails = soup.find_all('a', id='thumbnail')

# The first thumbnail anchor is skipped: it is the playlist link,
# not a link to a particular video.
for link in thumbnails[1:]:
    vid_src = link.get('href')
    if not vid_src:
        # Anchor without a usable href: nothing to download.
        continue
    # Normalise the href into the plain watch URL handed to pytube.
    links.append(exact_link(vid_src))

# Download each collected video in turn; a failure on one video is
# reported and the remaining videos are still attempted.
for link in links:
    try:
        yt = pytube.YouTube(link)
        # Highest-resolution progressive mp4 stream.
        stream = yt.streams.filter(progressive=True,
                                   file_extension='mp4').order_by(
                                   'resolution').desc().first()
        if stream is None:
            # No matching stream exists for this video.
            print('Some error in downloading: ', link)
            continue
        stream.download()
    except Exception:
        # Exception (not a bare except) so Ctrl-C still stops the script.
        print('Some error in downloading: ', link)
    else:
        print("Downloaded: ", link)
0

# Ask for the desired resolution and turn it into the label form
# passed to the stream filter (e.g. "720" -> "720p").
vquality=input("Enter the video quality (1080,720,480,360,240,144):")
vquality=vquality.strip()+"p"

for link in playlist:
    yt = YouTube(link)
    # first() yields None instead of raising when no mp4 stream exists at
    # the requested resolution, so one missing quality does not abort the
    # rest of the playlist.
    video = yt.streams.filter(mime_type="video/mp4",res=vquality).first()
    if video is None:
        print(yt.title+" - is not available in "+vquality)
        continue
    video.download("Downloads")
    print(yt.title+" - has been downloaded !!!")

0

# Collect every anchor whose href points at a video ("/watch...") and
# store it as an absolute URL.
for anchor in soup.find_all('a'):
        href = anchor.get('href')
        # Anchors without an href yield None; skip them instead of slicing None.
        if href and href.startswith("/watch"):
            playlist.append("https://www.youtube.com" + href)

print(playlist)
"""
For example, a playlist with 6 videos

Enter the Youtube Playlist URL : https://www.youtube.com/playlist?list=PLGzz7pyosmlJfx9ivigemSouoZR9uLT2-
['https://www.youtube.com/watch?v=iyL9-EE3ngk&list=PLGzz7pyosmlJfx9ivigemSouoZR9uLT2-',
 'https://www.youtube.com/watch?v=iyL9-EE3ngk&list=PLGzz7pyosmlJfx9ivigemSouoZR9uLT2-',
 'https://www.youtube.com/watch?v=iyL9-EE3ngk&list=PLGzz7pyosmlJfx9ivigemSouoZR9uLT2-&index=2&t=0s', 
'https://www.youtube.com/watch?v=iyL9-EE3ngk&list=PLGzz7pyosmlJfx9ivigemSouoZR9uLT2-&index=2&t=0s', 
'https://www.youtube.com/watch?v=G7E8YrOiYrQ&list=PLGzz7pyosmlJfx9ivigemSouoZR9uLT2-&index=3&t=0s', 
'https://www.youtube.com/watch?v=G7E8YrOiYrQ&list=PLGzz7pyosmlJfx9ivigemSouoZR9uLT2-&index=3&t=0s',
 'https://www.youtube.com/watch?v=79D4Y1cUK7I&list=PLGzz7pyosmlJfx9ivigemSouoZR9uLT2-&index=4&t=0s',
 'https://www.youtube.com/watch?v=79D4Y1cUK7I&list=PLGzz7pyosmlJfx9ivigemSouoZR9uLT2-&index=4&t=0s',
 'https://www.youtube.com/watch?v=MUe0FPx8kSE&list=PLGzz7pyosmlJfx9ivigemSouoZR9uLT2-&index=5&t=0s',
 'https://www.youtube.com/watch?v=MUe0FPx8kSE&list=PLGzz7pyosmlJfx9ivigemSouoZR9uLT2-&index=5&t=0s',
 'https://www.youtube.com/watch?v=UkpmjbHYV0Y&list=PLGzz7pyosmlJfx9ivigemSouoZR9uLT2-&index=6&t=0s',
 'https://www.youtube.com/watch?v=UkpmjbHYV0Y&list=PLGzz7pyosmlJfx9ivigemSouoZR9uLT2-&index=6&t=0s',
 'https://www.youtube.com/watch?v=WTOFLmB9ge0&list=PLGzz7pyosmlJfx9ivigemSouoZR9uLT2-&index=7&t=0s',
 'https://www.youtube.com/watch?v=WTOFLmB9ge0&list=PLGzz7pyosmlJfx9ivigemSouoZR9uLT2-&index=7&t=0s'
]
"""
0

# Absolute video URLs found on the playlist page.
playlist=[]
url=input("Enter the Youtube Playlist URL : ").strip() #Takes the Playlist Link
# A timeout stops the script hanging forever on an unresponsive server.
data= requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as a playlist.
data.raise_for_status()
soup=bs4.BeautifulSoup(data.text,'html.parser')

0

from pytube import YouTube
import bs4
import requests

0

New to Communities?

Join the community