Python tkinter – YouTube captions scraper | Content generator

In this post we will create a Python script that scrapes YouTube captions and posts them to a WordPress site as a blog post. The prerequisite for this project is a WordPress site with the Post by Email option enabled; more on how to do this can be found here. To download YouTube captions we will be using pytube, a lightweight, Pythonic, dependency-free library for downloading YouTube videos. The library is very easy to use and quite intuitive. This project also makes use of the tkinter framework to enable us to build a GUI.

The GUI of this project contains various elements, these include:

  • A search bar and search button – This will allow us to query search terms on YouTube.
  • A list box and scroll bar – This will display any search results returned from YouTube; selecting a result within the list box and clicking the Get Content button will then download the captions for it.
  • A text area – This will present to us the captions, giving us the ability to apply edits if required.
  • An input field – This is where we will define the title of the post as it will appear on WordPress.
  • Site buttons – The clicking of these buttons will send the content of the captions along with the post title to WordPress via post by email.

The script will also contain three functions which are:

  • SearchContent – This function takes our search query and retrieves any YouTube video results via pytube. Any results found are then placed within our list box.
  • GetContent – This function takes our list box selection and using pytube will retrieve the captions for the YouTube video – Note that it will only return the captions if they are in English, this does not include those that have been automatically generated.
  • SendOffSite1 – This function will take the captions and a user given post title and then email both to WordPress via post by email.

The full source code for this project can be found below:

from tkinter import *
from pytube import YouTube
import time
from pytube import Search
from bs4 import BeautifulSoup
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart


def SearchContent():
    """Search YouTube for the text in the search bar and list the results.

    Reads the query from the ``SearchQuery`` entry. A blank (or
    whitespace-only) query only updates ``Statuslbl`` with a prompt.
    Otherwise the ``listNodes`` list box is cleared and refilled with one
    row per result, formatted as ``<title>||<watch_url>`` followed by a
    newline character. ``Statuslbl`` then tells the user either to pick a
    result or that nothing was found.
    """
    query = SearchQuery.get().strip()
    if not query:
        Statuslbl.config(text="!!!Please Enter search query!!!")
        return

    # Fetch before clearing the list so a failed search leaves the
    # previous results on screen.
    results = Search(query).results

    listNodes.delete(0, END)
    for video in results:
        # The "||" separator is what the row consumer splits on to
        # recover the watch URL, so the row format must stay as is.
        listNodes.insert(END, f"{video.title}||{video.watch_url}\n")

    # Update the status once, after the loop, and cover the empty case so
    # a stale message from an earlier action is never left behind.
    if results:
        Statuslbl.config(text="Please Select a Result")
    else:
        Statuslbl.config(text="!!!No Results Found!!!")
  

def GetContent():
    """Load the English captions of the selected search result.

    Takes the selected row of ``listNodes`` (formatted as
    ``<title>||<watch_url>``), asks pytube for the video's ``'en'``
    caption track, strips the XML markup with BeautifulSoup and writes the
    resulting text, with newlines replaced by spaces, into ``txtfld2``.

    ``Statuslbl`` reports a missing selection or unavailable captions;
    it is cleared when captions were loaded successfully.
    """
    selected = listNodes.curselection()
    if not selected:
        # Nothing selected: the row variable would otherwise be unbound.
        Statuslbl.config(text="!!!Please Select a Result!!!")
        return

    # Use the last selected index, as the original loop did.
    row = listNodes.get(selected[-1])

    # Split from the right so a title that itself contains "||" cannot
    # shift the URL, and strip the trailing newline stored with each row.
    _title, separator, url = row.rpartition("||")
    url = url.strip()
    if not separator or not url:
        # Row was not produced by a search (no "<title>||<url>" shape).
        Statuslbl.config(text="!!!Captions Not Available!!!")
        return

    txtfld2.delete(1.0, "end")

    content = None
    try:
        captions = YouTube(url).captions.get_by_language_code('en')
        # Handle both "returns None" and "raises" for a missing track;
        # which one happens may depend on the installed pytube version.
        if captions is not None:
            soup = BeautifulSoup(captions.xml_captions, features="lxml")
            content = soup.get_text().replace("\n", " ")
    except Exception:
        # UI boundary: any network/parsing failure is reported as
        # "not available" instead of surfacing a traceback. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit through.
        content = None

    if content is None:
        Statuslbl.config(text="!!!Captions Not Available!!!")
        return

    txtfld2.insert(1.0, content)
    # Clear any stale error left over from a previous attempt.
    Statuslbl.config(text="")
    
def SendOffSite1():
    """Email the captions and post title to WordPress (site #1).

    Reads the post body from ``txtfld2`` and the title from ``txtfld1``.
    If either is blank, ``Statuslbl`` asks the user to finish preparing
    the content and nothing is sent. Otherwise a plain-text email is sent
    over SMTP with STARTTLS, with the title as the subject and the
    captions as the body. On success both input widgets are cleared and
    ``Statuslbl`` confirms the post; on an SMTP or network failure the
    inputs are kept and ``Statuslbl`` reports the failure.
    """
    content = txtfld2.get("1.0", END)
    title = txtfld1.get().strip()
    # The Text widget always returns at least a trailing newline, so test
    # the stripped body rather than the raw string.
    if not title or not content.strip():
        Statuslbl.config(text="!!!Please Prep Content Entirely!!!")
        return

    # Placeholders: replace with the sender account and the WordPress
    # Post by Email address. Avoid committing real credentials to source.
    Email = "FromEmail"
    Password = "Password"
    ToAddress = "WordPress Post By email address"

    msg = MIMEText(content)
    msg['Subject'] = title
    msg['From'] = Email
    msg['To'] = ToAddress

    try:
        # The context manager closes the connection even when login or
        # sending fails; the timeout stops a dead server from freezing
        # the GUI indefinitely.
        with smtplib.SMTP('smtp-mail.outlook.com', 587, timeout=30) as server:
            server.starttls()
            server.ehlo()
            server.login(Email, Password)
            server.sendmail(Email, ToAddress, msg.as_string())
    except (smtplib.SMTPException, OSError):
        # Keep the user's text so they can retry after fixing the issue.
        Statuslbl.config(text="!!!Post Failed!!!")
        return

    txtfld2.delete(1.0, "end")
    txtfld1.delete(0, 'end')
    Statuslbl.config(text="!!!Content Posted to Site #1!!!")
        

# --- GUI construction -------------------------------------------------------
# Every widget is positioned with absolute coordinates via place(); the
# window is fixed at 800x640 to match those coordinates.
window = Tk()
window.title('Scriptopia')
window.geometry("800x640")
window.configure(bg='black')

lbl = Label(window, text="YouTube Content Scraper", bg='black', fg='white', font=("System", 34))
lbl.place(x=150, y=5)

# Search bar. Entry has no "text" option: Tk resolves "text" as an
# abbreviation of "textvariable", so passing it would bind the field to a
# Tcl variable of that name rather than show a label. It is left out here.
SearchQuery = Entry(window, bd=5, width=55)
SearchQuery.place(x=225, y=65)

btn = Button(window, text="Search", bg='white', fg='black', command=SearchContent)
btn.place(x=390, y=100)

# Site selection buttons.
lbl = Label(window, text="Select Site...", bg='black', fg='white', font=("System", 11))
lbl.place(x=390, y=145)

btn1 = Button(window, text="Post to site 1", bg='white', fg='black', command=SendOffSite1)
btn1.place(x=390, y=170)

# No command is wired to this button; clicking it does nothing.
btn2 = Button(window, text="Post to site 2", bg='white', fg='black')
btn2.place(x=480, y=170)

# Status line the callbacks use to report prompts and errors.
Statuslbl = Label(window, text="", bg='black', fg='red', width=25, font=("System", 11))
Statuslbl.place(x=575, y=170)

# Post title input.
lbl = Label(window, text="Enter Post Title.", bg='black', fg='white', font=("System", 11))
lbl.place(x=390, y=205)

txtfld1 = Entry(window, bd=5, width=63)
txtfld1.place(x=390, y=230)

# Search results list with its scroll bar. The list starts empty and is
# filled by SearchContent. Only place() is used: a preceding pack() call
# would be overridden by place() anyway.
lbl = Label(window, text="Search Results", bg='black', fg='white', font=("System", 11))
lbl.place(x=10, y=120)

listNodes = Listbox(window, width=45, height=6, bd=5, font=("System", 8))
listNodes.place(x=10, y=145)

scrollbar = Scrollbar(window, orient="vertical")
scrollbar.config(command=listNodes.yview)
scrollbar.place(x=365, y=145, height=114)

listNodes.config(yscrollcommand=scrollbar.set)

btn = Button(window, text="Get Content", bg='white', fg='black', command=GetContent)
btn.place(x=10, y=265)

# Editable text area holding the scraped captions.
lbl = Label(window, text="Content Scraped", bg='black', fg='white', font=("System", 11))
lbl.place(x=10, y=300)

txtfld2 = Text(window, bd=5, width=95)
txtfld2.place(x=10, y=325, height=300)

window.mainloop()


Leave a Reply