# pyDownload
#
# 16/01/2019 - 11:39 pm
import os
import re
import sys
import time
import urllib.error
import urllib.parse
import urllib.request

class lewDownload:
    """Static helper that scrapes a web page and downloads every linked
    file of a requested type into a local ``downloads/`` folder.

    All methods are invoked on the class itself (``lewDownload.method(...)``);
    no instances are ever created, so none of the methods take ``self``.
    """

    # Sent with every request so servers see a non-empty User-Agent.
    header = {"User-Agent": "lewDownload"}
    # Extensions embedded via <img src=...> rather than linked via <a href=...>.
    images = ["png", "jpg", "jpeg", "gif"]

    # BeautifulSoup is the only third-party dependency; fail fast with a
    # helpful install hint instead of a raw ImportError traceback.
    try:
        from bs4 import BeautifulSoup
    except ImportError:
        print(' [ERROR] Please install BeautifulSoup by running: pip install beautifulsoup4')
        sys.exit(0)

    def fdownload(url, filename):
        """Download a single file into ``downloads/<filename>``.

        ``url`` may be absolute or relative; relative URLs are resolved
        against the module-level ``download_url`` the user entered.
        """
        # urljoin handles absolute, root-relative and plain relative links
        # correctly (the old startswith/concatenation check did not).
        url = urllib.parse.urljoin(download_url, url)
        print(" [INFO] Downloading file {} from {}".format(filename, url))
        req = urllib.request.Request(url, headers=lewDownload.header)
        # Decode every percent-escape (%20, %C3%A9, ...), not just spaces.
        filename = urllib.parse.unquote(filename)
        # Create the folder we actually write into (the old code created
        # C:/pyDownload/downloads but then wrote to ./downloads).
        os.makedirs('downloads', exist_ok=True)
        with urllib.request.urlopen(req) as data, \
                open(os.path.join('downloads', filename), 'wb') as file:
            file.write(data.read())
        # Be polite to the server between consecutive downloads.
        time.sleep(0.5)

    def download_all(url, filetype):
        """Fetch ``url`` and download every linked file whose extension
        equals ``filetype``.  Returns False when the page is unreachable.
        """
        print(' [INFO] Downloading all {} files from {}'.format(filetype.upper(), url))

        html = lewDownload.get_page_contents(url)
        if html is None:
            print("ERROR CONNECTING TO WEBSITE")
            return False
        with html:
            body = html.read()
        soup = lewDownload.BeautifulSoup(body, "html.parser")

        # Image types are embedded via <img src>; everything else is
        # linked via <a href>.
        if filetype in lewDownload.images:
            attr = 'src'
            links = soup.find_all('img', src=True)
        else:
            attr = 'href'
            links = soup.find_all('a', href=True)

        wanted = "." + filetype
        for tag in links:
            name = os.path.basename(tag[attr])
            if os.path.splitext(name)[1] == wanted:
                lewDownload.fdownload(tag[attr], name)

    def get_page_contents(url):
        """Return the open HTTP response for ``url``, or None on failure.

        Network and HTTP errors are swallowed so the caller can print a
        friendly message instead of crashing with a traceback (previously
        URLError/HTTPError propagated and the caller's None check was
        dead code).
        """
        req = urllib.request.Request(url, headers=lewDownload.header)
        try:
            page = urllib.request.urlopen(req)
        except urllib.error.URLError:
            # URLError is the base of HTTPError, so both are covered.
            return None
        if page.getcode() == 200:
            return page
        page.close()
        return None

    def valid_url(url):
        """Return True when ``url`` looks like a fetchable http(s) URL.

        A missing scheme is tolerated: "example.com" is checked as
        "http://example.com".  NOTE(review): the normalization is local
        only -- the caller's string is NOT modified.
        """
        regex = re.compile(
            r'^(?:http)s?://'
            r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'
            r'localhost|'
            r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'
            r'(?::\d+)?'
            r'(?:/?|[/?]\S+)$', re.IGNORECASE)
        if not url.startswith("http://") and not url.startswith("https://"):
            url = "http://" + url
        return re.match(regex, url) is not None


# --- Interactive entry point -------------------------------------------------
download_url = ""
while not lewDownload.valid_url(download_url):
    download_url = input(" > Enter URL to download files from: ")

# valid_url() only prepends the scheme on its LOCAL copy, so a bare
# "example.com" would pass validation yet crash urlopen later.  Mirror the
# normalization here so download_url is always a full http(s) URL.
if not download_url.startswith("http://") and not download_url.startswith("https://"):
    download_url = "http://" + download_url

# Tolerate " .png"-style input: drop surrounding whitespace and a leading dot.
download_filetype = input(" > Enter filetype to download: ").strip().lstrip(".")

lewDownload.download_all(download_url, download_filetype)

rename_folder_name = input(" > Rename download folder? (Y/N): ")
if rename_folder_name.lower() in ("y", "yes"):
    folder_name = input(" > Enter folder name: ")
    # The downloads folder only exists if at least one file matched;
    # guard so a zero-hit run doesn't crash with FileNotFoundError.
    if os.path.isdir("downloads"):
        os.rename("downloads", folder_name)
        print(" [INFO] Renamed downloads folder to {}".format(folder_name))
    else:
        print(" [INFO] No downloads folder was created; nothing to rename")