# music_script/download.py
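"""Download music folders from a simple HTTP directory listing.

The script scrapes the server's root index for sub-folder links, skips any
folder that already exists locally, and mirrors the new ones with wget,
running up to MAX_WORKERS downloads in parallel.

Usage: python download.py SERVER_HOST
(the server is expected on port 8000, e.g. a `python -m http.server` instance)
"""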

import concurrent.futures
import os
import pwd
import subprocess
import sys
from urllib.parse import unquote, urljoin

import requests
from bs4 import BeautifulSoup
# Server host comes from the first command-line argument; port 8000 is the
# default used by `python -m http.server`.
BASE_URL = f"http://{sys.argv[1]}:8000"
MAX_WORKERS = 4


def get_folders():
    """Scrape the server's root directory listing and return folder names."""
    try:
        response = requests.get(BASE_URL, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        folders = []
        for link in soup.find_all('a', href=True):
            href = link['href']
            # Directory entries end with '/'; skip the root and parent links.
            if href.endswith('/') and href not in ['/', '../']:
                folder_name = unquote(href)
                folders.append(folder_name)
        return sorted(set(folders))
    except Exception as e:
        print(f"Error fetching folder list: {e}")
        return []


def change_ownership(folder_path):
    """Recursively change ownership of folder_path to the user running the script."""
    try:
        current_user = pwd.getpwuid(os.getuid())
        uid = current_user.pw_uid
        gid = current_user.pw_gid
        for root, dirs, files in os.walk(folder_path):
            os.chown(root, uid, gid)
            for file in files:
                file_path = os.path.join(root, file)
                try:
                    os.chown(file_path, uid, gid)
                except (OSError, IOError) as e:
                    print(f" Warning: Could not change ownership of {file_path}: {e}")
        print(f" ✓ Changed ownership of {folder_path} to {current_user.pw_name}")
    except Exception as e:
        print(f" Warning: Could not change ownership of {folder_path}: {e}")


def download_folder(folder):
    """Mirror a single folder from the server with wget; return True on success."""
    clean_folder = folder.rstrip('/')
    if os.path.exists(clean_folder):
        print(f"⏭ Skipping existing folder: {clean_folder}")
        return True
    print(f"Starting download of: {clean_folder}")
    try:
        cmd = [
            "wget",
            "--recursive",
            "--no-parent",
            "--no-host-directories",
            "--reject=index.html*",
            "--continue",
            "--progress=bar",
            "--tries=3",
            "--timeout=30",
            urljoin(BASE_URL, folder),
        ]
        # Output is captured (so the progress bar is not shown live);
        # stderr is printed only if wget fails.
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode == 0:
            print(f"✓ Completed download of: {clean_folder}")
            if os.path.exists(clean_folder):
                change_ownership(clean_folder)
            return True
        else:
            print(f"✗ Failed to download: {clean_folder}")
            print(f"Error: {result.stderr}")
            return False
    except Exception as e:
        print(f"✗ Exception downloading {folder}: {e}")
        return False


def main():
    print("Discovering folders from server...")
    folders = get_folders()
    if not folders:
        print("No folders found on the server!")
        return
    # Split discovered folders into ones already present locally and new ones.
    existing_folders = []
    new_folders = []
    for folder in folders:
        clean_folder = folder.rstrip('/')
        if os.path.exists(clean_folder):
            existing_folders.append(clean_folder)
        else:
            new_folders.append(folder)
    print(f"Found {len(folders)} total folders:")
    if existing_folders:
        print(f" Existing folders (will be skipped): {len(existing_folders)}")
        for folder in existing_folders:
            print(f" - {folder}")
    if new_folders:
        print(f" New folders to download: {len(new_folders)}")
        for folder in new_folders:
            print(f" - {folder.rstrip('/')}")
    if not new_folders:
        print("\nAll folders already exist. Nothing to download!")
        return
    response = input(f"\nDo you want to download these {len(new_folders)} new folders? (y/N): ")
    if response.lower() != 'y':
        print("Download cancelled.")
        return
    print(f"\nStarting parallel downloads with {MAX_WORKERS} workers...")
    success_count = 0
    # Run up to MAX_WORKERS wget processes concurrently.
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        future_to_folder = {executor.submit(download_folder, folder): folder for folder in new_folders}
        for future in concurrent.futures.as_completed(future_to_folder):
            if future.result():
                success_count += 1
    print("\nDownload summary:")
    print(f" Successfully downloaded: {success_count}/{len(new_folders)} folders")
    print(" Files saved in: current directory")


if __name__ == "__main__":
    main()
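
# Example invocation (the host address below is a placeholder):
#   python3 download.py 192.168.1.50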