# music_script/download.py
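"""Download music folders from a simple HTTP directory listing.

The script scrapes the server's root index for sub-folder links, skips any
folder that already exists locally, and mirrors the new ones with wget,
running up to MAX_WORKERS downloads in parallel.

Usage: python download.py SERVER_HOST
(the server is expected on port 8000, e.g. a `python -m http.server` instance)
"""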

import concurrent.futures
import os
import pwd
import subprocess
import sys
from urllib.parse import unquote, urljoin

import requests
from bs4 import BeautifulSoup
# Server host comes from the first command-line argument; port 8000 is the
# default used by `python -m http.server`.
BASE_URL = f"http://{sys.argv[1]}:8000"
MAX_WORKERS = 4


def get_folders():
    """Scrape the server's root directory listing and return folder names."""
    try:
        response = requests.get(BASE_URL, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        folders = []
        for link in soup.find_all('a', href=True):
            href = link['href']
            # Directory entries end with '/'; skip the root and parent links.
            if href.endswith('/') and href not in ['/', '../']:
                folder_name = unquote(href)
                folders.append(folder_name)
        return sorted(set(folders))
    except Exception as e:
        print(f"Error fetching folder list: {e}")
        return []


def change_ownership(folder_path):
    """Recursively change ownership of folder_path to the user running the script."""
    try:
        current_user = pwd.getpwuid(os.getuid())
        uid = current_user.pw_uid
        gid = current_user.pw_gid
        for root, dirs, files in os.walk(folder_path):
            os.chown(root, uid, gid)
            for file in files:
                file_path = os.path.join(root, file)
                try:
                    os.chown(file_path, uid, gid)
                except (OSError, IOError) as e:
                    print(f" Warning: Could not change ownership of {file_path}: {e}")
        print(f" ✓ Changed ownership of {folder_path} to {current_user.pw_name}")
    except Exception as e:
        print(f" Warning: Could not change ownership of {folder_path}: {e}")


def download_folder(folder):
    """Mirror a single folder from the server with wget; return True on success."""
    clean_folder = folder.rstrip('/')
    if os.path.exists(clean_folder):
        print(f"⏭ Skipping existing folder: {clean_folder}")
        return True
    print(f"Starting download of: {clean_folder}")
    try:
        cmd = [
            "wget",
            "--recursive",
            "--no-parent",
            "--no-host-directories",
            "--reject=index.html*",
            "--continue",
            "--progress=bar",
            "--tries=3",
            "--timeout=30",
            urljoin(BASE_URL, folder),
        ]
        # Output is captured (so the progress bar is not shown live);
        # stderr is printed only if wget fails.
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode == 0:
            print(f"✓ Completed download of: {clean_folder}")
            if os.path.exists(clean_folder):
                change_ownership(clean_folder)
            return True
        else:
            print(f"✗ Failed to download: {clean_folder}")
            print(f"Error: {result.stderr}")
            return False
    except Exception as e:
        print(f"✗ Exception downloading {folder}: {e}")
        return False


def main():
    print("Discovering folders from server...")
    folders = get_folders()
    if not folders:
        print("No folders found on the server!")
        return
    # Split discovered folders into ones already present locally and new ones.
    existing_folders = []
    new_folders = []
    for folder in folders:
        clean_folder = folder.rstrip('/')
        if os.path.exists(clean_folder):
            existing_folders.append(clean_folder)
        else:
            new_folders.append(folder)
    print(f"Found {len(folders)} total folders:")
    if existing_folders:
        print(f" Existing folders (will be skipped): {len(existing_folders)}")
        for folder in existing_folders:
            print(f" - {folder}")
    if new_folders:
        print(f" New folders to download: {len(new_folders)}")
        for folder in new_folders:
            print(f" - {folder.rstrip('/')}")
    if not new_folders:
        print("\nAll folders already exist. Nothing to download!")
        return
    response = input(f"\nDo you want to download these {len(new_folders)} new folders? (y/N): ")
    if response.lower() != 'y':
        print("Download cancelled.")
        return
    print(f"\nStarting parallel downloads with {MAX_WORKERS} workers...")
    success_count = 0
    # Run up to MAX_WORKERS wget processes concurrently.
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        future_to_folder = {executor.submit(download_folder, folder): folder for folder in new_folders}
        for future in concurrent.futures.as_completed(future_to_folder):
            if future.result():
                success_count += 1
    print("\nDownload summary:")
    print(f" Successfully downloaded: {success_count}/{len(new_folders)} folders")
    print(" Files saved in: current directory")


if __name__ == "__main__":
    main()
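
# Example invocation (the host address below is a placeholder):
#   python3 download.py 192.168.1.50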