"""Mirror every top-level folder listed at ``BASE_URL`` into the current directory.

The script fetches the HTML index at ``BASE_URL``, treats every relative link
that ends in ``/`` as a folder, asks for confirmation, and then runs one
recursive ``wget`` per missing folder on a small thread pool. Folders that
already exist locally are skipped.
"""
# Origin: download.py as added by patch 4451fb81e68d6264ee217f2ded5dcdd97c845ffd
# ('Upload files to "/"', Jarrett Minton, Wed, 27 Aug 2025 00:20:52 -0600).

import concurrent.futures
import grp  # unused in this file; kept from the original import list
import os
import pwd
import re  # unused in this file; kept from the original import list
import subprocess
from urllib.parse import quote, unquote, urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

BASE_URL = "http://0.0.0.0:8000"
MAX_WORKERS = 4


def _is_plain_subfolder(href):
    """Return True if *href* is a relative link to a directory below the index.

    Rejects absolute URLs, absolute paths, links carrying a query or fragment,
    and anything with an empty, ``.`` or ``..`` path segment (checked after
    percent-decoding, so ``%2e%2e/`` is rejected too). The decoded name is
    later used as a local path, so it must not be able to escape the current
    directory.
    """
    if not href.endswith("/") or href.startswith("/"):
        return False
    parts = urlsplit(href)
    if parts.scheme or parts.netloc or parts.query or parts.fragment:
        return False
    segments = unquote(href).rstrip("/").split("/")
    return all(segment not in ("", ".", "..") for segment in segments)


def _folder_url(folder):
    """Build the download URL for a decoded folder name such as ``"My Dir/"``.

    The name was percent-decoded by ``get_folders``, so it is re-encoded here;
    otherwise characters like ``#``, ``?`` or ``%`` would change the meaning of
    the URL. The base is normalised to end in ``/`` so that ``urljoin`` appends
    to it instead of replacing its last path segment.
    """
    return urljoin(BASE_URL.rstrip("/") + "/", quote(folder))


def get_folders():
    """Return the sorted, de-duplicated folder names linked from ``BASE_URL``.

    Names are percent-decoded and keep their trailing ``/``. On any HTTP or
    connection error a message is printed and an empty list is returned.
    """
    try:
        response = requests.get(BASE_URL, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching folder list: {e}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")
    names = {
        unquote(link["href"])
        for link in soup.find_all("a", href=True)
        if _is_plain_subfolder(link["href"])
    }
    return sorted(names)


def change_ownership(folder_path):
    """Chown *folder_path* and everything below it to the current user.

    The target is the uid and primary gid of ``os.getuid()``'s passwd entry.
    This is best-effort: each failure prints a warning and the walk continues.
    """
    try:
        current_user = pwd.getpwuid(os.getuid())
    except KeyError as e:
        # getpwuid raises KeyError when the uid has no passwd entry.
        print(f"  Warning: Could not change ownership of {folder_path}: {e}")
        return

    uid, gid = current_user.pw_uid, current_user.pw_gid

    for root, _dirs, files in os.walk(folder_path):
        # Directories and files are handled the same way, so one unreadable
        # directory no longer aborts the whole walk.
        for path in (root, *(os.path.join(root, name) for name in files)):
            try:
                os.chown(path, uid, gid)
            except OSError as e:
                print(f"    Warning: Could not change ownership of {path}: {e}")

    print(f"  ✓ Changed ownership of {folder_path} to {current_user.pw_name}")


def download_folder(folder):
    """Download one remote folder with ``wget`` and return True on success.

    *folder* is a decoded name as returned by ``get_folders`` (trailing ``/``
    allowed). An already existing local folder counts as success and is not
    touched. Failures are printed and reported as ``False``; nothing is raised
    for a missing or failing ``wget``.
    """
    clean_folder = folder.rstrip("/")

    # NOTE: a failed or interrupted run may leave a partial directory behind,
    # which this check will then skip on the next run; delete it to retry.
    if os.path.exists(clean_folder):
        print(f"⏭ Skipping existing folder: {clean_folder}")
        return True

    print(f"Starting download of: {clean_folder}")

    cmd = [
        "wget",
        "--recursive",
        "--no-parent",
        "--no-host-directories",
        "--reject=index.html*",
        "--continue",
        "--progress=bar",
        "--tries=3",
        "--timeout=30",
        _folder_url(folder),
    ]

    try:
        # errors="replace": wget may echo file names that are not valid in the
        # locale encoding; that must not turn a finished download into an error.
        result = subprocess.run(
            cmd, capture_output=True, text=True, errors="replace"
        )
    except OSError as e:
        # Typically wget is not installed or not executable.
        print(f"✗ Exception downloading {folder}: {e}")
        return False

    if result.returncode != 0:
        print(f"✗ Failed to download: {clean_folder}")
        print(f"Error: {result.stderr}")
        return False

    print(f"✓ Completed download of: {clean_folder}")

    # Change ownership of the downloaded folder, if wget actually created it.
    if os.path.exists(clean_folder):
        change_ownership(clean_folder)

    return True


def main():
    """List remote folders, confirm with the user, and download the new ones."""
    print("Discovering folders from server...")
    folders = get_folders()

    if not folders:
        print("No folders found on the server!")
        return

    existing_folders = []
    new_folders = []
    for folder in folders:
        clean_folder = folder.rstrip("/")
        if os.path.exists(clean_folder):
            existing_folders.append(clean_folder)
        else:
            new_folders.append(folder)

    print(f"Found {len(folders)} total folders:")
    if existing_folders:
        print(f"  Existing folders (will be skipped): {len(existing_folders)}")
        for folder in existing_folders:
            print(f"    - {folder}")
    if new_folders:
        print(f"  New folders to download: {len(new_folders)}")
        for folder in new_folders:
            print(f"    - {folder.rstrip('/')}")

    if not new_folders:
        print("\nAll folders already exist. Nothing to download!")
        return

    try:
        response = input(
            f"\nDo you want to download these {len(new_folders)} new folders? (y/N): "
        )
    except EOFError:
        # No interactive stdin: fall back to the default answer, "no".
        response = ""
    if response.strip().lower() != "y":
        print("Download cancelled.")
        return

    print(f"\nStarting parallel downloads with {MAX_WORKERS} workers...")

    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # download_folder reports failure via its return value, so the results
        # can simply be counted.
        outcomes = executor.map(download_folder, new_folders)
        success_count = sum(1 for ok in outcomes if ok)

    print("\nDownload summary:")
    print(f"  Successfully downloaded: {success_count}/{len(new_folders)} folders")
    print("  Files saved in: current directory")


if __name__ == "__main__":
    main()