#!/usr/bin/env python3
"""Mirror the folders exposed by a simple HTTP directory listing.

Usage: python3 <script>.py <server-ip>

Discovers top-level folders on http://<server-ip>:8000, skips any that
already exist locally, and downloads the rest in parallel with wget.
"""
import concurrent.futures
import os
import pwd
import subprocess
import sys
from urllib.parse import urljoin, unquote

import requests
from bs4 import BeautifulSoup

if len(sys.argv) < 2:
    print(f"Usage: {sys.argv[0]} <server-ip>")
    sys.exit(1)

BASE_URL = f"http://{sys.argv[1]}:8000"
MAX_WORKERS = 4


def get_folders():
    """Scrape the server's index page and return the folder links it lists."""
    try:
        response = requests.get(BASE_URL, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        folders = []
        for link in soup.find_all('a', href=True):
            href = link['href']
            # Directory links end with a slash; skip the root and parent links.
            if href.endswith('/') and href not in ['/', '../']:
                folders.append(unquote(href))
        return sorted(set(folders))
    except Exception as e:
        print(f"Error fetching folder list: {e}")
        return []


def change_ownership(folder_path):
    """Recursively chown a downloaded tree to the invoking user."""
    try:
        current_user = pwd.getpwuid(os.getuid())
        uid, gid = current_user.pw_uid, current_user.pw_gid
        for root, dirs, files in os.walk(folder_path):
            os.chown(root, uid, gid)
            for file in files:
                file_path = os.path.join(root, file)
                try:
                    os.chown(file_path, uid, gid)
                except OSError as e:
                    print(f"  Warning: Could not change ownership of {file_path}: {e}")
        print(f"  ✓ Changed ownership of {folder_path} to {current_user.pw_name}")
    except Exception as e:
        print(f"  Warning: Could not change ownership of {folder_path}: {e}")


def download_folder(folder):
    """Mirror one folder with wget; returns True on success or skip."""
    # strip('/') (rather than rstrip) also drops any leading slash, so the
    # existence check is relative to the current directory, matching where
    # wget --no-host-directories actually saves the files.
    clean_folder = folder.strip('/')
    if os.path.exists(clean_folder):
        print(f"⏭ Skipping existing folder: {clean_folder}")
        return True
    print(f"Starting download of: {clean_folder}")
    try:
        cmd = [
            "wget",
            "--recursive",
            "--no-parent",
            "--no-host-directories",
            "--reject=index.html*",  # discard the listing pages themselves
            "--continue",
            "--progress=bar",
            "--tries=3",
            "--timeout=30",
            urljoin(BASE_URL, folder),
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode == 0:
            print(f"✓ Completed download of: {clean_folder}")
            if os.path.exists(clean_folder):
                change_ownership(clean_folder)
            return True
        print(f"✗ Failed to download: {clean_folder}")
        print(f"Error: {result.stderr}")
        return False
    except Exception as e:
        print(f"✗ Exception downloading {folder}: {e}")
        return False


def main():
    print("Discovering folders from server...")
    folders = get_folders()
    if not folders:
        print("No folders found on the server!")
        return

    # Split into folders we already have locally and ones still to fetch.
    existing_folders = []
    new_folders = []
    for folder in folders:
        clean_folder = folder.strip('/')
        if os.path.exists(clean_folder):
            existing_folders.append(clean_folder)
        else:
            new_folders.append(folder)

    print(f"Found {len(folders)} total folders:")
    if existing_folders:
        print(f"  Existing folders (will be skipped): {len(existing_folders)}")
        for folder in existing_folders:
            print(f"    - {folder}")
    if new_folders:
        print(f"  New folders to download: {len(new_folders)}")
        for folder in new_folders:
            print(f"    - {folder.strip('/')}")
    if not new_folders:
        print("\nAll folders already exist. Nothing to download!")
        return

    response = input(f"\nDo you want to download these {len(new_folders)} new folders? (y/N): ")
    if response.lower() != 'y':
        print("Download cancelled.")
        return

    print(f"\nStarting parallel downloads with {MAX_WORKERS} workers...")
    success_count = 0
    # Each worker runs one wget subprocess; the work is I/O-bound, so
    # threads (not processes) are sufficient for parallelism here.
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        future_to_folder = {
            executor.submit(download_folder, folder): folder for folder in new_folders
        }
        for future in concurrent.futures.as_completed(future_to_folder):
            if future.result():
                success_count += 1

    print("\nDownload summary:")
    print(f"  Successfully downloaded: {success_count}/{len(new_folders)} folders")
    print("  Files saved in: current directory")


if __name__ == "__main__":
    main()