"""Mirror the top-level folders of an HTTP directory listing using wget.

The script fetches the index page at ``BASE_URL``, treats every link whose
href ends in ``/`` as a folder, and downloads each folder that does not yet
exist in the current working directory with a recursive ``wget`` call.
Downloads run in parallel on a small thread pool.
"""
import concurrent.futures
import grp  # noqa: F401  (unused here; kept because other code may rely on it)
import os
import pwd
import re  # noqa: F401  (unused here; kept because other code may rely on it)
import subprocess
from urllib.parse import quote, unquote, urljoin

import requests
from bs4 import BeautifulSoup

BASE_URL = "http://0.0.0.0:8000"
MAX_WORKERS = 4


def get_folders() -> list:
    """Return the sorted, de-duplicated folder names linked from BASE_URL.

    A link counts as a folder when its href ends with ``/`` and is neither
    ``/`` nor ``../``. Names are returned URL-decoded and keep their
    trailing slash.

    Returns:
        A sorted list of folder names, or an empty list when the index page
        cannot be fetched or parsed (the error is printed, not raised).
    """
    try:
        response = requests.get(BASE_URL, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        folders = {
            unquote(link["href"])
            for link in soup.find_all("a", href=True)
            if link["href"].endswith("/") and link["href"] not in ("/", "../")
        }
        return sorted(folders)

    except Exception as e:
        # Best effort: callers treat an empty list as "nothing to download".
        print(f"Error fetching folder list: {e}")
        return []


def change_ownership(folder_path: str) -> None:
    """Recursively chown ``folder_path`` to the current user and primary group.

    Failures on individual files are reported and skipped; a failure on a
    directory (or on the user lookup) aborts the walk with a warning. Nothing
    is raised to the caller.

    Args:
        folder_path: Directory whose tree should change ownership.
    """
    try:
        current_user = pwd.getpwuid(os.getuid())
        uid = current_user.pw_uid
        gid = current_user.pw_gid

        for root, _dirs, files in os.walk(folder_path):
            os.chown(root, uid, gid)
            for file in files:
                file_path = os.path.join(root, file)
                try:
                    os.chown(file_path, uid, gid)
                except (OSError, IOError) as e:
                    print(f" Warning: Could not change ownership of {file_path}: {e}")

        print(f" ✓ Changed ownership of {folder_path} to {current_user.pw_name}")

    except Exception as e:
        print(f" Warning: Could not change ownership of {folder_path}: {e}")


def download_folder(folder: str) -> bool:
    """Download one server folder recursively with wget.

    Args:
        folder: URL-decoded folder name as returned by ``get_folders``
            (normally with a trailing ``/``).

    Returns:
        True when the folder already exists locally (it is skipped) or wget
        exits with status 0; False when wget fails or cannot be started.
    """
    clean_folder = folder.rstrip('/')

    # NOTE(review): a folder left behind by a failed or interrupted run also
    # exists, so it appears it would be skipped here rather than resumed,
    # despite --continue below. Confirm whether that is intended.
    if os.path.exists(clean_folder):
        print(f"⏭ Skipping existing folder: {clean_folder}")
        return True

    print(f"Starting download of: {clean_folder}")

    try:
        # The name was URL-decoded for display and for the local path check;
        # re-encode it so characters such as '%', '#', '?' or spaces are not
        # misread as URL syntax when the request URL is built.
        url = urljoin(BASE_URL, quote(folder))
        cmd = [
            "wget",
            "--recursive",
            "--no-parent",
            "--no-host-directories",
            "--reject=index.html*",
            "--continue",
            "--progress=bar",
            "--tries=3",
            "--timeout=30",
            url,
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode == 0:
            print(f"✓ Completed download of: {clean_folder}")

            if os.path.exists(clean_folder):
                change_ownership(clean_folder)

            return True

        print(f"✗ Failed to download: {clean_folder}")
        print(f"Error: {result.stderr}")
        return False

    except Exception as e:
        # Covers e.g. wget not being installed; reported as a failed download.
        print(f"✗ Exception downloading {folder}: {e}")
        return False


def main() -> None:
    """Discover folders, confirm with the user, and download the new ones."""
    print("Discovering folders from server...")
    folders = get_folders()

    if not folders:
        print("No folders found on the server!")
        return

    existing_folders = []
    new_folders = []

    for folder in folders:
        clean_folder = folder.rstrip('/')
        if os.path.exists(clean_folder):
            existing_folders.append(clean_folder)
        else:
            new_folders.append(folder)

    print(f"Found {len(folders)} total folders:")
    if existing_folders:
        print(f" Existing folders (will be skipped): {len(existing_folders)}")
        for folder in existing_folders:
            print(f" - {folder}")
    if new_folders:
        print(f" New folders to download: {len(new_folders)}")
        for folder in new_folders:
            print(f" - {folder.rstrip('/')}")

    if not new_folders:
        print("\nAll folders already exist. Nothing to download!")
        return

    response = input(f"\nDo you want to download these {len(new_folders)} new folders? (y/N): ")
    # Strip so that stray whitespace around the answer does not cancel the run.
    if response.strip().lower() != 'y':
        print("Download cancelled.")
        return

    print(f"\nStarting parallel downloads with {MAX_WORKERS} workers...")

    success_count = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        future_to_folder = {
            executor.submit(download_folder, folder): folder
            for folder in new_folders
        }

        for future in concurrent.futures.as_completed(future_to_folder):
            if future.result():
                success_count += 1

    print("\nDownload summary:")
    print(f" Successfully downloaded: {success_count}/{len(new_folders)} folders")
    print(" Files saved in: current directory")


if __name__ == "__main__":
    main()