Update download.py

This commit is contained in:
Jarrett Minton 2025-08-27 00:22:01 -06:00
parent 4451fb81e6
commit 75dfd73a34

View File

@@ -1,154 +1,149 @@
import requests
import requests import re
import re import os
import os import subprocess
import subprocess import concurrent.futures
import concurrent.futures import pwd
import pwd import grp
import grp from urllib.parse import urljoin, unquote
from urllib.parse import urljoin, unquote from bs4 import BeautifulSoup
from bs4 import BeautifulSoup
# Root URL of the HTTP directory listing to mirror.
BASE_URL = "http://0.0.0.0:8000"

# How many folders are downloaded concurrently by the thread pool.
MAX_WORKERS = 4
def get_folders():
    """Scrape the directory index at BASE_URL for sub-folder links.

    Returns a sorted list of unique, URL-decoded folder names (anchor
    hrefs ending in '/'), or an empty list when the listing cannot be
    fetched or parsed.
    """
    try:
        index_page = requests.get(BASE_URL, timeout=10)
        index_page.raise_for_status()

        soup = BeautifulSoup(index_page.text, 'html.parser')
        # Directory entries are anchors whose href ends with '/'; skip the
        # root and parent-directory links.
        found = {
            unquote(a['href'])
            for a in soup.find_all('a', href=True)
            if a['href'].endswith('/') and a['href'] not in ('/', '../')
        }
        return sorted(found)

    except Exception as e:
        print(f"Error fetching folder list: {e}")
        return []
def change_ownership(folder_path):
    """Recursively chown *folder_path* (directories and files) to the
    invoking user.

    Per-file failures are reported as warnings and skipped; any other
    failure aborts with a single warning for the whole folder.
    """
    try:
        user_info = pwd.getpwuid(os.getuid())
        owner_uid = user_info.pw_uid
        owner_gid = user_info.pw_gid

        for dirpath, _subdirs, filenames in os.walk(folder_path):
            # os.walk yields every directory as a root, so chowning each
            # root covers all subdirectories without a separate pass.
            os.chown(dirpath, owner_uid, owner_gid)
            for name in filenames:
                target = os.path.join(dirpath, name)
                try:
                    os.chown(target, owner_uid, owner_gid)
                except (OSError, IOError) as e:
                    print(f" Warning: Could not change ownership of {target}: {e}")

        print(f" ✓ Changed ownership of {folder_path} to {user_info.pw_name}")

    except Exception as e:
        print(f" Warning: Could not change ownership of {folder_path}: {e}")
def download_folder(folder):
    """Mirror one server folder into the current directory via wget.

    Skips folders that already exist locally. Returns True on success or
    skip, False on any failure.
    """
    local_name = folder.rstrip('/')

    if os.path.exists(local_name):
        print(f"⏭ Skipping existing folder: {local_name}")
        return True

    print(f"Starting download of: {local_name}")

    try:
        wget_cmd = [
            "wget",
            "--recursive",            # follow links below the folder
            "--no-parent",            # never ascend above the folder
            "--no-host-directories",  # drop the hostname path component
            "--reject=index.html*",   # listing pages are not payload
            "--continue",             # resume partially-downloaded files
            "--progress=bar",
            "--tries=3",
            "--timeout=30",
            urljoin(BASE_URL, folder),
        ]

        result = subprocess.run(wget_cmd, capture_output=True, text=True)

        if result.returncode != 0:
            print(f"✗ Failed to download: {local_name}")
            print(f"Error: {result.stderr}")
            return False

        print(f"✓ Completed download of: {local_name}")
        # wget ran fine, but only chown if it actually created the folder.
        if os.path.exists(local_name):
            change_ownership(local_name)
        return True

    except Exception as e:
        print(f"✗ Exception downloading {folder}: {e}")
        return False
def main():
    """Discover folders on the server, confirm with the user, and download
    in parallel any that are not already present locally."""
    print("Discovering folders from server...")
    folders = get_folders()

    if not folders:
        print("No folders found on the server!")
        return

    # Partition the listing into folders already on disk vs. ones to fetch.
    existing_folders = []
    new_folders = []
    for folder in folders:
        local_name = folder.rstrip('/')
        if os.path.exists(local_name):
            existing_folders.append(local_name)
        else:
            new_folders.append(folder)

    print(f"Found {len(folders)} total folders:")
    if existing_folders:
        print(f" Existing folders (will be skipped): {len(existing_folders)}")
        for name in existing_folders:
            print(f" - {name}")
    if new_folders:
        print(f" New folders to download: {len(new_folders)}")
        for folder in new_folders:
            print(f" - {folder.rstrip('/')}")

    if not new_folders:
        print("\nAll folders already exist. Nothing to download!")
        return

    # Interactive confirmation before touching the network.
    answer = input(f"\nDo you want to download these {len(new_folders)} new folders? (y/N): ")
    if answer.lower() != 'y':
        print("Download cancelled.")
        return

    print(f"\nStarting parallel downloads with {MAX_WORKERS} workers...")

    success_count = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        pending = {executor.submit(download_folder, f): f for f in new_folders}
        for finished in concurrent.futures.as_completed(pending):
            if finished.result():
                success_count += 1

    print(f"\nDownload summary:")
    print(f" Successfully downloaded: {success_count}/{len(new_folders)} folders")
    print(f" Files saved in: current directory")

if __name__ == "__main__":
    main()