Update download.py

This commit is contained in:
Jarrett Minton 2025-08-27 00:22:01 -06:00
parent 4451fb81e6
commit 75dfd73a34

View File

@@ -1,154 +1,149 @@
import requests
import requests import re
import re import os
import os import subprocess
import subprocess import concurrent.futures
import concurrent.futures import pwd
import pwd import grp
import grp from urllib.parse import urljoin, unquote
from urllib.parse import urljoin, unquote from bs4 import BeautifulSoup
from bs4 import BeautifulSoup
# Root URL of the HTTP directory listing to mirror.
BASE_URL = "http://0.0.0.0:8000"

# How many folders are downloaded concurrently by the thread pool.
MAX_WORKERS = 4
def get_folders():
    """Scrape the directory index at BASE_URL for sub-folder links.

    Returns a sorted list of unique, URL-decoded folder names (anchor
    hrefs ending in '/'), or an empty list when the listing cannot be
    fetched or parsed.
    """
    try:
        index_page = requests.get(BASE_URL, timeout=10)
        index_page.raise_for_status()

        soup = BeautifulSoup(index_page.text, 'html.parser')
        # Directory entries are anchors whose href ends with '/'; skip the
        # root and parent-directory links.
        found = {
            unquote(a['href'])
            for a in soup.find_all('a', href=True)
            if a['href'].endswith('/') and a['href'] not in ('/', '../')
        }
        return sorted(found)

    except Exception as e:
        print(f"Error fetching folder list: {e}")
        return []
def change_ownership(folder_path):
    """Recursively chown *folder_path* (directories and files) to the
    invoking user.

    Per-file failures are reported as warnings and skipped; any other
    failure aborts with a single warning for the whole folder.
    """
    try:
        user_info = pwd.getpwuid(os.getuid())
        owner_uid = user_info.pw_uid
        owner_gid = user_info.pw_gid

        for dirpath, _subdirs, filenames in os.walk(folder_path):
            # os.walk yields every directory as a root, so chowning each
            # root covers all subdirectories without a separate pass.
            os.chown(dirpath, owner_uid, owner_gid)
            for name in filenames:
                target = os.path.join(dirpath, name)
                try:
                    os.chown(target, owner_uid, owner_gid)
                except (OSError, IOError) as e:
                    print(f" Warning: Could not change ownership of {target}: {e}")

        print(f" ✓ Changed ownership of {folder_path} to {user_info.pw_name}")

    except Exception as e:
        print(f" Warning: Could not change ownership of {folder_path}: {e}")
def download_folder(folder):
    """Mirror one server folder into the current directory via wget.

    Skips folders that already exist locally. Returns True on success or
    skip, False on any failure.
    """
    local_name = folder.rstrip('/')

    if os.path.exists(local_name):
        print(f"⏭ Skipping existing folder: {local_name}")
        return True

    print(f"Starting download of: {local_name}")

    try:
        wget_cmd = [
            "wget",
            "--recursive",            # follow links below the folder
            "--no-parent",            # never ascend above the folder
            "--no-host-directories",  # drop the hostname path component
            "--reject=index.html*",   # listing pages are not payload
            "--continue",             # resume partially-downloaded files
            "--progress=bar",
            "--tries=3",
            "--timeout=30",
            urljoin(BASE_URL, folder),
        ]

        result = subprocess.run(wget_cmd, capture_output=True, text=True)

        if result.returncode != 0:
            print(f"✗ Failed to download: {local_name}")
            print(f"Error: {result.stderr}")
            return False

        print(f"✓ Completed download of: {local_name}")
        # wget ran fine, but only chown if it actually created the folder.
        if os.path.exists(local_name):
            change_ownership(local_name)
        return True

    except Exception as e:
        print(f"✗ Exception downloading {folder}: {e}")
        return False
def main():
    """Discover folders on the server, confirm with the user, and download
    in parallel any that are not already present locally."""
    print("Discovering folders from server...")
    folders = get_folders()

    if not folders:
        print("No folders found on the server!")
        return

    # Partition the listing into folders already on disk vs. ones to fetch.
    existing_folders = []
    new_folders = []
    for folder in folders:
        local_name = folder.rstrip('/')
        if os.path.exists(local_name):
            existing_folders.append(local_name)
        else:
            new_folders.append(folder)

    print(f"Found {len(folders)} total folders:")
    if existing_folders:
        print(f" Existing folders (will be skipped): {len(existing_folders)}")
        for name in existing_folders:
            print(f" - {name}")
    if new_folders:
        print(f" New folders to download: {len(new_folders)}")
        for folder in new_folders:
            print(f" - {folder.rstrip('/')}")

    if not new_folders:
        print("\nAll folders already exist. Nothing to download!")
        return

    # Interactive confirmation before touching the network.
    answer = input(f"\nDo you want to download these {len(new_folders)} new folders? (y/N): ")
    if answer.lower() != 'y':
        print("Download cancelled.")
        return

    print(f"\nStarting parallel downloads with {MAX_WORKERS} workers...")

    success_count = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        pending = {executor.submit(download_folder, f): f for f in new_folders}
        for finished in concurrent.futures.as_completed(pending):
            if finished.result():
                success_count += 1

    print(f"\nDownload summary:")
    print(f" Successfully downloaded: {success_count}/{len(new_folders)} folders")
    print(f" Files saved in: current directory")

if __name__ == "__main__":
    main()