Update download.py
This commit is contained in:
parent
4451fb81e6
commit
75dfd73a34
303
download.py
303
download.py
@ -1,154 +1,149 @@
|
|||||||
|
import requests
|
||||||
import requests
|
import re
|
||||||
import re
|
import os
|
||||||
import os
|
import subprocess
|
||||||
import subprocess
|
import concurrent.futures
|
||||||
import concurrent.futures
|
import pwd
|
||||||
import pwd
|
import grp
|
||||||
import grp
|
from urllib.parse import urljoin, unquote
|
||||||
from urllib.parse import urljoin, unquote
|
from bs4 import BeautifulSoup
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
|
BASE_URL = "http://0.0.0.0:8000"
|
||||||
BASE_URL = "http://0.0.0.0:8000"
|
MAX_WORKERS = 4
|
||||||
MAX_WORKERS = 4
|
|
||||||
|
def get_folders():
|
||||||
def get_folders():
|
try:
|
||||||
try:
|
response = requests.get(BASE_URL, timeout=10)
|
||||||
response = requests.get(BASE_URL, timeout=10)
|
response.raise_for_status()
|
||||||
response.raise_for_status()
|
|
||||||
|
soup = BeautifulSoup(response.text, 'html.parser')
|
||||||
# Parse HTML to find directory links
|
folders = []
|
||||||
soup = BeautifulSoup(response.text, 'html.parser')
|
|
||||||
folders = []
|
for link in soup.find_all('a', href=True):
|
||||||
|
href = link['href']
|
||||||
# Look for links that end with '/' (directories)
|
if href.endswith('/') and href not in ['/', '../']:
|
||||||
for link in soup.find_all('a', href=True):
|
|
||||||
href = link['href']
|
folder_name = unquote(href)
|
||||||
if href.endswith('/') and href not in ['/', '../']:
|
folders.append(folder_name)
|
||||||
# Decode URL-encoded folder names
|
|
||||||
folder_name = unquote(href)
|
return sorted(set(folders))
|
||||||
folders.append(folder_name)
|
|
||||||
|
except Exception as e:
|
||||||
return sorted(set(folders))
|
print(f"Error fetching folder list: {e}")
|
||||||
|
return []
|
||||||
except Exception as e:
|
|
||||||
print(f"Error fetching folder list: {e}")
|
def change_ownership(folder_path):
|
||||||
return []
|
try:
|
||||||
|
current_user = pwd.getpwuid(os.getuid())
|
||||||
def change_ownership(folder_path):
|
uid = current_user.pw_uid
|
||||||
try:
|
gid = current_user.pw_gid
|
||||||
current_user = pwd.getpwuid(os.getuid())
|
|
||||||
uid = current_user.pw_uid
|
for root, dirs, files in os.walk(folder_path):
|
||||||
gid = current_user.pw_gid
|
os.chown(root, uid, gid)
|
||||||
|
for file in files:
|
||||||
for root, dirs, files in os.walk(folder_path):
|
file_path = os.path.join(root, file)
|
||||||
os.chown(root, uid, gid)
|
try:
|
||||||
for file in files:
|
os.chown(file_path, uid, gid)
|
||||||
file_path = os.path.join(root, file)
|
except (OSError, IOError) as e:
|
||||||
try:
|
print(f" Warning: Could not change ownership of {file_path}: {e}")
|
||||||
os.chown(file_path, uid, gid)
|
|
||||||
except (OSError, IOError) as e:
|
print(f" ✓ Changed ownership of {folder_path} to {current_user.pw_name}")
|
||||||
print(f" Warning: Could not change ownership of {file_path}: {e}")
|
|
||||||
|
except Exception as e:
|
||||||
print(f" ✓ Changed ownership of {folder_path} to {current_user.pw_name}")
|
print(f" Warning: Could not change ownership of {folder_path}: {e}")
|
||||||
|
|
||||||
except Exception as e:
|
def download_folder(folder):
|
||||||
print(f" Warning: Could not change ownership of {folder_path}: {e}")
|
clean_folder = folder.rstrip('/')
|
||||||
|
|
||||||
def download_folder(folder):
|
if os.path.exists(clean_folder):
|
||||||
clean_folder = folder.rstrip('/')
|
print(f"⏭ Skipping existing folder: {clean_folder}")
|
||||||
|
return True
|
||||||
if os.path.exists(clean_folder):
|
|
||||||
print(f"⏭ Skipping existing folder: {clean_folder}")
|
print(f"Starting download of: {clean_folder}")
|
||||||
return True
|
|
||||||
|
try:
|
||||||
print(f"Starting download of: {clean_folder}")
|
cmd = [
|
||||||
|
"wget",
|
||||||
try:
|
"--recursive",
|
||||||
cmd = [
|
"--no-parent",
|
||||||
"wget",
|
"--no-host-directories",
|
||||||
"--recursive",
|
"--reject=index.html*",
|
||||||
"--no-parent",
|
"--continue",
|
||||||
"--no-host-directories",
|
"--progress=bar",
|
||||||
"--reject=index.html*",
|
"--tries=3",
|
||||||
"--continue",
|
"--timeout=30",
|
||||||
"--progress=bar",
|
urljoin(BASE_URL, folder)
|
||||||
"--tries=3",
|
]
|
||||||
"--timeout=30",
|
|
||||||
urljoin(BASE_URL, folder)
|
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||||
]
|
|
||||||
|
if result.returncode == 0:
|
||||||
# Run wget
|
print(f"✓ Completed download of: {clean_folder}")
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
||||||
|
if os.path.exists(clean_folder):
|
||||||
if result.returncode == 0:
|
change_ownership(clean_folder)
|
||||||
print(f"✓ Completed download of: {clean_folder}")
|
|
||||||
|
return True
|
||||||
# Change ownership of downloaded folder
|
else:
|
||||||
if os.path.exists(clean_folder):
|
print(f"✗ Failed to download: {clean_folder}")
|
||||||
change_ownership(clean_folder)
|
print(f"Error: {result.stderr}")
|
||||||
|
return False
|
||||||
return True
|
|
||||||
else:
|
except Exception as e:
|
||||||
print(f"✗ Failed to download: {clean_folder}")
|
print(f"✗ Exception downloading {folder}: {e}")
|
||||||
print(f"Error: {result.stderr}")
|
return False
|
||||||
return False
|
|
||||||
|
def main():
|
||||||
except Exception as e:
|
print("Discovering folders from server...")
|
||||||
print(f"✗ Exception downloading {folder}: {e}")
|
folders = get_folders()
|
||||||
return False
|
|
||||||
|
if not folders:
|
||||||
def main():
|
print("No folders found on the server!")
|
||||||
print("Discovering folders from server...")
|
return
|
||||||
folders = get_folders()
|
|
||||||
|
existing_folders = []
|
||||||
if not folders:
|
new_folders = []
|
||||||
print("No folders found on the server!")
|
|
||||||
return
|
for folder in folders:
|
||||||
|
clean_folder = folder.rstrip('/')
|
||||||
existing_folders = []
|
if os.path.exists(clean_folder):
|
||||||
new_folders = []
|
existing_folders.append(clean_folder)
|
||||||
|
else:
|
||||||
for folder in folders:
|
new_folders.append(folder)
|
||||||
clean_folder = folder.rstrip('/')
|
|
||||||
if os.path.exists(clean_folder):
|
print(f"Found {len(folders)} total folders:")
|
||||||
existing_folders.append(clean_folder)
|
if existing_folders:
|
||||||
else:
|
print(f" Existing folders (will be skipped): {len(existing_folders)}")
|
||||||
new_folders.append(folder)
|
for folder in existing_folders:
|
||||||
|
print(f" - {folder}")
|
||||||
print(f"Found {len(folders)} total folders:")
|
if new_folders:
|
||||||
if existing_folders:
|
print(f" New folders to download: {len(new_folders)}")
|
||||||
print(f" Existing folders (will be skipped): {len(existing_folders)}")
|
for folder in new_folders:
|
||||||
for folder in existing_folders:
|
print(f" - {folder.rstrip('/')}")
|
||||||
print(f" - {folder}")
|
|
||||||
if new_folders:
|
if not new_folders:
|
||||||
print(f" New folders to download: {len(new_folders)}")
|
print("\nAll folders already exist. Nothing to download!")
|
||||||
for folder in new_folders:
|
return
|
||||||
print(f" - {folder.rstrip('/')}")
|
|
||||||
|
response = input(f"\nDo you want to download these {len(new_folders)} new folders? (y/N): ")
|
||||||
if not new_folders:
|
if response.lower() != 'y':
|
||||||
print("\nAll folders already exist. Nothing to download!")
|
print("Download cancelled.")
|
||||||
return
|
return
|
||||||
|
|
||||||
response = input(f"\nDo you want to download these {len(new_folders)} new folders? (y/N): ")
|
print(f"\nStarting parallel downloads with {MAX_WORKERS} workers...")
|
||||||
if response.lower() != 'y':
|
|
||||||
print("Download cancelled.")
|
success_count = 0
|
||||||
return
|
with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
|
||||||
|
future_to_folder = {executor.submit(download_folder, folder): folder for folder in new_folders}
|
||||||
print(f"\nStarting parallel downloads with {MAX_WORKERS} workers...")
|
|
||||||
|
for future in concurrent.futures.as_completed(future_to_folder):
|
||||||
success_count = 0
|
if future.result():
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
|
success_count += 1
|
||||||
future_to_folder = {executor.submit(download_folder, folder): folder for folder in new_folders}
|
|
||||||
|
print(f"\nDownload summary:")
|
||||||
for future in concurrent.futures.as_completed(future_to_folder):
|
print(f" Successfully downloaded: {success_count}/{len(new_folders)} folders")
|
||||||
if future.result():
|
print(f" Files saved in: current directory")
|
||||||
success_count += 1
|
|
||||||
|
if __name__ == "__main__":
|
||||||
print(f"\nDownload summary:")
|
main()
|
||||||
print(f" Successfully downloaded: {success_count}/{len(new_folders)} folders")
|
|
||||||
print(f" Files saved in: current directory")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user