# -*- coding: utf-8 -*-
"""
Download images from itemImageURL stored in the database

For every row of the ``items`` table that has an ``itemImageURL`` but an empty
``itemImage``, the image is downloaded into ``IMG_FOLDER`` and the row's
``itemImage`` column is set to the local path of the saved file.
"""

import hashlib
import os
from contextlib import closing, suppress
from pathlib import Path
from urllib.parse import urlparse

import mysql.connector
import requests

# Database configuration
# NOTE(review): the credentials below are committed in source; consider
# loading them from the environment or a secrets store instead.
DB_CONFIG = {
    'host': 'localhost',
    'port': 31175,
    'user': 'root',
    'password': 'vLuH6WhOTMm5O9CarrAX4S5F',
    'database': 'aliexpress',
    'charset': 'utf8mb4'
}

# Image folder
IMG_FOLDER = 'img'

# Extensions searched for (in this order) anywhere in the URL when the URL
# path itself carries no extension; the default is used when none matches.
_FALLBACK_EXTENSIONS = ('.jpg', '.png', '.avif', '.webp')
_DEFAULT_EXTENSION = '.jpg'


def ensure_folder_exists():
    """Create the img folder if it doesn't exist"""
    Path(IMG_FOLDER).mkdir(exist_ok=True)
    print(f"Image folder '{IMG_FOLDER}' ready")


def get_image_urls_from_db():
    """Retrieve all image URLs and their IDs from the database.

    Only rows with a non-empty ``itemImageURL`` and an empty ``itemImage``
    are selected, ordered by ``orderNumber``.

    Returns:
        A list of ``(id, itemImageURL, orderNumber)`` tuples, or an empty
        list when a database error occurs (the error is printed).
    """
    # Get image URLs with their IDs (not distinct, as we need to update each row)
    query = """
        SELECT id, itemImageURL, orderNumber
        FROM items
        WHERE itemImageURL IS NOT NULL
          AND itemImageURL != ''
          AND (itemImage IS NULL OR itemImage = '')
        ORDER BY orderNumber
    """
    try:
        # closing() guarantees the cursor and the connection are released
        # even when execute()/fetchall() raises.
        with closing(mysql.connector.connect(**DB_CONFIG)) as conn:
            with closing(conn.cursor()) as cursor:
                cursor.execute(query)
                return cursor.fetchall()
    except mysql.connector.Error as err:
        print(f"Database error: {err}")
        return []


def generate_filename(url, item_id, order_number):
    """Generate a unique filename based on URL, item ID and order number.

    The extension is taken from the URL path. When the path has none, the
    whole URL is searched for a known image extension, falling back to
    ``.jpg``. The first 8 hex digits of the URL's MD5 digest are appended
    to the name for uniqueness.

    Args:
        url: Source URL of the image.
        item_id: Row id of the item.
        order_number: Order number of the item (or a placeholder).

    Returns:
        A name of the form ``{item_id}_{order_number}_{hash}{ext}``.
    """
    ext = os.path.splitext(urlparse(url).path)[1]

    if not ext:
        lowered_url = url.lower()
        ext = next(
            (candidate for candidate in _FALLBACK_EXTENSIONS
             if candidate in lowered_url),
            _DEFAULT_EXTENSION,
        )

    # MD5 is used only to derive a short, stable name component.
    url_hash = hashlib.md5(url.encode()).hexdigest()[:8]

    return f"{item_id}_{order_number}_{url_hash}{ext}"


def download_image(url, filepath):
    """Download an image from URL and save to filepath.

    The body is streamed to ``<filepath>.part`` and moved over ``filepath``
    only after the transfer has completed, so a failed or interrupted
    download never leaves a truncated file that a later run would treat as
    already downloaded.

    Args:
        url: URL to fetch.
        filepath: Destination path of the image.

    Returns:
        True when the file was saved, False when the request or the local
        write failed (the error is printed).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    temp_path = f"{filepath}.part"

    try:
        # The context manager releases the connection of the streamed response.
        with requests.get(url, headers=headers, timeout=30, stream=True) as response:
            response.raise_for_status()

            # Write image to a temporary file first
            with open(temp_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

        # Publish the file only once it is complete.
        os.replace(temp_path, filepath)
        return True
    except (requests.exceptions.RequestException, OSError) as e:
        print(f" Error downloading: {e}")
        # Best-effort cleanup; the temporary file may not exist at all.
        with suppress(OSError):
            os.remove(temp_path)
        return False


def update_item_image_path(item_id, relative_path):
    """Update the itemImage field in the database.

    Args:
        item_id: Value of ``items.id`` identifying the row to update.
        relative_path: Path stored in the ``itemImage`` column.

    Returns:
        True when the update was committed, False on a database error
        (the error is printed).
    """
    update_query = """
        UPDATE items
        SET itemImage = %s
        WHERE id = %s
    """
    try:
        # closing() guarantees the cursor and the connection are released
        # even when execute()/commit() raises.
        with closing(mysql.connector.connect(**DB_CONFIG)) as conn:
            with closing(conn.cursor()) as cursor:
                cursor.execute(update_query, (relative_path, item_id))
                conn.commit()
        return True
    except mysql.connector.Error as err:
        print(f" Database error: {err}")
        return False


def main():
    """Main function to download all images"""
    print("Starting image download process...")

    # Ensure img folder exists
    ensure_folder_exists()

    # Get image URLs from database
    print("\nFetching image URLs from database...")
    image_data = get_image_urls_from_db()

    if not image_data:
        print("No image URLs found in database")
        return

    total = len(image_data)
    print(f"Found {total} image(s) to download")

    # Download each image
    downloaded = 0
    skipped = 0
    failed = 0

    for idx, (item_id, url, order_number) in enumerate(image_data, 1):
        filename = generate_filename(url, item_id, order_number or 'unknown')
        filepath = os.path.join(IMG_FOLDER, filename)
        # The path stored in the database is the same folder-relative path.
        relative_path = filepath

        # Check if file already exists
        if os.path.exists(filepath):
            print(f"[{idx}/{total}] Skipped (already exists): {filename}")
            # Still update the database if not already set
            if not update_item_image_path(item_id, relative_path):
                print(f" ✗ Failed to update database")
            skipped += 1
            continue

        print(f"[{idx}/{total}] Downloading: {filename}")

        if not download_image(url, filepath):
            failed += 1
            continue

        downloaded += 1
        print(f" ✓ Saved to {filepath}")

        # Update database with local image path
        if update_item_image_path(item_id, relative_path):
            print(f" ✓ Updated database")
        else:
            print(f" ✗ Failed to update database")

    # Summary
    print("\n" + "="*50)
    print("Download Summary:")
    print(f" Downloaded: {downloaded}")
    print(f" Skipped (already exists): {skipped}")
    print(f" Failed: {failed}")
    print(f" Total: {total}")
    print("="*50)


if __name__ == '__main__':
    main()