Files
Maison/AliExpress/download_images.py
2026-02-10 12:12:11 +01:00

197 lines
5.4 KiB
Python

# -*- coding: utf-8 -*-
"""
Download images from itemImageURL stored in the database
"""
import os
import requests
import mysql.connector
from pathlib import Path
from urllib.parse import urlparse
import hashlib
# Database configuration.
# Every setting can be overridden with an ALIEXPRESS_DB_* environment variable;
# the fallbacks are the values this script previously hard-coded, so existing
# setups keep working unchanged.
# NOTE(review): the fallback password is a credential committed to source
# control. Rotate it, supply it through ALIEXPRESS_DB_PASSWORD, and then drop
# the fallback value.
DB_CONFIG = {
    'host': os.environ.get('ALIEXPRESS_DB_HOST', 'localhost'),
    # The connector expects an integer port, so convert the env string.
    'port': int(os.environ.get('ALIEXPRESS_DB_PORT', '31175')),
    'user': os.environ.get('ALIEXPRESS_DB_USER', 'root'),
    'password': os.environ.get('ALIEXPRESS_DB_PASSWORD', 'vLuH6WhOTMm5O9CarrAX4S5F'),
    'database': os.environ.get('ALIEXPRESS_DB_NAME', 'aliexpress'),
    'charset': 'utf8mb4',
}

# Folder (relative to the working directory) where downloaded images are stored.
IMG_FOLDER = 'img'
def ensure_folder_exists():
    """Make sure the image output folder is present and report that it is ready.

    The folder named by IMG_FOLDER is created if missing; an already existing
    folder is left as it is.
    """
    target_dir = Path(IMG_FOLDER)
    target_dir.mkdir(exist_ok=True)
    print(f"Image folder '{IMG_FOLDER}' ready")
def get_image_urls_from_db():
    """Retrieve the rows that still need a local image.

    Selects ``id``, ``itemImageURL`` and ``orderNumber`` from ``items`` for
    every row that has a non-empty ``itemImageURL`` but no ``itemImage`` yet,
    ordered by ``orderNumber``.

    Returns:
        The rows as returned by ``cursor.fetchall()`` (one
        ``(id, itemImageURL, orderNumber)`` entry per row), or an empty list
        when a database error occurs (the error is printed).
    """
    # Rows are intentionally not de-duplicated: each row is updated on its own.
    query = """
        SELECT id, itemImageURL, orderNumber
        FROM items
        WHERE itemImageURL IS NOT NULL AND itemImageURL != ''
        AND (itemImage IS NULL OR itemImage = '')
        ORDER BY orderNumber
    """
    try:
        conn = mysql.connector.connect(**DB_CONFIG)
        # Nested try/finally guarantees the cursor and the connection are
        # released even when cursor creation, execute() or fetchall() raises.
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(query)
                return cursor.fetchall()
            finally:
                cursor.close()
        finally:
            conn.close()
    except mysql.connector.Error as err:
        print(f"Database error: {err}")
        return []
def generate_filename(url, item_id, order_number):
    """Build the local filename for an image URL.

    The name has the form ``<item_id>_<order_number>_<hash><ext>`` where
    ``<hash>`` is the first 8 hex characters of the MD5 digest of the URL and
    ``<ext>`` is the extension of the URL path. When the path carries no
    extension, the first of ``.jpg``, ``.png``, ``.avif``, ``.webp`` found
    anywhere in the lower-cased URL is used, falling back to ``.jpg``.
    """
    suffix = os.path.splitext(urlparse(url).path)[1]

    if not suffix:
        # No extension in the path: look for a known one elsewhere in the URL
        # (e.g. in the query string), checked in this fixed priority order.
        lowered = url.lower()
        known_suffixes = ('.jpg', '.png', '.avif', '.webp')
        suffix = next(
            (candidate for candidate in known_suffixes if candidate in lowered),
            '.jpg',  # Default
        )

    # A short digest of the URL keeps names unique per distinct URL.
    digest = hashlib.md5(url.encode()).hexdigest()[:8]
    return f"{item_id}_{order_number}_{digest}{suffix}"
def download_image(url, filepath):
    """Download an image from ``url`` and save it to ``filepath``.

    The body is streamed into a temporary ``<filepath>.part`` file that is
    moved onto ``filepath`` only after the whole download succeeded. A failed
    or interrupted download therefore never leaves a truncated file at
    ``filepath`` that a later run could mistake for a finished image.

    Args:
        url: Address of the image to fetch.
        filepath: Destination path of the image file.

    Returns:
        True when the image was fully written, False when the HTTP request
        failed (the error is printed).

    Raises:
        OSError: If the destination cannot be written (not caught here).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    partial_path = f"{filepath}.part"
    try:
        # Using the response as a context manager releases the streamed
        # connection even when an error interrupts the transfer.
        with requests.get(url, headers=headers, timeout=30, stream=True) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty chunks
                        f.write(chunk)
        # Promote the finished file in a single step.
        os.replace(partial_path, filepath)
        return True
    except requests.exceptions.RequestException as e:
        print(f" Error downloading: {e}")
        return False
    finally:
        # After a successful os.replace the partial file is gone; on any
        # failure remove the leftover. Cleanup is best-effort by design.
        if os.path.exists(partial_path):
            try:
                os.remove(partial_path)
            except OSError:
                pass
def update_item_image_path(item_id, relative_path):
    """Store the local image path in the ``itemImage`` column of one item.

    Args:
        item_id: Value of ``items.id`` identifying the row to update.
        relative_path: Path of the downloaded image to record.

    Returns:
        True when the update was committed, False when a database error
        occurred (the error is printed).
    """
    update_query = """
        UPDATE items
        SET itemImage = %s
        WHERE id = %s
    """
    try:
        conn = mysql.connector.connect(**DB_CONFIG)
        # Nested try/finally guarantees the cursor and the connection are
        # released even when execute() or commit() raises.
        try:
            cursor = conn.cursor()
            try:
                # Parameters are passed separately so the driver escapes them.
                cursor.execute(update_query, (relative_path, item_id))
                conn.commit()
            finally:
                cursor.close()
        finally:
            conn.close()
        return True
    except mysql.connector.Error as err:
        print(f" Database error: {err}")
        return False
def main():
    """Download every pending image and record its local path in the database.

    Steps: make sure the image folder exists, fetch the rows that still lack a
    local image, download each image (files already on disk are not fetched
    again), store the local path for each item, and print a summary.
    """
    print("Starting image download process...")

    # Ensure img folder exists
    ensure_folder_exists()

    # Get image URLs from database
    print("\nFetching image URLs from database...")
    image_data = get_image_urls_from_db()

    if not image_data:
        print("No image URLs found in database")
        return

    total = len(image_data)
    print(f"Found {total} image(s) to download")

    downloaded = 0
    skipped = 0
    failed = 0
    db_failed = 0  # items whose path could not be written to the database

    for idx, (item_id, url, order_number) in enumerate(image_data, 1):
        filename = generate_filename(url, item_id, order_number or 'unknown')
        # The same path is used on disk and as the value stored in the database.
        filepath = os.path.join(IMG_FOLDER, filename)

        # A file already on disk is not downloaded again, but the row still
        # lacks its path (that is why it was selected), so record it.
        if os.path.exists(filepath):
            print(f"[{idx}/{total}] Skipped (already exists): {filename}")
            if not update_item_image_path(item_id, filepath):
                db_failed += 1
                print(" ✗ Failed to update database")
            skipped += 1
            continue

        print(f"[{idx}/{total}] Downloading: {filename}")

        if not download_image(url, filepath):
            failed += 1
            continue

        downloaded += 1
        print(f" ✓ Saved to {filepath}")

        # Update database with local image path
        if update_item_image_path(item_id, filepath):
            print(" ✓ Updated database")
        else:
            db_failed += 1
            print(" ✗ Failed to update database")

    # Summary
    print("\n" + "="*50)
    print("Download Summary:")
    print(f" Downloaded: {downloaded}")
    print(f" Skipped (already exists): {skipped}")
    print(f" Failed: {failed}")
    print(f" Database update failures: {db_failed}")
    print(f" Total: {total}")
    print("="*50)
# Script entry point: run the download only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()