# -*- coding: utf-8 -*-
"""
Download the images referenced by itemImageURL in the database and store
the local path of each downloaded file in itemImage.
"""
|
|
|
|
import os
|
|
import requests
|
|
import mysql.connector
|
|
from pathlib import Path
|
|
from urllib.parse import urlparse
|
|
import hashlib
|
|
|
|
# Database configuration.
# Every connection setting can be overridden through an environment variable
# so that credentials do not have to live in the source tree. The literals
# below are only fallbacks that keep existing setups working unchanged.
# NOTE(review): the fallback password is a secret committed to source control;
# rotate it and drop the default once ALIEXPRESS_DB_PASSWORD is set everywhere.
DB_CONFIG = {
    'host': os.environ.get('ALIEXPRESS_DB_HOST', 'localhost'),
    # mysql.connector expects an integer port, environment values are strings.
    'port': int(os.environ.get('ALIEXPRESS_DB_PORT', '31175')),
    'user': os.environ.get('ALIEXPRESS_DB_USER', 'root'),
    'password': os.environ.get('ALIEXPRESS_DB_PASSWORD', 'vLuH6WhOTMm5O9CarrAX4S5F'),
    'database': os.environ.get('ALIEXPRESS_DB_NAME', 'aliexpress'),
    'charset': 'utf8mb4',
}

# Image folder (a relative path, resolved against the current working directory)
IMG_FOLDER = 'img'
|
|
|
|
|
|
def ensure_folder_exists():
    """Create the image folder (IMG_FOLDER) if it doesn't exist.

    Missing parent directories are created as well, so IMG_FOLDER may be a
    nested path. An already existing folder is left untouched.
    """
    Path(IMG_FOLDER).mkdir(parents=True, exist_ok=True)
    print(f"Image folder '{IMG_FOLDER}' ready")
|
|
|
|
|
|
def get_image_urls_from_db():
    """Retrieve the rows whose image still has to be downloaded.

    Selects every row of ``items`` that has a non-empty ``itemImageURL`` but
    an empty or NULL ``itemImage``, ordered by ``orderNumber``. Rows are not
    de-duplicated because each row is updated individually afterwards.

    Returns:
        The rows returned by ``cursor.fetchall()``, each holding
        ``(id, itemImageURL, orderNumber)``. An empty list is returned when a
        database error occurs; the error is printed, not raised.
    """
    query = """
        SELECT id, itemImageURL, orderNumber
        FROM items
        WHERE itemImageURL IS NOT NULL AND itemImageURL != ''
        AND (itemImage IS NULL OR itemImage = '')
        ORDER BY orderNumber
    """

    conn = None
    try:
        conn = mysql.connector.connect(**DB_CONFIG)
        cursor = conn.cursor()
        try:
            cursor.execute(query)
            return cursor.fetchall()
        finally:
            # Release the cursor even when the query fails.
            cursor.close()
    except mysql.connector.Error as err:
        print(f"Database error: {err}")
        return []
    finally:
        # conn stays None when connect() itself failed.
        if conn is not None:
            conn.close()
|
|
|
|
|
|
def generate_filename(url, item_id, order_number):
    """Generate a unique filename based on URL, item ID and order number.

    The result has the form ``<item_id>_<order_number>_<hash><ext>`` where
    ``<hash>`` is the first 8 hex digits of the MD5 digest of the URL and
    ``<ext>`` is the extension of the URL path. When the path has no
    extension, the whole URL is searched (case-insensitively) for a known
    image extension and ``.jpg`` is used if none is found.

    Args:
        url: Source URL of the image.
        item_id: Database ID of the item.
        order_number: Order number of the item.

    Returns:
        The filename as a string, without any directory component.
    """
    # Extension of the URL path only; the query string is ignored here.
    ext = os.path.splitext(urlparse(url).path)[1]

    if not ext:
        # Checked in this order; the first extension found anywhere in the URL wins.
        lowered_url = url.lower()
        known_extensions = ('.jpg', '.png', '.avif', '.webp')
        ext = next(
            (candidate for candidate in known_extensions if candidate in lowered_url),
            '.jpg',  # Default
        )

    # The MD5 is used only to keep names distinct, not for security.
    url_hash = hashlib.md5(url.encode()).hexdigest()[:8]

    def _safe(part):
        # Path separators would place the file outside the target folder
        # (or make open() fail), so they are replaced.
        return str(part).replace('/', '_').replace('\\', '_')

    return f"{_safe(item_id)}_{_safe(order_number)}_{url_hash}{ext}"
|
|
|
|
|
|
def download_image(url, filepath):
    """Download an image from URL and save it to filepath.

    The response body is streamed into a temporary ``<filepath>.part`` file
    that is renamed to ``filepath`` only after the transfer has completed.
    An interrupted download therefore never leaves a truncated file at
    ``filepath`` that a later existence check would mistake for a finished
    image.

    Args:
        url: URL of the image to fetch.
        filepath: Destination path of the image file.

    Returns:
        True when the image was saved, False when the request or the file
        write failed (the error is printed, not raised).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    temp_path = f"{filepath}.part"

    try:
        # The context manager closes the streamed response and releases the
        # underlying connection even when an error occurs mid-transfer.
        with requests.get(url, headers=headers, timeout=30, stream=True) as response:
            response.raise_for_status()

            with open(temp_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

        # Publish the file only once it is complete.
        os.replace(temp_path, filepath)
        return True

    except (requests.exceptions.RequestException, OSError) as e:
        print(f" Error downloading: {e}")
        try:
            os.remove(temp_path)
        except OSError:
            # Best-effort cleanup: the partial file may never have been created.
            pass
        return False
|
|
|
|
|
|
def update_item_image_path(item_id, relative_path):
    """Update the itemImage field of one item in the database.

    Args:
        item_id: Value of the ``id`` column of the row to update.
        relative_path: Path of the local image file to store in ``itemImage``.

    Returns:
        True when the update was committed, False when a database error
        occurred (the error is printed, not raised).
    """
    update_query = """
        UPDATE items
        SET itemImage = %s
        WHERE id = %s
    """

    conn = None
    try:
        conn = mysql.connector.connect(**DB_CONFIG)
        cursor = conn.cursor()
        try:
            cursor.execute(update_query, (relative_path, item_id))
            conn.commit()
        finally:
            # Release the cursor even when the statement or commit fails.
            cursor.close()
        return True
    except mysql.connector.Error as err:
        print(f" Database error: {err}")
        return False
    finally:
        # conn stays None when connect() itself failed.
        if conn is not None:
            conn.close()
|
|
|
|
|
|
def main():
    """Download every pending image and record its local path in the database.

    For each row returned by get_image_urls_from_db() a filename is generated
    and the image is downloaded into IMG_FOLDER, unless a file with that name
    already exists. In both cases the path of the local file is written to
    the row's itemImage field. A summary of downloaded, skipped and failed
    images is printed at the end.
    """
    print("Starting image download process...")

    # Ensure img folder exists
    ensure_folder_exists()

    # Get image URLs from database
    print("\nFetching image URLs from database...")
    image_data = get_image_urls_from_db()

    if not image_data:
        print("No image URLs found in database")
        return

    total = len(image_data)
    print(f"Found {total} image(s) to download")

    # Download each image
    downloaded = 0
    skipped = 0
    failed = 0

    for idx, (item_id, url, order_number) in enumerate(image_data, 1):
        filename = generate_filename(url, item_id, order_number or 'unknown')
        # The same path is used for writing the file and as the value stored
        # in the database.
        filepath = os.path.join(IMG_FOLDER, filename)

        # Check if file already exists
        if os.path.exists(filepath):
            print(f"[{idx}/{total}] Skipped (already exists): {filename}")
            # The row was selected because itemImage is still empty, so the
            # database is updated even though nothing is downloaded.
            if not update_item_image_path(item_id, filepath):
                print(" ✗ Failed to update database")
            skipped += 1
            continue

        print(f"[{idx}/{total}] Downloading: {filename}")

        if not download_image(url, filepath):
            failed += 1
            continue

        downloaded += 1
        print(f" ✓ Saved to {filepath}")

        # Update database with local image path
        if update_item_image_path(item_id, filepath):
            print(" ✓ Updated database")
        else:
            print(" ✗ Failed to update database")

    # Summary
    print("\n" + "=" * 50)
    print("Download Summary:")
    print(f" Downloaded: {downloaded}")
    print(f" Skipped (already exists): {skipped}")
    print(f" Failed: {failed}")
    print(f" Total: {total}")
    print("=" * 50)


if __name__ == '__main__':
    main()
|