# Files
# Maison/AliExpress/parse_orders.py
# 2026-02-10 12:12:11 +01:00
#
# 294 lines
# 9.4 KiB
# Python

# -*- coding: utf-8 -*-
"""
AliExpress Order Parser
Extracts order information from HTML and inserts into MariaDB
"""
import os
import re
from datetime import datetime
from decimal import Decimal, InvalidOperation

import mysql.connector
from bs4 import BeautifulSoup
# Database configuration.
# Every setting can be overridden with an ALIEXPRESS_DB_* environment variable so
# that connection details do not have to be edited in (or committed with) the
# source. The literals below are only fallbacks, kept so existing setups keep
# working unchanged.
# NOTE(review): the fallback password is stored here in plain text. Rotate it and
# provide ALIEXPRESS_DB_PASSWORD through the environment instead.
DB_CONFIG = {
    'host': os.environ.get('ALIEXPRESS_DB_HOST', 'localhost'),
    # The connector expects an integer port, so convert the environment string.
    'port': int(os.environ.get('ALIEXPRESS_DB_PORT', '31175')),
    'user': os.environ.get('ALIEXPRESS_DB_USER', 'root'),
    'password': os.environ.get('ALIEXPRESS_DB_PASSWORD', 'vLuH6WhOTMm5O9CarrAX4S5F'),
    'database': os.environ.get('ALIEXPRESS_DB_NAME', 'aliexpress'),
    'charset': 'utf8mb4',
}
def parse_french_date(date_str):
    """
    Convert a French textual date to an ISO date string (YYYY-MM-DD).

    Example: "3 janv. 2026" -> "2026-01-03"

    The month may be abbreviated or spelled out, with or without a trailing
    dot, in any letter case. Only the first three whitespace-separated tokens
    (day, month, year) are considered.

    Returns None when the input is empty, has fewer than three tokens, uses an
    unrecognised month name, or has a non-numeric day or year.
    """
    # Keys are stored without the trailing dot; tokens are normalised the same
    # way before the lookup.
    month_map = {
        'janv': '01', 'janvier': '01',
        'févr': '02', 'février': '02',
        'mars': '03',
        'avr': '04', 'avril': '04',
        'mai': '05',
        'juin': '06',
        'juil': '07', 'juillet': '07',
        'août': '08',
        'sept': '09', 'septembre': '09',
        'oct': '10', 'octobre': '10',
        'nov': '11', 'novembre': '11',
        'déc': '12', 'décembre': '12',
    }
    if not date_str:
        return None
    parts = date_str.strip().split()
    if len(parts) < 3:
        return None
    # Drop punctuation glued to the tokens ("janv.", "2026,").
    day, month_fr, year = (token.strip('.,') for token in parts[:3])
    month = month_map.get(month_fr.casefold())
    # An unknown month must not silently become January: report failure instead.
    if month is None or not day.isdigit() or not year.isdigit():
        return None
    return f"{year}-{month}-{day.zfill(2)}"
def parse_price(price_str):
    """
    Convert a French-formatted price to a Decimal.

    Example: "1,29€" -> Decimal("1.29")

    The euro sign and every whitespace character (including the non-breaking
    and narrow spaces used as thousands separators) are removed, and the
    decimal comma is replaced by a dot.

    Returns None when the input is empty or does not form a valid number.
    """
    if not price_str:
        return None
    # str.split() with no argument splits on all Unicode whitespace, so joining
    # the pieces removes regular, non-breaking and narrow spaces alike.
    cleaned = ''.join(price_str.replace('€', '').split()).replace(',', '.')
    try:
        return Decimal(cleaned)
    except InvalidOperation:
        # Raised by Decimal for text that is not a number (including "").
        return None
def extract_quantity(quantity_str):
    """
    Read the item count out of a marker such as "x1" or "x2".

    Falls back to 1 when the input is empty/None or contains no "x<digits>"
    sequence.
    """
    found = re.search(r'x(\d+)', quantity_str) if quantity_str else None
    return int(found.group(1)) if found else 1
def extract_image_url(style_str):
    """
    Extract the image URL from an inline style attribute.

    Example: 'background-image: url("https://...")' -> 'https://...'

    The URL may be wrapped in double quotes, single quotes, or no quotes, and
    must start with http:// or https://.

    Returns None when the input is empty or holds no such url(...) value.
    """
    if not style_str:
        return None
    # ")" is excluded from the URL characters so that an unquoted url(...) stops
    # at its own closing parenthesis instead of running on to a later ")" in
    # the same style string (e.g. a following "scale(1)").
    match = re.search(r'url\(["\']?(https?://[^"\')]+)["\']?\)', style_str)
    if match:
        return match.group(1)
    return None
def _extract_euro_amount(text):
    """
    Return the first euro amount found in text as parsed by parse_price.

    The amount is the run of digits and commas directly before a "€" sign
    (optional whitespace in between), e.g. "Total:3,45€" -> "3,45".
    Returns None when no such amount is present.
    """
    match = re.search(r'([\d,]+)\s*€', text)
    return parse_price(match.group(1)) if match else None


def parse_orders_html(html_file):
    """
    Parse a saved orders page and return one record per purchased item.

    Args:
        html_file: Path of the HTML file; it is read as UTF-8.

    Returns:
        A list of dicts with the keys orderDate, orderNumber, orderURL,
        itemDesc, itemPrice, itemQuantity, itemImageURL and orderTotal.
        Order-level values (date, number, URL, total) are repeated on every
        item belonging to the same order. Any value that cannot be found in
        the page is None (itemQuantity defaults to 1).

    An exception raised while handling one order is printed and the rest of
    that order is skipped; parsing continues with the next order.
    """
    with open(html_file, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    orders = []
    for order_item in soup.find_all('div', class_='order-item'):
        try:
            # Order date: the text that follows the "Commande passée le:" label.
            order_date_str = None
            order_date_elem = order_item.find('div', string=re.compile(r'Commande passée le:'))
            if order_date_elem:
                date_text = order_date_elem.text.replace('Commande passée le:', '').strip()
                order_date_str = parse_french_date(date_text)

            # Order number: the first run of 16 digits in the labelled element.
            order_number = None
            order_number_elem = order_item.find('div', string=re.compile(r'Numéro de commande:'))
            if order_number_elem:
                match = re.search(r'(\d{16})', order_number_elem.text)
                if match:
                    order_number = match.group(1)

            # Link to the order detail page.
            order_url = None
            detail_link = order_item.find('a', {'data-pl': 'order_item_header_detail'})
            if detail_link:
                order_url = detail_link.get('href', '')

            # The total belongs to the order, not to an item, so it is looked
            # up once here rather than once per item.
            order_total = None
            order_total_elem = order_item.find('span', class_='order-item-content-opt-price-total')
            if order_total_elem:
                order_total = _extract_euro_amount(order_total_elem.get_text(strip=True))

            # One record per item listed in this order.
            for content_item in order_item.find_all('div', class_='order-item-content-body'):
                item_desc_elem = content_item.find('div', class_='order-item-content-info-name')
                item_desc = item_desc_elem.get_text(strip=True) if item_desc_elem else None

                item_price = None
                item_price_elem = content_item.find('div', class_='order-item-content-info-number')
                if item_price_elem:
                    item_price = _extract_euro_amount(item_price_elem.get_text(strip=True))

                quantity_elem = content_item.find('span', class_='order-item-content-info-number-quantity')
                quantity = extract_quantity(quantity_elem.get_text() if quantity_elem else 'x1')

                # The thumbnail is given as a CSS background image in the style attribute.
                item_image_url = None
                image_elem = content_item.find('div', class_='order-item-content-img')
                if image_elem:
                    item_image_url = extract_image_url(image_elem.get('style', ''))

                orders.append({
                    'orderDate': order_date_str,
                    'orderNumber': order_number,
                    'orderURL': order_url,
                    'itemDesc': item_desc,
                    'itemPrice': item_price,
                    'itemQuantity': quantity,
                    'itemImageURL': item_image_url,
                    'orderTotal': order_total,
                })
        except Exception as e:
            # Best effort: one malformed order must not abort the whole import.
            print(f"Error parsing order item: {e}")
            continue
    return orders
def create_database_table(cursor):
    """
    Ensure the 'items' table exists by running a CREATE TABLE IF NOT EXISTS
    statement on the given cursor, then print a confirmation line.
    """
    # The statement is assembled line by line; empty first and last entries
    # give the leading and trailing newline of the executed text.
    ddl_lines = (
        "",
        "CREATE TABLE IF NOT EXISTS items (",
        "id INT AUTO_INCREMENT PRIMARY KEY,",
        "orderDate DATE,",
        "orderNumber VARCHAR(20),",
        "orderURL VARCHAR(500),",
        "itemDesc TEXT,",
        "itemPrice DECIMAL(10, 2),",
        "itemQuantity INT,",
        "itemImageURL VARCHAR(500),",
        "itemImage VARCHAR(255),",
        "orderTotal DECIMAL(10, 2),",
        "created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,",
        "INDEX idx_orderNumber (orderNumber),",
        "INDEX idx_orderDate (orderDate)",
        ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;",
        "",
    )
    cursor.execute("\n".join(ddl_lines))
    print("Table 'items' created successfully (or already exists)")
def insert_orders(cursor, orders):
    """
    Write each order record to the 'items' table, one INSERT per record.

    A record that fails (missing key or an error raised by the cursor) is
    reported on stdout and skipped. Returns how many records were executed
    without error.
    """
    # Column order here must match the column list in the statement below.
    columns = (
        'orderDate',
        'orderNumber',
        'orderURL',
        'itemDesc',
        'itemPrice',
        'itemQuantity',
        'itemImageURL',
        'orderTotal',
    )
    statement = (
        "\n"
        "INSERT INTO items (orderDate, orderNumber, orderURL, itemDesc, itemPrice,\n"
        "itemQuantity, itemImageURL, orderTotal)\n"
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)\n"
    )
    succeeded = 0
    for order in orders:
        try:
            cursor.execute(statement, tuple(order[column] for column in columns))
        except Exception as e:
            print(f"Error inserting order {order.get('orderNumber')}: {e}")
        else:
            succeeded += 1
    return succeeded
def main():
    """
    Parse the saved orders page and load the extracted items into MariaDB.

    Reads 'Commandes.htm' from the current directory, prints the first parsed
    record as a sample, then connects with DB_CONFIG, creates the table if
    needed, inserts every record and commits. Database and other errors in
    the database phase are printed rather than raised. The cursor and the
    connection are always closed, even when an error occurs.
    """
    html_file = r'Commandes.htm'
    print("Parsing HTML file...")
    orders = parse_orders_html(html_file)
    print(f"Found {len(orders)} order items")

    # Display sample order
    if orders:
        print("\nSample order:")
        for key, value in orders[0].items():
            print(f" {key}: {value}")

    # Kept outside the try block so the finally clause can tell what was opened.
    conn = None
    cursor = None
    try:
        print("\nConnecting to MariaDB...")
        conn = mysql.connector.connect(**DB_CONFIG)
        cursor = conn.cursor()

        create_database_table(cursor)

        print(f"\nInserting {len(orders)} orders into database...")
        inserted = insert_orders(cursor, orders)

        conn.commit()
        print(f"Successfully inserted {inserted} orders")
    except mysql.connector.Error as err:
        print(f"Database error: {err}")
    except Exception as e:
        print(f"Error: {e}")
    finally:
        # Release the resources on the error paths too; previously they were
        # only closed after a fully successful run.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
            print("Database connection closed")
# Run the import only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()