Initialisation depot
This commit is contained in:
293
AliExpress/parse_orders.py
Normal file
293
AliExpress/parse_orders.py
Normal file
@@ -0,0 +1,293 @@
|
||||
# -*- coding: utf-8 -*-
"""
AliExpress Order Parser

Extracts order information from a saved AliExpress orders HTML page and
inserts it into MariaDB.
"""

import os
import re
from datetime import datetime
from decimal import Decimal, InvalidOperation

import mysql.connector
from bs4 import BeautifulSoup

# Database configuration
#
# Every connection setting can be overridden through an environment variable
# so credentials do not have to be edited into (or committed with) this file.
# The fallbacks are the values this script has always used.
#
# NOTE(review): the fallback password is stored in plain text in the
# repository; rotate it and supply ALIEXPRESS_DB_PASSWORD instead.
DB_CONFIG = {
    'host': os.environ.get('ALIEXPRESS_DB_HOST', 'localhost'),
    # The connector expects an integer port, so convert the env string.
    'port': int(os.environ.get('ALIEXPRESS_DB_PORT', '31175')),
    'user': os.environ.get('ALIEXPRESS_DB_USER', 'root'),
    'password': os.environ.get('ALIEXPRESS_DB_PASSWORD', 'vLuH6WhOTMm5O9CarrAX4S5F'),
    'database': os.environ.get('ALIEXPRESS_DB_NAME', 'aliexpress'),
    'charset': 'utf8mb4',
}


def parse_french_date(date_str):
    """
    Convert a French short date to ISO format (YYYY-MM-DD).

    Example: "3 janv. 2026" -> "2026-01-03"

    The month token is matched case-insensitively, with or without the
    trailing abbreviation dot; full month names are accepted as well.

    Args:
        date_str: Text such as "3 janv. 2026". Tokens after the year are
            ignored.

    Returns:
        The date as a "YYYY-MM-DD" string, or None when the input is empty,
        has fewer than three tokens, has a non-numeric day or year, or names
        a month that is not recognised.
    """
    # Keys are lower-case and carry no trailing dot; the lookup normalises
    # the input token the same way.
    month_map = {
        'janv': '01', 'janvier': '01',
        'févr': '02', 'fevr': '02', 'février': '02', 'fevrier': '02',
        'mars': '03',
        'avr': '04', 'avril': '04',
        'mai': '05',
        'juin': '06',
        'juil': '07', 'juillet': '07',
        'août': '08', 'aout': '08',
        'sept': '09', 'septembre': '09',
        'oct': '10', 'octobre': '10',
        'nov': '11', 'novembre': '11',
        'déc': '12', 'dec': '12', 'décembre': '12', 'decembre': '12',
    }

    if not date_str:
        return None

    # Extract day, month, year from a string like "3 janv. 2026"
    parts = date_str.strip().split()
    if len(parts) < 3:
        return None

    day, month_fr, year = parts[:3]
    month = month_map.get(month_fr.lower().rstrip('.,'))

    # An unknown month must not silently become January: report "no date"
    # so the caller stores NULL instead of a wrong value.
    if month is None or not (day.isdigit() and year.isdigit()):
        return None

    return f"{year}-{month}-{day.zfill(2)}"


def parse_price(price_str):
    """
    Convert a French-formatted price to a Decimal.

    Example: "1,29€" -> Decimal("1.29")

    Args:
        price_str: Price text; may contain the euro sign, a decimal comma and
            whitespace (including the non-breaking spaces French formatting
            uses as thousands separators).

    Returns:
        The amount as a Decimal, or None when the input is empty or is not a
        valid number once cleaned.
    """
    if not price_str:
        return None

    # str.split() with no argument splits on every Unicode whitespace
    # character, so this also removes U+00A0 / U+202F, which a plain
    # replace(' ', '') would leave behind and make Decimal() reject.
    without_symbol = price_str.replace('€', '')
    cleaned = ''.join(without_symbol.split()).replace(',', '.')

    try:
        return Decimal(cleaned)
    except (InvalidOperation, ValueError):
        # Not a number: treat as "no price" rather than aborting the parse.
        return None


def extract_quantity(quantity_str):
    """
    Extract the quantity from a string like "x1" or "x2".

    Args:
        quantity_str: Text expected to contain a lower-case "x" followed by
            digits.

    Returns:
        The quantity as an int; 1 when the input is empty or contains no
        "x<digits>" token.
    """
    if not quantity_str:
        return 1

    match = re.search(r'x(\d+)', quantity_str)
    # A missing quantity marker is treated as a single unit.
    return int(match.group(1)) if match else 1


def extract_image_url(style_str):
    """
    Extract the image URL from an inline style attribute.

    Example: 'background-image: url("https://...")' -> 'https://...'

    Args:
        style_str: Value of a ``style`` attribute.

    Returns:
        The first http(s) URL found inside a ``url(...)`` token, or None when
        the input is empty or contains no such token.
    """
    if not style_str:
        return None

    # Quoted and unquoted forms are matched separately: a quoted URL may
    # contain ")" while an unquoted one must stop at the first ")" so the
    # match cannot run on into later declarations such as "rgb(0,0,0)".
    pattern = r'''url\(\s*(?:"(https?://[^"]+)"|'(https?://[^']+)'|(https?://[^"')\s]+))\s*\)'''
    match = re.search(pattern, style_str)
    if not match:
        return None

    # Exactly one of the three alternatives captured the URL.
    return next((group for group in match.groups() if group), None)


# Matches an amount immediately followed by the euro sign, e.g. "1,29€".
_EURO_AMOUNT_RE = re.compile(r'([\d,]+)\s*€')


def _extract_euro_amount(element):
    """Return the first euro amount in *element*'s text as a Decimal, or None."""
    if element is None:
        return None

    match = _EURO_AMOUNT_RE.search(element.get_text(strip=True))
    if not match:
        return None

    return parse_price(match.group(1) + '€')


def _parse_order_header(order_item):
    """Return (order_date, order_number, order_url, order_total) for one order block."""
    # Order date: the text following the "Commande passée le:" label.
    order_date = None
    date_elem = order_item.find('div', string=re.compile(r'Commande passée le:'))
    if date_elem is not None:
        date_text = date_elem.text.replace('Commande passée le:', '').strip()
        order_date = parse_french_date(date_text)

    # Order number: a run of at least 16 digits. Matching the whole run
    # (rather than exactly 16 digits) avoids truncating longer identifiers.
    order_number = None
    number_elem = order_item.find('div', string=re.compile(r'Numéro de commande:'))
    if number_elem is not None:
        match = re.search(r'(\d{16,})', number_elem.text)
        if match:
            order_number = match.group(1)

    # Link to the order detail page.
    order_url = None
    detail_link = order_item.find('a', {'data-pl': 'order_item_header_detail'})
    if detail_link is not None:
        order_url = detail_link.get('href', '')

    # The total belongs to the order, not to an individual item, so it is
    # read once here instead of once per item.
    order_total = _extract_euro_amount(
        order_item.find('span', class_='order-item-content-opt-price-total')
    )

    return order_date, order_number, order_url, order_total


def _parse_order_line(content_item):
    """Return (description, price, quantity, image_url) for one item of an order."""
    desc_elem = content_item.find('div', class_='order-item-content-info-name')
    item_desc = desc_elem.get_text(strip=True) if desc_elem is not None else None

    item_price = _extract_euro_amount(
        content_item.find('div', class_='order-item-content-info-number')
    )

    quantity_elem = content_item.find('span', class_='order-item-content-info-number-quantity')
    quantity = extract_quantity(quantity_elem.get_text() if quantity_elem is not None else 'x1')

    # The thumbnail is carried as a CSS background image on this element.
    item_image_url = None
    image_elem = content_item.find('div', class_='order-item-content-img')
    if image_elem is not None:
        item_image_url = extract_image_url(image_elem.get('style', ''))

    return item_desc, item_price, quantity, item_image_url


def parse_orders_html(html_file):
    """
    Parse the HTML file and extract order information.

    Every ``div.order-item`` is treated as one order; each
    ``div.order-item-content-body`` inside it yields one record that repeats
    the order-level fields (date, number, URL, total).

    Args:
        html_file: Path of the saved HTML page, read as UTF-8.

    Returns:
        A list of dicts with the keys orderDate, orderNumber, orderURL,
        itemDesc, itemPrice, itemQuantity, itemImageURL and orderTotal.
        Fields that cannot be found are None (quantity defaults to 1).
    """
    with open(html_file, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    orders = []

    for order_item in soup.find_all('div', class_='order-item'):
        try:
            order_date, order_number, order_url, order_total = _parse_order_header(order_item)

            for content_item in order_item.find_all('div', class_='order-item-content-body'):
                item_desc, item_price, quantity, item_image_url = _parse_order_line(content_item)

                orders.append({
                    'orderDate': order_date,
                    'orderNumber': order_number,
                    'orderURL': order_url,
                    'itemDesc': item_desc,
                    'itemPrice': item_price,
                    'itemQuantity': quantity,
                    'itemImageURL': item_image_url,
                    'orderTotal': order_total,
                })

        except Exception as e:
            # Best effort: one malformed order must not abort the whole
            # import. Items already appended for this order are kept.
            print(f"Error parsing order item: {e}")
            continue

    return orders


def create_database_table(cursor):
    """
    Create the 'items' table in MariaDB if it does not exist yet.

    Args:
        cursor: An open database cursor; only its ``execute`` method is used.
    """
    create_table_sql = """
    CREATE TABLE IF NOT EXISTS items (
        id INT AUTO_INCREMENT PRIMARY KEY,
        orderDate DATE,
        orderNumber VARCHAR(20),
        orderURL VARCHAR(500),
        itemDesc TEXT,
        itemPrice DECIMAL(10, 2),
        itemQuantity INT,
        itemImageURL VARCHAR(500),
        itemImage VARCHAR(255),
        orderTotal DECIMAL(10, 2),
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        INDEX idx_orderNumber (orderNumber),
        INDEX idx_orderDate (orderDate)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
    """

    cursor.execute(create_table_sql)
    print("Table 'items' created successfully (or already exists)")


def insert_orders(cursor, orders):
    """
    Insert order records into the 'items' table.

    Rows are inserted one at a time; a row that fails (missing key or
    database error) is reported on stdout and skipped so the remaining rows
    are still attempted. Committing is left to the caller.

    Args:
        cursor: An open database cursor; only its ``execute`` method is used.
        orders: Iterable of dicts as produced by ``parse_orders_html``.

    Returns:
        The number of rows for which ``execute`` succeeded.
    """
    # Column order must match the placeholder order in the statement below.
    columns = (
        'orderDate', 'orderNumber', 'orderURL', 'itemDesc', 'itemPrice',
        'itemQuantity', 'itemImageURL', 'orderTotal',
    )
    insert_sql = """
    INSERT INTO items (orderDate, orderNumber, orderURL, itemDesc, itemPrice,
                       itemQuantity, itemImageURL, orderTotal)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    """

    inserted_count = 0
    for order in orders:
        try:
            cursor.execute(insert_sql, tuple(order[column] for column in columns))
            inserted_count += 1
        except Exception as e:
            print(f"Error inserting order {order.get('orderNumber')}: {e}")

    return inserted_count


def main(html_file=r'Commandes.htm'):
    """
    Parse the orders HTML page and insert the records into the database.

    Args:
        html_file: Path of the saved orders page. Defaults to
            'Commandes.htm' in the current directory.

    Database and other errors raised after parsing are reported on stdout
    rather than propagated. The cursor and connection are always closed,
    including when an error occurs part-way through.
    """
    print("Parsing HTML file...")
    orders = parse_orders_html(html_file)
    print(f"Found {len(orders)} order items")

    # Display sample order
    if orders:
        print("\nSample order:")
        sample = orders[0]
        for key, value in sample.items():
            print(f" {key}: {value}")

    conn = None
    cursor = None
    try:
        # Connect to MariaDB
        print("\nConnecting to MariaDB...")
        conn = mysql.connector.connect(**DB_CONFIG)
        cursor = conn.cursor()

        # Create table
        create_database_table(cursor)

        # Insert orders
        print(f"\nInserting {len(orders)} orders into database...")
        inserted = insert_orders(cursor, orders)

        # Commit changes
        conn.commit()
        print(f"Successfully inserted {inserted} orders")

    except mysql.connector.Error as err:
        print(f"Database error: {err}")
    except Exception as e:
        print(f"Error: {e}")
    finally:
        # Release the connection even when a step above failed, so a failed
        # run does not leave a session open on the server.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
            print("Database connection closed")


if __name__ == '__main__':
    main()
Reference in New Issue
Block a user