# -*- coding: utf-8 -*-
"""
AliExpress Order Parser

Extracts order information from a saved AliExpress orders HTML page and
inserts one row per ordered item into a MariaDB table.
"""

import re
from datetime import datetime
from decimal import Decimal, InvalidOperation

import mysql.connector
from bs4 import BeautifulSoup

# Database configuration
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or from a configuration file kept out of version control.
DB_CONFIG = {
    'host': 'localhost',
    'port': 31175,  # Change to your MariaDB port if different
    'user': 'root',  # Change to your MariaDB username
    'password': 'vLuH6WhOTMm5O9CarrAX4S5F',  # Change to your MariaDB password
    'database': 'aliexpress',  # Change to your database name
    'charset': 'utf8mb4'
}

# French month tokens (abbreviated and full, with and without accents) mapped
# to month numbers. Keys are lower-case and carry no trailing dot.
_FRENCH_MONTHS = {
    'janv': 1, 'janvier': 1,
    'févr': 2, 'fevr': 2, 'février': 2, 'fevrier': 2,
    'mars': 3,
    'avr': 4, 'avril': 4,
    'mai': 5,
    'juin': 6,
    'juil': 7, 'juillet': 7,
    'août': 8, 'aout': 8,
    'sept': 9, 'septembre': 9,
    'oct': 10, 'octobre': 10,
    'nov': 11, 'novembre': 11,
    'déc': 12, 'dec': 12, 'décembre': 12, 'decembre': 12,
}

# "<day>[er] <month>[.] <4-digit year>", e.g. "3 janv. 2026" or "1er mai 2025".
_FRENCH_DATE_RE = re.compile(
    r'(\d{1,2})(?:er)?\s+([^\W\d_]+)\.?\s+(\d{4})(?!\d)'
)

# A price immediately followed by the euro sign. The first alternative accepts
# space-grouped thousands ("1 234,56"); the second is the plain form ("1,29").
_PRICE_RE = re.compile(r'(\d{1,3}(?:\s\d{3})+(?:,\d+)?|[\d,]+)\s*€')

_QUANTITY_RE = re.compile(r'x(\d+)')

# CSS url(...) with a double-quoted, single-quoted or unquoted http(s) URL.
_IMAGE_URL_RE = re.compile(
    r'''url\(\s*(?:"(https?://[^"]+)"|'(https?://[^']+)'|(https?://[^\s"')]+))\s*\)'''
)

_ORDER_DATE_LABEL_RE = re.compile(r'Commande passée le:')
_ORDER_NUMBER_LABEL_RE = re.compile(r'Numéro de commande:')

# A whole run of 10 to 20 digits; the upper bound matches the VARCHAR(20)
# orderNumber column so a value is never silently truncated.
_ORDER_NUMBER_RE = re.compile(r'(?<!\d)(\d{10,20})(?!\d)')


def parse_french_date(date_str):
    """
    Convert a French date to ISO format (YYYY-MM-DD).

    Example: "3 janv. 2026" -> "2026-01-03"

    Returns None when the input is empty, the month token is not recognised,
    or the day/month/year do not form a real calendar date.
    """
    if not date_str:
        return None

    match = _FRENCH_DATE_RE.search(date_str)
    if not match:
        return None

    day_txt, month_txt, year_txt = match.groups()
    month = _FRENCH_MONTHS.get(month_txt.lower())
    if month is None:
        # An unknown month must not be silently stored as January.
        return None

    try:
        parsed = datetime(int(year_txt), month, int(day_txt))
    except ValueError:
        # e.g. "31 févr. 2026"
        return None
    return f"{parsed.year:04d}-{parsed.month:02d}-{parsed.day:02d}"


def parse_price(price_str):
    """
    Convert a French-formatted price to a Decimal.

    Example: "1,29€" -> Decimal("1.29")

    The euro sign and all whitespace (including non-breaking spaces used as
    thousands separators) are removed and the decimal comma becomes a dot.
    Returns None for empty input or text that is not a finite number.
    """
    if not price_str:
        return None

    # str.split() with no argument splits on any Unicode whitespace, so this
    # also drops U+00A0 / U+202F separators.
    cleaned = ''.join(price_str.replace('€', '').split()).replace(',', '.')
    try:
        value = Decimal(cleaned)
    except (InvalidOperation, ValueError):
        return None
    # Decimal accepts "NaN" / "Infinity", which are not valid prices.
    return value if value.is_finite() else None


def extract_quantity(quantity_str):
    """
    Extract the quantity from a string like "x1" or "x2".

    Returns 1 when the input is empty or contains no "x<digits>" token.
    """
    if not quantity_str:
        return 1
    match = _QUANTITY_RE.search(quantity_str)
    return int(match.group(1)) if match else 1


def extract_image_url(style_str):
    """
    Extract the image URL from an inline style attribute.

    Example: background-image: url("https://...") -> "https://..."

    Returns None when the input is empty or holds no http(s) url(...).
    """
    if not style_str:
        return None
    match = _IMAGE_URL_RE.search(style_str)
    if not match:
        return None
    # Exactly one of the three alternatives captured the URL.
    return next((group for group in match.groups() if group), None)


def _extract_price(text):
    """Return the first euro amount found in text as a Decimal, or None."""
    match = _PRICE_RE.search(text)
    return parse_price(match.group(1)) if match else None


def _iter_order_records(order_item):
    """Yield one record dict per item found in a single order element."""
    # Order date: the text following "Commande passée le:".
    order_date_str = None
    date_elem = order_item.find('div', string=_ORDER_DATE_LABEL_RE)
    if date_elem:
        date_text = date_elem.text.replace('Commande passée le:', '').strip()
        order_date_str = parse_french_date(date_text)

    # Order number.
    order_number = None
    number_elem = order_item.find('div', string=_ORDER_NUMBER_LABEL_RE)
    if number_elem:
        number_match = _ORDER_NUMBER_RE.search(number_elem.text)
        if number_match:
            order_number = number_match.group(1)

    # Order detail URL.
    order_url = None
    detail_link = order_item.find('a', {'data-pl': 'order_item_header_detail'})
    if detail_link:
        order_url = detail_link.get('href', '')

    # Order total (e.g. "Total:3,45€"). It belongs to the order, not to an
    # item, so it is looked up once and shared by every record of the order.
    order_total = None
    total_elem = order_item.find('span', class_='order-item-content-opt-price-total')
    if total_elem:
        order_total = _extract_price(total_elem.get_text(strip=True))

    for content_item in order_item.find_all('div', class_='order-item-content-body'):
        # Item description.
        desc_elem = content_item.find('div', class_='order-item-content-info-name')
        item_desc = desc_elem.get_text(strip=True) if desc_elem else None

        # Item price (e.g. "1,29€").
        item_price = None
        price_elem = content_item.find('div', class_='order-item-content-info-number')
        if price_elem:
            item_price = _extract_price(price_elem.get_text(strip=True))

        # Quantity; defaults to 1 when the element is missing.
        quantity_elem = content_item.find(
            'span', class_='order-item-content-info-number-quantity'
        )
        quantity = extract_quantity(quantity_elem.get_text() if quantity_elem else 'x1')

        # Image URL taken from the inline background-image style.
        item_image_url = None
        image_elem = content_item.find('div', class_='order-item-content-img')
        if image_elem:
            item_image_url = extract_image_url(image_elem.get('style', ''))

        yield {
            'orderDate': order_date_str,
            'orderNumber': order_number,
            'orderURL': order_url,
            'itemDesc': item_desc,
            'itemPrice': item_price,
            'itemQuantity': quantity,
            'itemImageURL': item_image_url,
            'orderTotal': order_total
        }


def parse_orders_html(html_file):
    """
    Parse the HTML file and extract order information.

    Returns a list of dicts, one per ordered item, with the keys orderDate,
    orderNumber, orderURL, itemDesc, itemPrice, itemQuantity, itemImageURL
    and orderTotal. Fields that cannot be found are None (quantity defaults
    to 1). An error while parsing one order is printed and parsing continues
    with the next order; items already extracted from that order are kept.
    """
    with open(html_file, 'r', encoding='utf-8') as f:
        html_content = f.read()

    soup = BeautifulSoup(html_content, 'html.parser')
    orders = []

    # Find all order items
    for order_item in soup.find_all('div', class_='order-item'):
        try:
            # Append one record at a time so that a failure part-way through
            # an order does not discard the records already produced.
            for record in _iter_order_records(order_item):
                orders.append(record)
        except Exception as e:
            print(f"Error parsing order item: {e}")
            continue

    return orders


def create_database_table(cursor):
    """
    Create the 'items' table in MariaDB if it does not exist yet.
    """
    create_table_sql = """
    CREATE TABLE IF NOT EXISTS items (
        id INT AUTO_INCREMENT PRIMARY KEY,
        orderDate DATE,
        orderNumber VARCHAR(20),
        orderURL VARCHAR(500),
        itemDesc TEXT,
        itemPrice DECIMAL(10, 2),
        itemQuantity INT,
        itemImageURL VARCHAR(500),
        itemImage VARCHAR(255),
        orderTotal DECIMAL(10, 2),
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        INDEX idx_orderNumber (orderNumber),
        INDEX idx_orderDate (orderDate)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
    """
    cursor.execute(create_table_sql)
    print("Table 'items' created successfully (or already exists)")


def insert_orders(cursor, orders):
    """
    Insert order records into the 'items' table.

    Each record is inserted with its own parameterized statement; a failing
    row is reported and skipped. Returns the number of rows inserted.

    NOTE: the table has no uniqueness constraint on its data columns, so
    running the script again on the same page inserts the same rows again.
    """
    insert_sql = """
    INSERT INTO items
        (orderDate, orderNumber, orderURL, itemDesc, itemPrice,
         itemQuantity, itemImageURL, orderTotal)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    """

    inserted_count = 0
    for order in orders:
        try:
            cursor.execute(insert_sql, (
                order['orderDate'],
                order['orderNumber'],
                order['orderURL'],
                order['itemDesc'],
                order['itemPrice'],
                order['itemQuantity'],
                order['itemImageURL'],
                order['orderTotal']
            ))
            inserted_count += 1
        except Exception as e:
            print(f"Error inserting order {order.get('orderNumber')}: {e}")

    return inserted_count


def main():
    """
    Main function to parse HTML and insert into database.
    """
    html_file = r'Commandes.htm'

    print("Parsing HTML file...")
    orders = parse_orders_html(html_file)
    print(f"Found {len(orders)} order items")

    # Display sample order
    if orders:
        print("\nSample order:")
        sample = orders[0]
        for key, value in sample.items():
            print(f"  {key}: {value}")

    # Connect to MariaDB
    conn = None
    try:
        print("\nConnecting to MariaDB...")
        conn = mysql.connector.connect(**DB_CONFIG)
        cursor = conn.cursor()
        try:
            # Create table
            create_database_table(cursor)

            # Insert orders
            print(f"\nInserting {len(orders)} orders into database...")
            inserted = insert_orders(cursor, orders)

            # Commit changes
            conn.commit()
            print(f"Successfully inserted {inserted} orders")
        finally:
            cursor.close()
    except mysql.connector.Error as err:
        print(f"Database error: {err}")
    except Exception as e:
        print(f"Error: {e}")
    finally:
        # Always release the connection, including after a failure.
        if conn is not None:
            try:
                conn.close()
                print("Database connection closed")
            except mysql.connector.Error as err:
                print(f"Database error: {err}")


if __name__ == '__main__':
    main()