Ajout Images
This commit is contained in:
66
SD/extract_css_images.py
Normal file
66
SD/extract_css_images.py
Normal file
@@ -0,0 +1,66 @@
|
||||
import re
|
||||
import os
|
||||
import base64
|
||||
|
||||
|
||||
# Input stylesheet and output folder, both resolved relative to this script
# so the tool behaves the same regardless of the current working directory.
CSS_FILE = os.path.join(os.path.dirname(__file__), 'css', 'style.css')
OUTPUT_DIR = os.path.join(os.path.dirname(__file__), 'Imgs')

print(f"Reading CSS file: {CSS_FILE}")

# exist_ok=True replaces the separate "exists?" check and its
# check-then-create race.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Decode explicitly as UTF-8 instead of relying on the platform default
# encoding. Undecodable bytes are replaced rather than raising: the base64
# payloads this script extracts are pure ASCII, so replacement cannot
# corrupt them.
with open(CSS_FILE, 'r', encoding='utf-8', errors='replace') as f:
    css = f.read()

# Debug aid: show the beginning of the stylesheet that was loaded.
print(f"First 1000 chars of CSS:\n{css[:1000]}")

# Matches a whole CSS rule of the form
#     img.<class> { ... content: url(data:image/<type>;base64,<payload>) ... }
# regardless of property order or spacing inside the rule.
#
# Capture groups: (1) class name, (2) image type, (3) base64 payload.
#
# The pattern is a raw string, so every regex escape uses a SINGLE backslash
# and the literal braces are escaped once. Doubling them (as in a format
# string) would require literal backslashes / double braces in the CSS and
# the pattern would never match.
pattern = re.compile(
    r'img\.([a-zA-Z0-9_]+)\s*\{[^}]*?content\s*:\s*url\s*\(\s*'
    r'data:image/(png|bmp|jpeg|jpg);base64,([A-Za-z0-9+/=]+)\)[^}]*\}',
    re.DOTALL)

def _save_image(out_path, b64data):
    """Decode a base64 payload and write the resulting bytes to *out_path*.

    Any whitespace inside the payload (line wraps, indentation) is removed
    before decoding. Decoding happens before the file is opened so that an
    invalid payload does not leave an empty file behind.

    Raises:
        ValueError: the payload is not valid base64 (binascii.Error is a
            ValueError subclass).
        OSError: the output file cannot be written.
    """
    payload = base64.b64decode(''.join(b64data.split()))
    with open(out_path, 'wb') as imgf:
        imgf.write(payload)


matches = list(pattern.finditer(css))
print(f"Found {len(matches)} images in CSS with main regex.")

if not matches:
    print("WARNING: No images found with main regex. Printing first and last 2000 chars of CSS for debug:")
    print("--- FIRST 2000 CHARS ---\n" + css[:2000])
    print("--- LAST 2000 CHARS ---\n" + css[-2000:])

    # Debug aid: print the first img.<class> rule that embeds a data URI.
    # This pattern is looser than the main one: it accepts any image type
    # and whitespace inside the base64 payload.
    img_content_block = re.search(
        r'(img\.[a-zA-Z0-9_]+\s*\{[^}]*content\s*:\s*url\s*\(\s*'
        r'data:image/[^;]+;base64,([A-Za-z0-9+/=\s]+)\)[^}]*\})',
        css, re.DOTALL)
    if img_content_block:
        print("--- img.class BLOCK WITH BASE64 ---\n" + img_content_block.group(1))
        b64_sample = img_content_block.group(2)
        print(f"Base64 sample length: {len(b64_sample)}")
        print(f"Base64 sample (first 200 chars): {b64_sample[:200]}")
    else:
        print("No img.class block with content: url(data:image...) found.")

    # Fallback: match any data:image base64 URI in the file, allowing
    # whitespace and newlines in the payload. Exactly two capture groups
    # (image type, payload) so the unpacking below is valid.
    fallback_pattern = re.compile(
        r'url\s*\(\s*data:image/(png|bmp|jpeg|jpg);base64,([A-Za-z0-9+/=\s]+?)\)',
        re.DOTALL)
    fallback_matches = list(fallback_pattern.finditer(css))
    print(f"Fallback found {len(fallback_matches)} images.")
    for i, match in enumerate(fallback_matches, start=1):
        img_type, b64data = match.groups()
        ext = 'jpg' if img_type == 'jpeg' else img_type
        # No class name is known here, so files are numbered in match order.
        out_path = os.path.join(OUTPUT_DIR, f"fallback_image_{i}.{ext}")
        try:
            _save_image(out_path, b64data)
        except (ValueError, OSError) as e:
            print(f"Failed to extract {out_path}: {e}")
        else:
            print(f"Extracted {out_path} (type: {img_type})")
else:
    for match in matches:
        class_name, img_type, b64data = match.groups()
        ext = 'jpg' if img_type == 'jpeg' else img_type
        # The CSS class name becomes the output file name.
        out_path = os.path.join(OUTPUT_DIR, f"{class_name}.{ext}")
        # Same per-image error handling as the fallback path: one corrupt
        # payload must not abort the extraction of the remaining images.
        try:
            _save_image(out_path, b64data)
        except (ValueError, OSError) as e:
            print(f"Failed to extract {out_path}: {e}")
        else:
            print(f"Extracted {out_path} (class: {class_name}, type: {img_type})")

print("Done.")
|
||||
Reference in New Issue
Block a user