Add daily-stone page showing a different mineral each day

New dailystone app with 207 minerals scraped from Wikipedia. Each day displays a different mineral with photos, formula, properties, description, and history. The page theme color matches the mineral's typical appearance.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-30 18:30:10 +03:00
parent a8ab5f6ce1
commit 0be99e8e9a
20 changed files with 6445 additions and 0 deletions
--- a/dailystone/management/commands/__init__.py
+++ b/dailystone/management/commands/__init__.py
--- a/dailystone/management/commands/export_minerals.py
+++ b/dailystone/management/commands/export_minerals.py
@@ -0,0 +1,22 @@
+"""
+Export mineral data as a JSON fixture for loading on the production server.
+
+Usage:
+    python manage.py export_minerals > dailystone/fixtures/minerals.json
+    python manage.py loaddata dailystone/fixtures/minerals.json
+"""
+import json
+import sys
+
+from django.core.management.base import BaseCommand
+from django.core import serializers
+
+from dailystone.models import Mineral
+
+
+class Command(BaseCommand):
+    """Management command that prints every Mineral row as a JSON fixture."""
+
+    help = 'Export mineral data as a Django fixture (JSON)'
+
+    def handle(self, *args, **options):
+        """Serialize all minerals to indented JSON and write it to stdout.
+
+        The output is meant to be redirected into a fixture file and loaded
+        elsewhere with ``loaddata`` (see the module usage notes).
+        """
+        # Order by primary key so repeated exports emit rows in the same
+        # order and a fixture kept under version control diffs cleanly.
+        queryset = Mineral.objects.order_by('pk')
+        data = serializers.serialize('json', queryset, indent=2)
+        self.stdout.write(data)
--- a/dailystone/management/commands/scrape_minerals.py
+++ b/dailystone/management/commands/scrape_minerals.py
@@ -0,0 +1,676 @@
+"""
+Scrape mineral data from Wikipedia for the Daily Stone feature.
+
+Usage:
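+    python manage.py scrape_minerals                  # scrape every mineral in MINERAL_LIST
+    python manage.py scrape_minerals --limit 10       # scrape first 10 only
+    python manage.py scrape_minerals --dry-run        # just list names, don't save
+    python manage.py scrape_minerals --skip-existing  # skip minerals already in the database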
+"""
+import re
+import time
+import hashlib
+import json
+
+import requests
+from bs4 import BeautifulSoup
+from django.core.management.base import BaseCommand
+
+from dailystone.models import Mineral
+
+# Curated list of well-known, visually interesting minerals with approximate colors.
+# Each entry is (Wikipedia page title, color hex); the hex is a rough
+# representative of the mineral's typical appearance.
+# The position of an entry (1-based) is stored as the mineral's day_of_year by
+# the scrape command, so append new entries at the end rather than inserting.
+# Exact repeats of earlier names have been removed from this literal; the
+# de-duplication pass below keeps only first occurrences anyway, so the
+# resulting list is unchanged.
+MINERAL_LIST = [
+    ("Quartz", "#f5f5f5"),
+    ("Amethyst", "#9b59b6"),
+    ("Rose quartz", "#f4a7b9"),
+    ("Citrine (quartz)", "#f0c420"),
+    ("Diamond", "#e8e8e8"),
+    ("Ruby", "#e0115f"),
+    ("Sapphire", "#0f52ba"),
+    ("Emerald", "#50c878"),
+    ("Topaz", "#ffc87c"),
+    ("Opal", "#a8c3bc"),
+    ("Turquoise (mineral)", "#40e0d0"),
+    ("Garnet", "#733635"),
+    ("Peridot", "#b4c424"),
+    ("Aquamarine", "#7fffd4"),
+    ("Tanzanite", "#4d5ba8"),
+    ("Malachite", "#0bda51"),
+    ("Lapis lazuli", "#26619c"),
+    ("Jade", "#00a86b"),
+    ("Obsidian", "#3d3635"),
+    ("Pyrite", "#c5a647"),
+    ("Hematite", "#5c5858"),
+    ("Magnetite", "#353535"),
+    ("Calcite", "#f5deb3"),
+    ("Fluorite", "#7b68ee"),
+    ("Apatite", "#509987"),
+    ("Beryl", "#c1f0c1"),
+    ("Spinel", "#ff4040"),
+    ("Zircon", "#c4b19e"),
+    ("Alexandrite", "#568c4c"),
+    ("Tourmaline", "#86c67c"),
+    ("Moonstone", "#c5cfe0"),
+    ("Sunstone", "#e07020"),
+    ("Labradorite", "#5678a0"),
+    ("Rhodonite", "#e87ea1"),
+    ("Rhodochrosite", "#e55b6e"),
+    ("Azurite", "#2d5da1"),
+    ("Chrysocolla", "#4cb9a0"),
+    ("Cuprite", "#a52a2a"),
+    ("Dioptase", "#209d7d"),
+    ("Wulfenite", "#e68a00"),
+    ("Vanadinite", "#cc3333"),
+    ("Crocoite", "#e74c3c"),
+    ("Realgar", "#e34234"),
+    ("Orpiment", "#e9a820"),
+    ("Stibnite", "#708090"),
+    ("Galena", "#6b6e70"),
+    ("Cinnabar", "#e44d2e"),
+    ("Barite", "#c8c8c0"),
+    ("Celestine (mineral)", "#a8d8ea"),
+    ("Gypsum", "#f0ece2"),
+    ("Halite", "#f0f0f0"),
+    ("Sylvite", "#e0c0a0"),
+    ("Sulfur", "#edda09"),
+    ("Copper", "#b87333"),
+    ("Gold", "#ffd700"),
+    ("Silver", "#c0c0c0"),
+    ("Platinum", "#e5e4e2"),
+    ("Bismuth", "#969696"),
+    ("Antimony", "#7b8c8a"),
+    ("Arsenic", "#808080"),
+    ("Graphite", "#474747"),
+    ("Corundum", "#d9413c"),
+    ("Spodumene", "#d8bfd8"),
+    ("Kunzite", "#e6a8d7"),
+    ("Hiddenite", "#98fb98"),
+    ("Chrysoberyl", "#e8d44d"),
+    ("Nephrite", "#638b57"),
+    ("Jadeite", "#00a86b"),
+    ("Serpentine subgroup", "#6b8e23"),
+    ("Chalcopyrite", "#b8860b"),
+    ("Bornite", "#8b6914"),
+    ("Covellite", "#4169e1"),
+    ("Molybdenite", "#6e6e6e"),
+    ("Sphalerite", "#a0522d"),
+    ("Wurtzite", "#8b4513"),
+    ("Cassiterite", "#5c4033"),
+    ("Rutile", "#b22222"),
+    ("Anatase", "#4682b4"),
+    ("Brookite", "#8b5e3b"),
+    ("Ilmenite", "#404040"),
+    ("Goethite", "#7b6b3a"),
+    ("Limonite", "#9a7b4f"),
+    ("Siderite", "#8b7d6b"),
+    ("Magnesite", "#ede6d6"),
+    ("Dolomite", "#dfc8a8"),
+    ("Aragonite", "#faebd7"),
+    ("Smithsonite", "#7ec8c8"),
+    ("Cerussite", "#c8c8c0"),
+    ("Witherite", "#e8e0d8"),
+    ("Strontianite", "#c8d8c0"),
+    ("Ankerite", "#c8b890"),
+    ("Olivine", "#9ab973"),
+    ("Forsterite", "#96be50"),
+    ("Fayalite", "#6b5c3e"),
+    ("Augite", "#2e4032"),
+    ("Diopside", "#507856"),
+    ("Enstatite", "#908870"),
+    ("Hypersthene", "#5b5e4e"),
+    ("Wollastonite", "#e8e0d8"),
+    ("Tremolite", "#e0e8d8"),
+    ("Actinolite", "#2d8b57"),
+    ("Hornblende", "#3b4838"),
+    ("Glaucophane", "#5b6db8"),
+    ("Riebeckite", "#2f4f4f"),
+    ("Muscovite", "#d4c48d"),
+    ("Biotite", "#4a3c28"),
+    ("Phlogopite", "#c4a35a"),
+    ("Lepidolite", "#c8a2c8"),
+    ("Talc", "#e8e8e0"),
+    ("Kaolinite", "#f0e8d8"),
+    ("Montmorillonite", "#c8b090"),
+    ("Vermiculite", "#b89c78"),
+    ("Chlorite group", "#6b8f47"),
+    ("Prehnite", "#c8e8a0"),
+    ("Epidote", "#7b8b2e"),
+    ("Zoisite", "#6b8b73"),
+    ("Clinozoisite", "#7b9b6b"),
+    ("Vesuvianite", "#6b8040"),
+    ("Pumpellyite", "#447744"),
+    ("Lawsonite", "#8090a0"),
+    ("Andalusite", "#b08080"),
+    ("Sillimanite", "#c8c0b8"),
+    ("Kyanite", "#5b8fbe"),
+    ("Staurolite", "#7b5b3b"),
+    ("Cordierite", "#6666aa"),
+    ("Sodalite", "#3c578e"),
+    ("Lazurite", "#26619c"),
+    ("Hauyne", "#4466bb"),
+    ("Leucite", "#d8d0c8"),
+    ("Nepheline", "#c8c0a8"),
+    ("Scapolite", "#d0c8b0"),
+    ("Danburite", "#e8e0d0"),
+    ("Datolite", "#d4e8d0"),
+    ("Titanite", "#b8a048"),
+    ("Dumortierite", "#4060a0"),
+    ("Hemimorphite", "#98d8e8"),
+    ("Willemite", "#70b020"),
+    ("Phenakite", "#e0e0d8"),
+    ("Euclase", "#80b8d8"),
+    ("Bertrandite", "#d8d0c0"),
+    ("Chrysoprase", "#79a868"),
+    ("Carnelian", "#b5462a"),
+    ("Jasper", "#ce4a2f"),
+    ("Agate", "#b0a090"),
+    ("Onyx", "#353839"),
+    ("Chalcedony", "#c8d0d8"),
+    ("Tiger's eye", "#b8860b"),
+    ("Hawk's eye", "#4c6c8c"),
+    ("Bloodstone", "#3b6e3f"),
+    ("Aventurine", "#568b52"),
+    ("Amazonite", "#4c8c7a"),
+    ("Larvikite", "#4a5060"),
+    ("Charoite", "#7b4e8a"),
+    ("Sugilite", "#8b4789"),
+    ("Larimar", "#88c8de"),
+    ("Pietersite", "#4a5c40"),
+    ("Moldavite", "#6b8e23"),
+    ("Tektite", "#3a3a3a"),
+    ("Shungite", "#2c2c2c"),
+    ("Seraphinite", "#4a7c5a"),
+    ("Astrophyllite", "#8b6c2a"),
+    ("Nuummite", "#3a3a3a"),
+    ("Howlite", "#e8e0d8"),
+    ("Unakite", "#7a8a5a"),
+    ("Variscite", "#50b848"),
+    ("Wavellite", "#78b868"),
+    ("Vivianite", "#2e5e8e"),
+    ("Erythrite", "#d84888"),
+    ("Annabergite", "#58b858"),
+    ("Adamite", "#b8e830"),
+    ("Legrandite", "#e8d830"),
+    ("Aurichalcite", "#78c8b8"),
+    ("Rosasite", "#58a8a8"),
+    ("Cavansite", "#3070c8"),
+    ("Pentlandite", "#b8a830"),
+    ("Millerite", "#b8a040"),
+    ("Nickeline", "#c8a088"),
+    ("Skutterudite", "#808080"),
+    ("Cobaltite", "#808888"),
+    ("Arsenopyrite", "#808888"),
+    ("Marcasite", "#c0b838"),
+    ("Pyrrhotite", "#a09048"),
+    ("Chromite", "#404040"),
+    ("Spessartine", "#e86838"),
+    ("Almandine", "#a03050"),
+    ("Pyrope", "#c82040"),
+    ("Grossular", "#80b840"),
+    ("Andradite", "#686830"),
+    ("Uvarovite", "#388838"),
+    ("Tsavorite", "#38a848"),
+    ("Demantoid", "#58a838"),
+    ("Melanite", "#303030"),
+    ("Topazolite", "#d8c838"),
+    ("Schorl", "#2c2c2c"),
+    ("Elbaite", "#48b888"),
+    ("Dravite", "#8b6c3a"),
+    ("Indicolite", "#287888"),
+    ("Rubellite", "#c83868"),
+    ("Paraiba tourmaline", "#00b8c8"),
+    ("Watermelon tourmaline", "#78b858"),
+    ("Tephroite", "#7a6a50"),
+    ("Rhodolite", "#c84878"),
+    ("Iolite", "#5858a8"),
+    ("Scolecite", "#e8e0e0"),
+    ("Natrolite", "#e0e0d0"),
+    ("Stilbite", "#e8b898"),
+    ("Heulandite", "#e0c090"),
+    ("Apophyllite", "#c8e8d0"),
+    ("Analcime", "#e0e0d0"),
+    ("Chabazite", "#e0c898"),
+    ("Phillipsite", "#d0c8b0"),
+    ("Thomsonite", "#d8d0c0"),
+    ("Mesolite", "#e8e0d8"),
+    ("Laumontite", "#e0d0b0"),
+    ("Mordenite", "#e0d8c8"),
+    ("Clinoptilolite", "#d8d0c0"),
+    ("Erionite", "#e0d8d0"),
+    ("Colemanite", "#e0d8c8"),
+    ("Ulexite", "#f0e8e0"),
+    ("Borax", "#e8e0d8"),
+    ("Kernite", "#e0d8d0"),
+    ("Tincalconite", "#e8e0d8"),
+    ("Sassolite", "#e0e0d0"),
+    ("Boracite", "#c8d8c0"),
+    ("Sinhalite", "#c0a870"),
+    ("Kornerupine", "#587848"),
+    ("Grandidierite", "#4898a8"),
+    ("Serendibite", "#384838"),
+    ("Taaffeite", "#c888c8"),
+    ("Painite", "#a06040"),
+    ("Musgravite", "#808878"),
+    ("Jeremejevite", "#a8c8e0"),
+    ("Poudretteite", "#e8c0d8"),
+    ("Benitoite", "#3858c8"),
+    ("Neptunite", "#383028"),
+    ("Joaquinite", "#a88030"),
+    ("Sanbornite", "#e0d8c8"),
+    ("Fresnoite", "#e0d030"),
+    ("Celsian", "#d8d0c0"),
+    ("Hyalophane", "#d0c8b0"),
+    ("Harmotome", "#d8d0c0"),
+    ("Pectolite", "#d0e0e0"),
+    ("Okenite", "#f0e8e0"),
+    ("Gyrolite", "#e0e8d0"),
+    ("Tobermorite", "#d8d0c8"),
+    ("Xonotlite", "#e0d8d0"),
+    ("Thaumasite", "#e8e0d8"),
+    ("Ettringite", "#e8e838"),
+    ("Sturmanite", "#e8e030"),
+    ("Charlesite", "#e0e0c8"),
+    ("Afwillite", "#e0d8d0"),
+    ("Hillebrandite", "#e0d8c8"),
+    ("Foshagite", "#e0e0d0"),
+    ("Jennite", "#d8c8b8"),
+    ("Suolunite", "#d8d0c0"),
+    ("Rosenbuschite", "#c8a070"),
+    ("Eudialyte", "#c84860"),
+    ("Catapleiite", "#c8c0b0"),
+    ("Lorenzenite", "#584838"),
+    ("Ramsayite", "#685838"),
+    ("Lamprophyllite", "#a08030"),
+    ("Murmanite", "#907050"),
+    ("Lomonosovite", "#886040"),
+    ("Vuonnemite", "#b89050"),
+    ("Villiaumite", "#e8a030"),
+    ("Ussingite", "#c8a0b8"),
+    ("Chkalovite", "#d8d0c8"),
+    ("Tugtupite", "#e0586e"),
+    ("Sorensenite", "#d8d0c8"),
+    ("Tinguaite", "#586850"),
+    ("Cancrinite", "#e0c030"),
+    ("Vishnevite", "#c8c0a0"),
+    ("Davyne", "#d0c890"),
+    ("Microsommite", "#d0c890"),
+    ("Nosean", "#707888"),
+    ("Hackmanite", "#9870a0"),
+    ("Pargasite", "#386838"),
+    ("Edenite", "#507848"),
+    ("Kaersutite", "#483830"),
+    ("Richterite", "#586880"),
+    ("Winchite", "#607060"),
+    ("Barroisite", "#506860"),
+    ("Gedrite", "#606058"),
+    ("Anthophyllite", "#807860"),
+    ("Cummingtonite", "#787068"),
+    ("Grunerite", "#686058"),
+    ("Holmquistite", "#5868a0"),
+    ("Sapphirine", "#4060a8"),
+    ("Prismatine", "#586048"),
+    ("Boralsilite", "#d0c8b8"),
+    ("Werdingite", "#a8a098"),
+    ("Ominelite", "#404038"),
+    # NOTE(review): "Taafeite" looks like a misspelling of "Taaffeite" above,
+    # which the name-based de-duplication cannot catch. Kept as-is because
+    # removing it would shift the position (day_of_year) of every later entry.
+    ("Taafeite", "#c888c8"),
+    ("Johachidolite", "#e0c070"),
+    ("Selenite (mineral)", "#f0eee0"),
+    ("Desert rose (crystal)", "#d8b890"),
+    ("Fulgurite", "#c8b898"),
+    ("Meteorite", "#686058"),
+    ("Pallasite", "#a09048"),
+    ("Kamacite", "#909090"),
+    ("Taenite", "#a0a0a0"),
+    ("Troilite", "#886838"),
+    ("Schreibersite", "#a0a098"),
+    ("Cohenite", "#686060"),
+    ("Moissanite", "#b8e8c8"),
+    ("Lonsdaleite", "#c8c8c0"),
+    ("Stishovite", "#d0d0c8"),
+    ("Coesite", "#c8c8c0"),
+    ("Seifertite", "#c0c0b8"),
+    ("Ringwoodite", "#5878c8"),
+    ("Bridgmanite", "#a0a098"),
+    ("Davemaoite", "#a8a0a0"),
+    ("Ice", "#e0f0f8"),
+    ("Dry ice", "#e8e8f0"),
+    ("Sal ammoniac", "#e0e0d8"),
+    ("Niter", "#e8e0d8"),
+    ("Natron", "#e0d8c8"),
+    ("Trona", "#d8d0c0"),
+    ("Thermonatrite", "#e0d8d0"),
+    ("Gaylussite", "#d8d0c0"),
+    ("Pirssonite", "#d0c8c0"),
+    ("Shortite", "#e0d838"),
+    ("Northupite", "#d8d0c0"),
+    ("Eitelite", "#d0c8c0"),
+    ("Bradleyite", "#c8c0b0"),
+    ("Tychite", "#c8c0b0"),
+    ("Schairerite", "#d0c8b8"),
+    ("Sulfohalite", "#d0c8b8"),
+    ("Kogarkoite", "#d0c8c0"),
+]
+
+# Safety net: drop case-insensitive duplicate names, keeping the first
+# occurrence so the positions of the surviving entries stay stable.
+# Underscore-prefixed loop variables keep the module namespace clean.
+_seen = set()
+_deduped = []
+for _name, _color in MINERAL_LIST:
+    _key = _name.lower()
+    if _key not in _seen:
+        _seen.add(_key)
+        _deduped.append((_name, _color))
+MINERAL_LIST = _deduped
+
+# Lazily created requests session shared by every API call in this module.
+SESSION = None
+
+
+def get_session():
+    """Return the shared ``requests`` session, creating it on first use.
+
+    The session is cached in the module-level ``SESSION`` global and sends a
+    descriptive User-Agent header with every request made through it.
+    """
+    global SESSION
+    if SESSION is not None:
+        return SESSION
+    SESSION = requests.Session()
+    user_agent = {
+        'User-Agent': 'DailyStoneBot/1.0 (k-boris.tech; educational mineral wiki)'
+    }
+    SESSION.headers.update(user_agent)
+    return SESSION
+
+
+def _request_with_backoff(session, url, params, timeout=30, max_retries=5):
+    """GET ``url`` and retry with backoff while the server answers HTTP 429.
+
+    Args:
+        session: Object with a ``requests.Session``-style ``get`` method.
+        url: Request URL.
+        params: Query parameters passed to ``session.get``.
+        timeout: Per-request timeout in seconds.
+        max_retries: Maximum number of attempts; values below 1 are treated
+            as a single attempt.
+
+    Returns:
+        The first response that is not a 429, after ``raise_for_status()``.
+
+    Raises:
+        Whatever ``raise_for_status()`` raises for a non-429 error response,
+        or for the last 429 once all attempts are used up.
+    """
+    attempts = max(1, max_retries)
+    for attempt in range(attempts):
+        resp = session.get(url, params=params, timeout=timeout)
+        if resp.status_code != 429:
+            resp.raise_for_status()
+            return resp
+        if attempt == attempts - 1:
+            # No retry follows the final attempt, so waiting would only
+            # delay the error.
+            break
+        retry_after = resp.headers.get('Retry-After')
+        if retry_after and retry_after.isdigit():
+            wait = min(int(retry_after) + 1, 120)  # Cap at 2 minutes
+        else:
+            # Non-numeric or missing Retry-After: exponential fallback
+            # (10, 20, 40, 80, ...), capped like the header-driven wait.
+            wait = min(10 * (2 ** attempt), 120)
+        time.sleep(wait)
+    # Every attempt was rate-limited: surface the last 429 to the caller.
+    resp.raise_for_status()
+    return resp
+
+
+def get_wikipedia_page(title):
+    """Fetch the parsed form of a Wikipedia article through the MediaWiki API.
+
+    Requests the rendered text and the image list for ``title`` (following
+    redirects) and returns the ``parse`` object of the JSON response, or
+    ``None`` when the response contains an ``error`` entry.
+    """
+    query = {
+        'action': 'parse',
+        'page': title,
+        'prop': 'text|images',
+        'format': 'json',
+        'redirects': 1,
+    }
+    response = _request_with_backoff(
+        get_session(), 'https://en.wikipedia.org/w/api.php', params=query,
+    )
+    payload = response.json()
+    return None if 'error' in payload else payload['parse']
+
+
+def get_image_urls(parse_data, limit=4):
+    """Resolve up to ``limit`` image URLs for the images listed on a page.
+
+    ``parse_data`` is the ``parse`` object returned by the API; its
+    ``images`` list holds file names. Names that look like icons, logos or
+    SVGs are dropped; if that leaves nothing, any JPEG/PNG name is accepted.
+    At most ``limit * 2`` candidates are looked up, asking for an 800 px wide
+    thumbnail and falling back to the original URL. A lookup that fails for
+    any reason is skipped.
+    """
+    session = get_session()
+    all_images = parse_data.get('images', [])
+
+    # Substrings that mark interface artwork rather than mineral photos.
+    skip_markers = [
+        'icon', 'logo', 'symbol', 'flag', 'commons-logo', 'wiki',
+        'question_mark', 'edit-clear', 'ambox', 'crystal_clear',
+        'lock-', 'padlock', 'red_pencil', 'text-', 'globe_',
+        'folder_', 'nuvola', 'gnome-', 'information', '.svg',
+        'wiktionary', 'disambig', 'merge-', 'split-', 'portal-',
+    ]
+    candidates = []
+    for image in all_images:
+        lowered = image.lower()
+        if any(marker in lowered for marker in skip_markers):
+            continue
+        candidates.append(image)
+    if not candidates:
+        candidates = [
+            image for image in all_images
+            if image.lower().endswith(('.jpg', '.jpeg', '.png'))
+        ]
+
+    collected = []
+    for file_name in candidates[:limit * 2]:
+        try:
+            lookup = {
+                'action': 'query',
+                'titles': f'File:{file_name}',
+                'prop': 'imageinfo',
+                'iiprop': 'url|size',
+                'iiurlwidth': 800,
+                'format': 'json',
+            }
+            resp = _request_with_backoff(
+                session, 'https://en.wikipedia.org/w/api.php', params=lookup, timeout=15,
+            )
+            for page in resp.json()['query']['pages'].values():
+                if 'imageinfo' not in page:
+                    continue
+                info = page['imageinfo'][0]
+                thumb = info.get('thumburl', info.get('url', ''))
+                if not thumb:
+                    continue
+                collected.append(thumb)
+                if len(collected) >= limit:
+                    return collected
+        except Exception:
+            # Best effort: one broken image must not lose the whole mineral.
+            continue
+    return collected
+
+
+def extract_infobox(soup):
+    """Collect label/value pairs from the first ``table.infobox`` on the page.
+
+    Only rows that have both a header cell and a data cell contribute. Labels
+    are stripped and lowercased; values are the cell text joined with single
+    spaces. Returns an empty dict when the page has no infobox.
+    """
+    table = soup.find('table', class_='infobox')
+    if not table:
+        return {}
+
+    pairs = {}
+    for row in table.find_all('tr'):
+        header_cell = row.find('th')
+        data_cell = row.find('td')
+        if not (header_cell and data_cell):
+            continue
+        label = header_cell.get_text(strip=True).lower()
+        pairs[label] = data_cell.get_text(' ', strip=True)
+    return pairs
+
+
+# Maps each Mineral field filled by the scrape command to the infobox labels
+# that may hold its value. Labels are lowercase because extract_infobox()
+# lowercases header text. Forms without a space (e.g. 'mohs scalehardness')
+# are presumably there because header text is extracted with no separator
+# between adjacent fragments.
+# match_field() tries the candidates in list order and compares by substring,
+# so earlier candidates take priority.
+FIELD_MAPPINGS = {
+    'formula': [
+        'formula', 'chemical formula', 'idealformula',
+        'formula(repeating unit)', 'chemical', 'composition',
+    ],
+    'category': ['category', 'mineral class', 'classification', 'group'],
+    'crystal_system': ['crystal system', 'crystalsystem', 'crystal class', 'system'],
+    'mohs_hardness': ['mohs scalehardness', 'mohs scale hardness', 'hardness', 'mohs hardness'],
+    'luster': ['luster', 'lustre', 'luster (mineralogy)'],
+    'streak': ['streak', 'streak color'],
+    'specific_gravity': ['specific gravity', 'density', 'specificgravity', 'relative density'],
+    'color_description': ['color', 'colour', 'color/pleochroism'],
+}
+
+
+def match_field(info, candidates):
+    """Return the infobox value for the best-matching candidate label.
+
+    Candidates are tried in order. For each one, a key equal to the candidate
+    wins over keys that merely contain it, so a longer label that happens to
+    include the candidate (e.g. 'streak color' for 'color') cannot shadow the
+    exact label. Empty values are skipped so a blank cell does not hide a
+    later usable match.
+
+    Args:
+        info: Dict of lowercase infobox label -> text value.
+        candidates: Label fragments in priority order.
+
+    Returns:
+        The first non-empty matching value, or ``''`` when nothing matches.
+    """
+    for candidate in candidates:
+        exact = info.get(candidate)
+        if exact:
+            return exact
+        for key, val in info.items():
+            if candidate in key and val:
+                return val
+    return ''
+
+
+def _clean_text(text):
+    """Remove citation marks and normalize whitespace.
+
+    Strips numeric reference marks and "citation needed" / "clarification
+    needed" tags, collapses runs of whitespace, and removes the stray spaces
+    before closing punctuation and after an opening parenthesis.
+
+    Returns the cleaned text with leading and trailing whitespace removed.
+    """
+    # Numeric reference marks: "[1]", "[2, 3]" and the spaced form "[ 1 ]".
+    text = re.sub(r'\[[\d,\s]+\]', '', text)
+    # Editorial tags. Whitespace inside the brackets is optional because the
+    # callers extract text with a ' ' separator between fragments, which
+    # turns the markup into "[ citation needed ]".
+    text = re.sub(
+        r'\[\s*(?:citation|clarification)\s+needed\s*\]', '', text,
+        flags=re.IGNORECASE,
+    )
+    # Normalize whitespace (collapse multiple spaces, fix space before punctuation)
+    text = re.sub(r'\s+', ' ', text)
+    text = re.sub(r'\s+([.,;:!?)])', r'\1', text)
+    text = re.sub(r'(\()\s+', r'\1', text)
+    return text.strip()
+
+
+def _find_heading_wrapper(tag):
+    """Return the enclosing ``div.mw-heading`` of a heading, else the heading.
+
+    The wrapper is returned only when the direct parent is a ``div`` whose
+    class list contains ``mw-heading``; in every other case ``tag`` itself is
+    returned.
+    """
+    wrapper = tag.parent
+    if not wrapper or wrapper.name != 'div':
+        return tag
+    css_classes = wrapper.get('class') or []
+    return wrapper if 'mw-heading' in css_classes else tag
+
+
+def extract_description(soup):
+    """Build the description from the first substantial paragraphs of a page.
+
+    Walks every ``<p>`` element in document order, keeps the first three
+    whose text is longer than 50 characters, cleans each with
+    ``_clean_text`` and joins them with blank lines. The scan is not limited
+    to the lead section: it simply stops once three paragraphs are kept.
+    """
+    kept = []
+    for para in soup.find_all('p'):
+        raw = para.get_text(' ', strip=True)
+        if len(raw) <= 50:
+            # Too short to be real prose (captions, stubs, empty paragraphs).
+            continue
+        kept.append(_clean_text(raw))
+        if len(kept) >= 3:
+            break
+    return '\n\n'.join(kept)
+
+
+def _collect_section_paragraphs(start_element, max_paras=2):
+    """Gather the paragraphs that follow a heading, up to the next heading.
+
+    Walks the following siblings of ``start_element``. The walk ends at a
+    bare ``h2``/``h3`` or at a ``div`` carrying one of the ``mw-heading``
+    classes. ``<p>`` siblings with more than 30 characters of text are
+    cleaned and collected until ``max_paras`` have been gathered.
+
+    Returns the list of cleaned paragraph strings (possibly empty).
+    """
+    heading_markers = {'mw-heading', 'mw-heading2', 'mw-heading3'}
+    collected = []
+    node = start_element.find_next_sibling()
+    while node:
+        is_bare_heading = node.name in ['h2', 'h3']
+        is_wrapped_heading = (
+            node.name == 'div'
+            and bool(heading_markers & set(node.get('class') or []))
+        )
+        if is_bare_heading or is_wrapped_heading:
+            # The next section starts here.
+            break
+        if node.name == 'p':
+            raw = node.get_text(' ', strip=True)
+            if len(raw) > 30:
+                collected.append(_clean_text(raw))
+                if len(collected) >= max_paras:
+                    break
+        node = node.find_next_sibling()
+    return collected
+
+
+def extract_history(soup):
+    """Return text from the first history-like section that has paragraphs.
+
+    Scans ``h2``/``h3`` headings in document order. A heading qualifies when
+    its lowercased text (minus a trailing "[edit]") contains one of:
+    history, etymology, discovery, naming, occurrence. The paragraphs of the
+    first qualifying section that yields any text are joined with blank
+    lines; ``''`` is returned when no such section exists.
+    """
+    keywords = ['history', 'etymology', 'discovery', 'naming', 'occurrence']
+
+    # find_all reaches bare headings and headings nested in mw-heading divs.
+    for heading in soup.find_all(['h2', 'h3']):
+        title = heading.get_text(strip=True).lower()
+        title = re.sub(r'\[edit\]$', '', title).strip()
+        if not any(word in title for word in keywords):
+            continue
+        paragraphs = _collect_section_paragraphs(_find_heading_wrapper(heading))
+        if paragraphs:
+            return '\n\n'.join(paragraphs)
+    return ''
+
+
+class Command(BaseCommand):
+    """Scrape the minerals in MINERAL_LIST from Wikipedia and upsert them."""
+
+    help = 'Scrape mineral data from Wikipedia'
+
+    def add_arguments(self, parser):
+        """Register the --limit, --dry-run and --skip-existing options."""
+        parser.add_argument('--limit', type=int, default=0, help='Max minerals to scrape (0 = all)')
+        parser.add_argument('--dry-run', action='store_true', help='List minerals without saving')
+        parser.add_argument('--skip-existing', action='store_true', help='Skip already-saved minerals')
+
+    def handle(self, *args, **options):
+        """Process the mineral list and print a per-mineral progress log.
+
+        Each mineral is skipped (``--skip-existing``), listed (``--dry-run``)
+        or scraped and saved. A failure on one mineral is reported and
+        counted but never aborts the run.
+        """
+        limit = options['limit']
+        dry_run = options['dry_run']
+        skip_existing = options['skip_existing']
+
+        minerals = MINERAL_LIST
+        if limit:
+            minerals = minerals[:limit]
+        total = len(minerals)
+
+        self.stdout.write(f'Processing {total} minerals...\n')
+
+        success = 0
+        skipped = 0
+        failed = 0
+
+        for i, (name, color_hex) in enumerate(minerals, 1):
+            # Strip Wikipedia disambiguation suffixes ("Celestine (mineral)")
+            # once; the result is both the lookup key and the stored name.
+            display_name = re.sub(r'\s*\([^)]*\)\s*$', '', name).strip()
+            if skip_existing and Mineral.objects.filter(name=display_name).exists():
+                self.stdout.write(f'  [{i}/{total}] SKIP {name} (already exists)')
+                skipped += 1
+                continue
+
+            if dry_run:
+                self.stdout.write(f'  [{i}/{total}] {name} ({color_hex})')
+                continue
+
+            self.stdout.write(f'  [{i}/{total}] Scraping {name}...', ending='')
+
+            try:
+                saved = self._scrape_one(name, display_name, color_hex, i)
+            except Exception as e:
+                # Per-mineral boundary: report and move on to the next one.
+                self.stdout.write(self.style.ERROR(f' ERROR: {e}'))
+                saved = False
+
+            if saved:
+                success += 1
+            else:
+                failed += 1
+
+            # Be polite to Wikipedia — ~3s between minerals keeps us under rate limits.
+            # This runs after every attempt that reached the network, including
+            # NOT FOUND and disambiguation results, but not after the last entry.
+            if i < total:
+                time.sleep(3)
+
+        self.stdout.write(f'\nDone: {success} saved, {skipped} skipped, {failed} failed')
+
+    def _scrape_one(self, name, display_name, color_hex, day_of_year):
+        """Fetch one mineral's page and create or update its Mineral row.
+
+        Args:
+            name: Wikipedia page title from MINERAL_LIST.
+            display_name: ``name`` without its disambiguation suffix; used as
+                the lookup key for the row.
+            color_hex: Theme color stored with the mineral.
+            day_of_year: 1-based position of the mineral in the processed list.
+
+        Returns:
+            True when the row was saved; False when the page was missing or
+            is a disambiguation page (a warning is written in both cases).
+        """
+        parsed = get_wikipedia_page(name)
+        if not parsed:
+            self.stdout.write(self.style.WARNING(' NOT FOUND'))
+            return False
+
+        html = parsed['text']['*']
+        soup = BeautifulSoup(html, 'html.parser')
+
+        # Skip actual disambiguation pages (they have the dmbox class)
+        if soup.find('table', id='disambigbox') or soup.find('div', class_='dmbox'):
+            self.stdout.write(self.style.WARNING(' DISAMBIGUATION - SKIPPED'))
+            return False
+
+        info = extract_infobox(soup)
+        image_urls = get_image_urls(parsed, limit=4)
+        description = extract_description(soup)
+        history = extract_history(soup)
+
+        # Values are sliced to fixed lengths before saving
+        # (presumably the model's max_length values; verify against Mineral).
+        mineral, created = Mineral.objects.update_or_create(
+            name=display_name,
+            defaults={
+                'formula': match_field(info, FIELD_MAPPINGS['formula'])[:200],
+                'category': match_field(info, FIELD_MAPPINGS['category'])[:200],
+                'crystal_system': match_field(info, FIELD_MAPPINGS['crystal_system'])[:200],
+                'mohs_hardness': match_field(info, FIELD_MAPPINGS['mohs_hardness'])[:50],
+                'luster': match_field(info, FIELD_MAPPINGS['luster'])[:200],
+                'streak': match_field(info, FIELD_MAPPINGS['streak'])[:200],
+                'specific_gravity': match_field(info, FIELD_MAPPINGS['specific_gravity'])[:100],
+                'color_description': match_field(info, FIELD_MAPPINGS['color_description'])[:300],
+                'color_hex': color_hex,
+                'description': description,
+                'history': history,
+                'image_urls': image_urls,
+                'wikipedia_url': f'https://en.wikipedia.org/wiki/{name.replace(" ", "_")}',
+                'day_of_year': day_of_year,
+            },
+        )
+
+        status = 'CREATED' if created else 'UPDATED'
+        img_count = len(image_urls)
+        self.stdout.write(self.style.SUCCESS(f' {status} ({img_count} images)'))
+        return True