Add daily-stone page showing a different mineral each day
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful

New dailystone app with 207 minerals scraped from Wikipedia.
Each day displays a different mineral with photos, formula,
properties, description, and history. Page theme color matches
the mineral's typical appearance.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-30 18:30:10 +03:00
parent a8ab5f6ce1
commit 0be99e8e9a
20 changed files with 6445 additions and 0 deletions

View File

@@ -0,0 +1,22 @@
"""
Export mineral data as a JSON fixture for loading on the production server.
Usage:
python manage.py export_minerals > dailystone/fixtures/minerals.json
python manage.py loaddata dailystone/fixtures/minerals.json
"""
import json
import sys
from django.core.management.base import BaseCommand
from django.core import serializers
from dailystone.models import Mineral
class Command(BaseCommand):
    """Management command that dumps every Mineral row as a JSON fixture."""

    help = 'Export mineral data as a Django fixture (JSON)'

    def handle(self, *args, **options):
        """Serialize all minerals and write the JSON text to the command's stdout."""
        every_mineral = Mineral.objects.all()
        fixture_json = serializers.serialize('json', every_mineral, indent=2)
        self.stdout.write(fixture_json)

View File

@@ -0,0 +1,676 @@
"""
Scrape mineral data from Wikipedia for the Daily Stone feature.
Usage:
python manage.py scrape_minerals # scrape every mineral in MINERAL_LIST
python manage.py scrape_minerals --limit 10 # scrape first 10 only
python manage.py scrape_minerals --dry-run # just list names, don't save
"""
import re
import time
import hashlib
import json
import requests
from bs4 import BeautifulSoup
from django.core.management.base import BaseCommand
from dailystone.models import Mineral
# Curated (Wikipedia article title, hex color) pairs for well-known, visually
# interesting minerals. The hex value is only a rough stand-in for the
# mineral's typical appearance. Titles may carry a disambiguation suffix such
# as "(mineral)". Some names appear more than once; the pass right after this
# list keeps only the first occurrence of each.
MINERAL_LIST = [
    ("Quartz", "#f5f5f5"),
    ("Amethyst", "#9b59b6"),
    ("Rose quartz", "#f4a7b9"),
    ("Citrine (quartz)", "#f0c420"),
    ("Diamond", "#e8e8e8"),
    ("Ruby", "#e0115f"),
    ("Sapphire", "#0f52ba"),
    ("Emerald", "#50c878"),
    ("Topaz", "#ffc87c"),
    ("Opal", "#a8c3bc"),
    ("Turquoise (mineral)", "#40e0d0"),
    ("Garnet", "#733635"),
    ("Peridot", "#b4c424"),
    ("Aquamarine", "#7fffd4"),
    ("Tanzanite", "#4d5ba8"),
    ("Malachite", "#0bda51"),
    ("Lapis lazuli", "#26619c"),
    ("Jade", "#00a86b"),
    ("Obsidian", "#3d3635"),
    ("Pyrite", "#c5a647"),
    ("Hematite", "#5c5858"),
    ("Magnetite", "#353535"),
    ("Calcite", "#f5deb3"),
    ("Fluorite", "#7b68ee"),
    ("Apatite", "#509987"),
    ("Beryl", "#c1f0c1"),
    ("Spinel", "#ff4040"),
    ("Zircon", "#c4b19e"),
    ("Alexandrite", "#568c4c"),
    ("Tourmaline", "#86c67c"),
    ("Moonstone", "#c5cfe0"),
    ("Sunstone", "#e07020"),
    ("Labradorite", "#5678a0"),
    ("Rhodonite", "#e87ea1"),
    ("Rhodochrosite", "#e55b6e"),
    ("Azurite", "#2d5da1"),
    ("Chrysocolla", "#4cb9a0"),
    ("Cuprite", "#a52a2a"),
    ("Dioptase", "#209d7d"),
    ("Wulfenite", "#e68a00"),
    ("Vanadinite", "#cc3333"),
    ("Crocoite", "#e74c3c"),
    ("Realgar", "#e34234"),
    ("Orpiment", "#e9a820"),
    ("Stibnite", "#708090"),
    ("Galena", "#6b6e70"),
    ("Cinnabar", "#e44d2e"),
    ("Barite", "#c8c8c0"),
    ("Celestine (mineral)", "#a8d8ea"),
    ("Gypsum", "#f0ece2"),
    ("Halite", "#f0f0f0"),
    ("Sylvite", "#e0c0a0"),
    ("Sulfur", "#edda09"),
    ("Copper", "#b87333"),
    ("Gold", "#ffd700"),
    ("Silver", "#c0c0c0"),
    ("Platinum", "#e5e4e2"),
    ("Bismuth", "#969696"),
    ("Antimony", "#7b8c8a"),
    ("Arsenic", "#808080"),
    ("Graphite", "#474747"),
    ("Corundum", "#d9413c"),
    ("Spodumene", "#d8bfd8"),
    ("Kunzite", "#e6a8d7"),
    ("Hiddenite", "#98fb98"),
    ("Chrysoberyl", "#e8d44d"),
    ("Nephrite", "#638b57"),
    ("Jadeite", "#00a86b"),
    ("Serpentine subgroup", "#6b8e23"),
    ("Chalcopyrite", "#b8860b"),
    ("Bornite", "#8b6914"),
    ("Covellite", "#4169e1"),
    ("Molybdenite", "#6e6e6e"),
    ("Sphalerite", "#a0522d"),
    ("Wurtzite", "#8b4513"),
    ("Cassiterite", "#5c4033"),
    ("Rutile", "#b22222"),
    ("Anatase", "#4682b4"),
    ("Brookite", "#8b5e3b"),
    ("Ilmenite", "#404040"),
    ("Goethite", "#7b6b3a"),
    ("Limonite", "#9a7b4f"),
    ("Siderite", "#8b7d6b"),
    ("Magnesite", "#ede6d6"),
    ("Dolomite", "#dfc8a8"),
    ("Aragonite", "#faebd7"),
    ("Smithsonite", "#7ec8c8"),
    ("Cerussite", "#c8c8c0"),
    ("Witherite", "#e8e0d8"),
    ("Strontianite", "#c8d8c0"),
    ("Ankerite", "#c8b890"),
    ("Olivine", "#9ab973"),
    ("Forsterite", "#96be50"),
    ("Fayalite", "#6b5c3e"),
    ("Augite", "#2e4032"),
    ("Diopside", "#507856"),
    ("Enstatite", "#908870"),
    ("Hypersthene", "#5b5e4e"),
    ("Wollastonite", "#e8e0d8"),
    ("Tremolite", "#e0e8d8"),
    ("Actinolite", "#2d8b57"),
    ("Hornblende", "#3b4838"),
    ("Glaucophane", "#5b6db8"),
    ("Riebeckite", "#2f4f4f"),
    ("Muscovite", "#d4c48d"),
    ("Biotite", "#4a3c28"),
    ("Phlogopite", "#c4a35a"),
    ("Lepidolite", "#c8a2c8"),
    ("Talc", "#e8e8e0"),
    ("Kaolinite", "#f0e8d8"),
    ("Montmorillonite", "#c8b090"),
    ("Vermiculite", "#b89c78"),
    ("Chlorite group", "#6b8f47"),
    ("Prehnite", "#c8e8a0"),
    ("Epidote", "#7b8b2e"),
    ("Zoisite", "#6b8b73"),
    ("Clinozoisite", "#7b9b6b"),
    ("Vesuvianite", "#6b8040"),
    ("Pumpellyite", "#447744"),
    ("Lawsonite", "#8090a0"),
    ("Andalusite", "#b08080"),
    ("Sillimanite", "#c8c0b8"),
    ("Kyanite", "#5b8fbe"),
    ("Staurolite", "#7b5b3b"),
    ("Cordierite", "#6666aa"),
    ("Sodalite", "#3c578e"),
    ("Lazurite", "#26619c"),
    ("Hauyne", "#4466bb"),
    ("Leucite", "#d8d0c8"),
    ("Nepheline", "#c8c0a8"),
    ("Scapolite", "#d0c8b0"),
    ("Danburite", "#e8e0d0"),
    ("Datolite", "#d4e8d0"),
    ("Titanite", "#b8a048"),
    ("Dumortierite", "#4060a0"),
    ("Hemimorphite", "#98d8e8"),
    ("Willemite", "#70b020"),
    ("Phenakite", "#e0e0d8"),
    ("Euclase", "#80b8d8"),
    ("Bertrandite", "#d8d0c0"),
    ("Chrysoprase", "#79a868"),
    ("Carnelian", "#b5462a"),
    ("Jasper", "#ce4a2f"),
    ("Agate", "#b0a090"),
    ("Onyx", "#353839"),
    ("Chalcedony", "#c8d0d8"),
    ("Tiger's eye", "#b8860b"),
    ("Hawk's eye", "#4c6c8c"),
    ("Bloodstone", "#3b6e3f"),
    ("Aventurine", "#568b52"),
    ("Amazonite", "#4c8c7a"),
    ("Larvikite", "#4a5060"),
    ("Charoite", "#7b4e8a"),
    ("Sugilite", "#8b4789"),
    ("Larimar", "#88c8de"),
    ("Pietersite", "#4a5c40"),
    ("Moldavite", "#6b8e23"),
    ("Tektite", "#3a3a3a"),
    ("Shungite", "#2c2c2c"),
    ("Seraphinite", "#4a7c5a"),
    ("Astrophyllite", "#8b6c2a"),
    ("Nuummite", "#3a3a3a"),
    ("Howlite", "#e8e0d8"),
    ("Magnesite", "#ede6d6"),
    ("Sodalite", "#3c578e"),
    ("Unakite", "#7a8a5a"),
    ("Variscite", "#50b848"),
    ("Wavellite", "#78b868"),
    ("Vivianite", "#2e5e8e"),
    ("Erythrite", "#d84888"),
    ("Annabergite", "#58b858"),
    ("Adamite", "#b8e830"),
    ("Legrandite", "#e8d830"),
    ("Aurichalcite", "#78c8b8"),
    ("Rosasite", "#58a8a8"),
    ("Hemimorphite", "#98d8e8"),
    ("Cavansite", "#3070c8"),
    ("Pentlandite", "#b8a830"),
    ("Millerite", "#b8a040"),
    ("Nickeline", "#c8a088"),
    ("Skutterudite", "#808080"),
    ("Cobaltite", "#808888"),
    ("Arsenopyrite", "#808888"),
    ("Marcasite", "#c0b838"),
    ("Pyrrhotite", "#a09048"),
    ("Pentlandite", "#b8a830"),
    ("Chromite", "#404040"),
    ("Spessartine", "#e86838"),
    ("Almandine", "#a03050"),
    ("Pyrope", "#c82040"),
    ("Grossular", "#80b840"),
    ("Andradite", "#686830"),
    ("Uvarovite", "#388838"),
    ("Tsavorite", "#38a848"),
    ("Demantoid", "#58a838"),
    ("Melanite", "#303030"),
    ("Topazolite", "#d8c838"),
    ("Schorl", "#2c2c2c"),
    ("Elbaite", "#48b888"),
    ("Dravite", "#8b6c3a"),
    ("Indicolite", "#287888"),
    ("Rubellite", "#c83868"),
    ("Paraiba tourmaline", "#00b8c8"),
    ("Watermelon tourmaline", "#78b858"),
    ("Tephroite", "#7a6a50"),
    ("Rhodolite", "#c84878"),
    ("Iolite", "#5858a8"),
    ("Scolecite", "#e8e0e0"),
    ("Natrolite", "#e0e0d0"),
    ("Stilbite", "#e8b898"),
    ("Heulandite", "#e0c090"),
    ("Apophyllite", "#c8e8d0"),
    ("Analcime", "#e0e0d0"),
    ("Chabazite", "#e0c898"),
    ("Phillipsite", "#d0c8b0"),
    ("Thomsonite", "#d8d0c0"),
    ("Mesolite", "#e8e0d8"),
    ("Laumontite", "#e0d0b0"),
    ("Mordenite", "#e0d8c8"),
    ("Clinoptilolite", "#d8d0c0"),
    ("Erionite", "#e0d8d0"),
    ("Colemanite", "#e0d8c8"),
    ("Ulexite", "#f0e8e0"),
    ("Borax", "#e8e0d8"),
    ("Kernite", "#e0d8d0"),
    ("Tincalconite", "#e8e0d8"),
    ("Sassolite", "#e0e0d0"),
    ("Boracite", "#c8d8c0"),
    ("Sinhalite", "#c0a870"),
    ("Kornerupine", "#587848"),
    ("Grandidierite", "#4898a8"),
    ("Serendibite", "#384838"),
    ("Taaffeite", "#c888c8"),
    ("Painite", "#a06040"),
    ("Musgravite", "#808878"),
    ("Jeremejevite", "#a8c8e0"),
    ("Poudretteite", "#e8c0d8"),
    ("Benitoite", "#3858c8"),
    ("Neptunite", "#383028"),
    ("Joaquinite", "#a88030"),
    ("Sanbornite", "#e0d8c8"),
    ("Fresnoite", "#e0d030"),
    ("Celsian", "#d8d0c0"),
    ("Hyalophane", "#d0c8b0"),
    ("Harmotome", "#d8d0c0"),
    ("Pectolite", "#d0e0e0"),
    ("Okenite", "#f0e8e0"),
    ("Gyrolite", "#e0e8d0"),
    ("Tobermorite", "#d8d0c8"),
    ("Xonotlite", "#e0d8d0"),
    ("Thaumasite", "#e8e0d8"),
    ("Ettringite", "#e8e838"),
    ("Sturmanite", "#e8e030"),
    ("Charlesite", "#e0e0c8"),
    ("Afwillite", "#e0d8d0"),
    ("Hillebrandite", "#e0d8c8"),
    ("Foshagite", "#e0e0d0"),
    ("Jennite", "#d8c8b8"),
    ("Suolunite", "#d8d0c0"),
    ("Rosenbuschite", "#c8a070"),
    ("Eudialyte", "#c84860"),
    ("Catapleiite", "#c8c0b0"),
    ("Lorenzenite", "#584838"),
    ("Ramsayite", "#685838"),
    ("Lamprophyllite", "#a08030"),
    ("Murmanite", "#907050"),
    ("Lomonosovite", "#886040"),
    ("Vuonnemite", "#b89050"),
    ("Villiaumite", "#e8a030"),
    ("Ussingite", "#c8a0b8"),
    ("Chkalovite", "#d8d0c8"),
    ("Tugtupite", "#e0586e"),
    ("Sorensenite", "#d8d0c8"),
    ("Tinguaite", "#586850"),
    ("Cancrinite", "#e0c030"),
    ("Vishnevite", "#c8c0a0"),
    ("Davyne", "#d0c890"),
    ("Microsommite", "#d0c890"),
    ("Nosean", "#707888"),
    ("Hackmanite", "#9870a0"),
    ("Tugtupite", "#e0586e"),
    ("Pargasite", "#386838"),
    ("Edenite", "#507848"),
    ("Kaersutite", "#483830"),
    ("Richterite", "#586880"),
    ("Winchite", "#607060"),
    ("Barroisite", "#506860"),
    ("Gedrite", "#606058"),
    ("Anthophyllite", "#807860"),
    ("Cummingtonite", "#787068"),
    ("Grunerite", "#686058"),
    ("Holmquistite", "#5868a0"),
    ("Sapphirine", "#4060a8"),
    ("Kornerupine", "#587848"),
    ("Prismatine", "#586048"),
    ("Boralsilite", "#d0c8b8"),
    ("Werdingite", "#a8a098"),
    ("Grandidierite", "#4898a8"),
    ("Ominelite", "#404038"),
    ("Serendibite", "#384838"),
    ("Sinhalite", "#c0a870"),
    ("Taafeite", "#c888c8"),
    ("Musgravite", "#808878"),
    ("Johachidolite", "#e0c070"),
    ("Painite", "#a06040"),
    ("Jeremejevite", "#a8c8e0"),
    ("Poudretteite", "#e8c0d8"),
    ("Benitoite", "#3858c8"),
    ("Neptunite", "#383028"),
    ("Joaquinite", "#a88030"),
    ("Sanbornite", "#e0d8c8"),
    ("Howlite", "#e8e0d8"),
    ("Magnesite", "#ede6d6"),
    ("Selenite (mineral)", "#f0eee0"),
    ("Desert rose (crystal)", "#d8b890"),
    ("Fulgurite", "#c8b898"),
    ("Tektite", "#3a3a3a"),
    ("Meteorite", "#686058"),
    ("Pallasite", "#a09048"),
    ("Kamacite", "#909090"),
    ("Taenite", "#a0a0a0"),
    ("Troilite", "#886838"),
    ("Schreibersite", "#a0a098"),
    ("Cohenite", "#686060"),
    ("Moissanite", "#b8e8c8"),
    ("Lonsdaleite", "#c8c8c0"),
    ("Stishovite", "#d0d0c8"),
    ("Coesite", "#c8c8c0"),
    ("Seifertite", "#c0c0b8"),
    ("Ringwoodite", "#5878c8"),
    ("Bridgmanite", "#a0a098"),
    ("Davemaoite", "#a8a0a0"),
    ("Ice", "#e0f0f8"),
    ("Dry ice", "#e8e8f0"),
    ("Sal ammoniac", "#e0e0d8"),
    ("Niter", "#e8e0d8"),
    ("Natron", "#e0d8c8"),
    ("Trona", "#d8d0c0"),
    ("Thermonatrite", "#e0d8d0"),
    ("Gaylussite", "#d8d0c0"),
    ("Pirssonite", "#d0c8c0"),
    ("Shortite", "#e0d838"),
    ("Northupite", "#d8d0c0"),
    ("Eitelite", "#d0c8c0"),
    ("Bradleyite", "#c8c0b0"),
    ("Tychite", "#c8c0b0"),
    ("Schairerite", "#d0c8b8"),
    ("Sulfohalite", "#d0c8b8"),
    ("Kogarkoite", "#d0c8c0"),
]
# Collapse repeated entries: names are compared case-insensitively and only the
# first (name, color) pair seen for each name survives, in original order.
_seen = set()
_deduped = []
for name, color in MINERAL_LIST:
    key = name.lower()
    if key in _seen:
        continue
    _seen.add(key)
    _deduped.append((name, color))
MINERAL_LIST = _deduped
# Lazily-created requests session shared by every API call in this module.
SESSION = None


def get_session():
    """Return the module-wide HTTP session, creating it on first use.

    The session is built once with the bot's User-Agent header and then
    cached in the ``SESSION`` global for all later calls.
    """
    global SESSION
    if SESSION is not None:
        return SESSION
    SESSION = requests.Session()
    SESSION.headers.update({
        'User-Agent': 'DailyStoneBot/1.0 (k-boris.tech; educational mineral wiki)'
    })
    return SESSION
def _request_with_backoff(session, url, params, timeout=30, max_retries=5):
"""Make a request with backoff on 429 errors, respecting Retry-After."""
for attempt in range(max_retries):
resp = session.get(url, params=params, timeout=timeout)
if resp.status_code == 429:
retry_after = resp.headers.get('Retry-After')
if retry_after and retry_after.isdigit():
wait = min(int(retry_after) + 1, 120) # Cap at 2 minutes
else:
wait = 10 * (2 ** attempt) # 10, 20, 40, 80, 160
time.sleep(wait)
continue
resp.raise_for_status()
return resp
resp.raise_for_status()
return resp
def get_wikipedia_page(title):
    """Request the parsed form of a Wikipedia article from the API.

    Asks for the rendered text and the image list, following redirects.
    Returns the ``parse`` object of the JSON response, or ``None`` when the
    response contains an ``error`` entry.
    """
    query = {
        'action': 'parse',
        'page': title,
        'prop': 'text|images',
        'format': 'json',
        'redirects': 1,
    }
    http = get_session()
    response = _request_with_backoff(
        http, 'https://en.wikipedia.org/w/api.php', params=query,
    )
    payload = response.json()
    return None if 'error' in payload else payload['parse']
def get_image_urls(parse_data, limit=4):
    """Turn the file names listed on a parsed page into image URLs.

    File names that look like icons, logos, SVGs or other wiki chrome are
    dropped first; if nothing survives, any .jpg/.jpeg/.png name is used
    instead. At most ``limit * 2`` files are looked up, and the function
    returns as soon as ``limit`` URLs have been collected. A lookup that
    fails for any reason is skipped.
    """
    session = get_session()
    listed = parse_data.get('images', [])

    # Substrings that mark a file as decoration rather than a mineral photo
    unwanted = (
        'icon', 'logo', 'symbol', 'flag', 'commons-logo', 'wiki',
        'question_mark', 'edit-clear', 'ambox', 'crystal_clear',
        'lock-', 'padlock', 'red_pencil', 'text-', 'globe_',
        'folder_', 'nuvola', 'gnome-', 'information', '.svg',
        'wiktionary', 'disambig', 'merge-', 'split-', 'portal-',
    )
    candidates = []
    for file_name in listed:
        lowered = file_name.lower()
        if not any(marker in lowered for marker in unwanted):
            candidates.append(file_name)
    if not candidates:
        candidates = [
            file_name for file_name in listed
            if file_name.lower().endswith(('.jpg', '.jpeg', '.png'))
        ]

    found = []
    for file_name in candidates[:limit * 2]:
        try:
            query = {
                'action': 'query',
                'titles': f'File:{file_name}',
                'prop': 'imageinfo',
                'iiprop': 'url|size',
                'iiurlwidth': 800,
                'format': 'json',
            }
            response = _request_with_backoff(
                session, 'https://en.wikipedia.org/w/api.php', params=query, timeout=15,
            )
            for entry in response.json()['query']['pages'].values():
                if 'imageinfo' not in entry:
                    continue
                details = entry['imageinfo'][0]
                # Use the scaled thumbnail when the key is present, else the original URL
                if 'thumburl' in details:
                    link = details['thumburl']
                else:
                    link = details.get('url', '')
                if link:
                    found.append(link)
                    if len(found) >= limit:
                        return found
        except Exception:
            # Best effort: one broken lookup must not abort the others
            continue
    return found
def extract_infobox(soup):
    """Read the page's infobox table into a ``{label: value}`` dict.

    Only rows that contain both a header cell and a data cell contribute.
    Labels are lower-cased; values are the cell text joined with spaces.
    Returns an empty dict when the page has no infobox table.
    """
    infobox = soup.find('table', class_='infobox')
    if not infobox:
        return {}
    fields = {}
    for tr in infobox.find_all('tr'):
        header = tr.find('th')
        cell = tr.find('td')
        if not (header and cell):
            continue
        label = header.get_text(strip=True).lower()
        fields[label] = cell.get_text(' ', strip=True)
    return fields
# For each Mineral field, the infobox labels (lower-case) that may hold its
# value, listed in priority order. match_field() tests each entry as a
# substring of the scraped label, so short entries also match longer labels.
FIELD_MAPPINGS = {
    'formula': [
        'formula',
        'chemical formula',
        'idealformula',
        'formula(repeating unit)',
        'chemical',
        'composition',
    ],
    'category': [
        'category',
        'mineral class',
        'classification',
        'group',
    ],
    'crystal_system': [
        'crystal system',
        'crystalsystem',
        'crystal class',
        'system',
    ],
    'mohs_hardness': [
        'mohs scalehardness',
        'mohs scale hardness',
        'hardness',
        'mohs hardness',
    ],
    'luster': [
        'luster',
        'lustre',
        'luster (mineralogy)',
    ],
    'streak': [
        'streak',
        'streak color',
    ],
    'specific_gravity': [
        'specific gravity',
        'density',
        'specificgravity',
        'relative density',
    ],
    'color_description': [
        'color',
        'colour',
        'color/pleochroism',
    ],
}
def match_field(info, candidates):
    """Return the infobox value for the first candidate label that matches.

    Candidates are tried in order. For each one, a key equal to the candidate
    wins; otherwise the first key that merely contains it is used. Checking
    the exact key first stops a short candidate such as ``'color'`` from
    picking up an earlier, unrelated key such as ``'streak color'`` when the
    real ``'color'`` key is present.

    Args:
        info: Mapping of lower-cased infobox labels to their text values.
        candidates: Labels (or label fragments) to look for, best first.

    Returns:
        The matched value, or ``''`` when no candidate matches any key.
    """
    for candidate in candidates:
        if candidate in info:
            return info[candidate]
        for key, value in info.items():
            if candidate in key:
                return value
    return ''
def _clean_text(text):
"""Remove citation marks and normalize whitespace."""
text = re.sub(r'\[[\d,\s]+\]', '', text)
text = re.sub(r'\[citation needed\]', '', text, flags=re.IGNORECASE)
text = re.sub(r'\[clarification needed\]', '', text, flags=re.IGNORECASE)
# Normalize whitespace (collapse multiple spaces, fix space before punctuation)
text = re.sub(r'\s+', ' ', text)
text = re.sub(r'\s+([.,;:!?)])', r'\1', text)
text = re.sub(r'(\()\s+', r'\1', text)
return text.strip()
def _find_heading_wrapper(tag):
"""Return the wrapper div if the heading is inside mw-heading, else the tag itself."""
parent = tag.parent
if parent and parent.name == 'div' and 'mw-heading' in (parent.get('class') or []):
return parent
return tag
def extract_description(soup):
    """Build a short description from the page's first substantial paragraphs.

    Walks every ``<p>`` in document order, ignores those whose text is 50
    characters or shorter, and joins the first three remaining paragraphs
    (cleaned) with blank lines.
    """
    chosen = []
    for paragraph in soup.find_all('p'):
        raw = paragraph.get_text(' ', strip=True)
        if len(raw) <= 50:
            continue
        chosen.append(_clean_text(raw))
        if len(chosen) >= 3:
            break
    return '\n\n'.join(chosen)
def _collect_section_paragraphs(start_element, max_paras=2):
"""Collect paragraphs after a heading element until the next heading."""
parts = []
heading_classes = {'mw-heading', 'mw-heading2', 'mw-heading3'}
sibling = start_element.find_next_sibling()
while sibling:
# Stop at next heading (div.mw-heading or bare h2/h3)
if sibling.name in ['h2', 'h3']:
break
if sibling.name == 'div' and heading_classes & set(sibling.get('class') or []):
break
if sibling.name == 'p':
text = sibling.get_text(' ', strip=True)
if len(text) > 30:
parts.append(_clean_text(text))
if len(parts) >= max_paras:
break
sibling = sibling.find_next_sibling()
return parts
def extract_history(soup):
    """Return text from the first history-like section that has paragraphs.

    Every ``h2``/``h3`` heading is checked; one whose title contains
    "history", "etymology", "discovery", "naming" or "occurrence" selects its
    section. The section's paragraphs are joined with blank lines. Returns
    ``''`` when no such section yields any text.
    """
    keywords = ('history', 'etymology', 'discovery', 'naming', 'occurrence')
    for heading in soup.find_all(['h2', 'h3']):
        title = heading.get_text(strip=True).lower()
        # Drop a trailing "[edit]" link label from the heading text
        title = re.sub(r'\[edit\]$', '', title).strip()
        if not any(word in title for word in keywords):
            continue
        # The heading may sit inside an mw-heading div; siblings hang off that
        anchor = _find_heading_wrapper(heading)
        paragraphs = _collect_section_paragraphs(anchor)
        if paragraphs:
            return '\n\n'.join(paragraphs)
    return ''
class Command(BaseCommand):
    """Scrape each mineral in MINERAL_LIST from Wikipedia and save it.

    Every mineral is fetched through the Wikipedia API, parsed, and written
    with ``Mineral.objects.update_or_create`` keyed on its display name. The
    mineral's 1-based position in the (optionally limited) list is stored as
    ``day_of_year``.
    """

    help = 'Scrape mineral data from Wikipedia'

    # Truncation applied to each scraped infobox field before saving.
    # NOTE(review): presumably mirrors the model's max_length values — confirm.
    _FIELD_LIMITS = {
        'formula': 200,
        'category': 200,
        'crystal_system': 200,
        'mohs_hardness': 50,
        'luster': 200,
        'streak': 200,
        'specific_gravity': 100,
        'color_description': 300,
    }

    def add_arguments(self, parser):
        """Register the --limit, --dry-run and --skip-existing options."""
        parser.add_argument('--limit', type=int, default=0, help='Max minerals to scrape (0 = all)')
        parser.add_argument('--dry-run', action='store_true', help='List minerals without saving')
        parser.add_argument('--skip-existing', action='store_true', help='Skip already-saved minerals')

    @staticmethod
    def _display_name(name):
        """Strip a trailing parenthesized suffix, e.g. "Celestine (mineral)" -> "Celestine"."""
        return re.sub(r'\s*\([^)]*\)\s*$', '', name).strip()

    def handle(self, *args, **options):
        """Run the scrape and print a per-mineral status line plus a summary."""
        limit = options['limit']
        dry_run = options['dry_run']
        skip_existing = options['skip_existing']

        minerals = MINERAL_LIST[:limit] if limit else MINERAL_LIST
        total = len(minerals)
        self.stdout.write(f'Processing {total} minerals...\n')

        success = 0
        skipped = 0
        failed = 0
        for i, (name, color_hex) in enumerate(minerals, 1):
            display_name = self._display_name(name)
            if skip_existing and Mineral.objects.filter(name=display_name).exists():
                self.stdout.write(f' [{i}/{total}] SKIP {name} (already exists)')
                skipped += 1
                continue
            if dry_run:
                self.stdout.write(f' [{i}/{total}] {name} ({color_hex})')
                continue

            self.stdout.write(f' [{i}/{total}] Scraping {name}...', ending='')
            try:
                saved = self._scrape_mineral(name, display_name, color_hex, i)
            except Exception as e:
                # Per-mineral boundary: report the failure and keep going
                self.stdout.write(self.style.ERROR(f' ERROR: {e}'))
                saved = False
            if saved:
                success += 1
            else:
                failed += 1
            # Be polite to Wikipedia — ~3s between minerals keeps us under rate
            # limits. This runs after every lookup, including pages that were
            # not found or turned out to be disambiguation pages.
            time.sleep(3)

        self.stdout.write(f'\nDone: {success} saved, {skipped} skipped, {failed} failed')

    def _scrape_mineral(self, name, display_name, color_hex, day_of_year):
        """Fetch one mineral page and upsert it.

        Writes the status suffix for the current output line. Returns True
        when a row was created or updated, False when the page was missing or
        is a disambiguation page. Exceptions propagate to the caller.
        """
        parsed = get_wikipedia_page(name)
        if not parsed:
            self.stdout.write(self.style.WARNING(' NOT FOUND'))
            return False

        soup = BeautifulSoup(parsed['text']['*'], 'html.parser')
        # Skip actual disambiguation pages (they have the dmbox class)
        if soup.find('table', id='disambigbox') or soup.find('div', class_='dmbox'):
            self.stdout.write(self.style.WARNING(' DISAMBIGUATION - SKIPPED'))
            return False

        info = extract_infobox(soup)
        image_urls = get_image_urls(parsed, limit=4)
        description = extract_description(soup)
        history = extract_history(soup)

        defaults = {
            field: match_field(info, FIELD_MAPPINGS[field])[:max_len]
            for field, max_len in self._FIELD_LIMITS.items()
        }
        defaults.update({
            'color_hex': color_hex,
            'description': description,
            'history': history,
            'image_urls': image_urls,
            'wikipedia_url': f'https://en.wikipedia.org/wiki/{name.replace(" ", "_")}',
            'day_of_year': day_of_year,
        })
        _, created = Mineral.objects.update_or_create(name=display_name, defaults=defaults)

        status = 'CREATED' if created else 'UPDATED'
        img_count = len(image_urls)
        self.stdout.write(self.style.SUCCESS(f' {status} ({img_count} images)'))
        return True