From 8a162afe2a041d787d374cea2566cf8662d6f558 Mon Sep 17 00:00:00 2001 From: Boris Date: Mon, 30 Mar 2026 22:54:46 +0300 Subject: [PATCH] Add Russian translations and pronunciation button - Russian fields on Mineral model (name_ru, description_ru, history_ru, etc.) - scrape_minerals_ru management command fetches from Russian Wikipedia via langlinks - EN/RU toggle in header, saved to localStorage - Speaker button next to mineral name uses Web Speech API - Section headers and labels translated - Russian Wikipedia link in footer when in RU mode Co-Authored-By: Claude Opus 4.6 --- dailystone/admin.py | 8 +- .../management/commands/scrape_minerals_ru.py | 244 ++++++++++++++++++ .../migrations/0002_add_russian_fields.py | 38 +++ dailystone/models.py | 7 + dailystone/templates/dailystone/stone.html | 235 ++++++++++++++--- 5 files changed, 488 insertions(+), 44 deletions(-) create mode 100644 dailystone/management/commands/scrape_minerals_ru.py create mode 100644 dailystone/migrations/0002_add_russian_fields.py diff --git a/dailystone/admin.py b/dailystone/admin.py index 33cd6af..53a4f15 100644 --- a/dailystone/admin.py +++ b/dailystone/admin.py @@ -5,9 +5,9 @@ from .models import Mineral @admin.register(Mineral) class MineralAdmin(admin.ModelAdmin): - list_display = ('name', 'formula', 'day_of_year', 'color_hex', 'category') + list_display = ('name', 'name_ru', 'formula', 'day_of_year', 'color_hex', 'category') list_filter = ('category', 'crystal_system') - search_fields = ('name', 'formula') + search_fields = ('name', 'name_ru', 'formula') list_editable = ('day_of_year', 'color_hex') fieldsets = ( (None, { @@ -23,6 +23,10 @@ class MineralAdmin(admin.ModelAdmin): ('Text', { 'fields': ('description', 'history'), }), + ('Russian', { + 'fields': ('name_ru', 'color_description_ru', + 'description_ru', 'history_ru', 'wikipedia_url_ru'), + }), ('Links', { 'fields': ('wikipedia_url',), }), diff --git a/dailystone/management/commands/scrape_minerals_ru.py 
b/dailystone/management/commands/scrape_minerals_ru.py
new file mode 100644
index 0000000..4499664
--- /dev/null
+++ b/dailystone/management/commands/scrape_minerals_ru.py
@@ -0,0 +1,275 @@
+"""
+Scrape Russian Wikipedia translations for existing minerals.
+
+Usage:
+    python manage.py scrape_minerals_ru
+    python manage.py scrape_minerals_ru --limit 10
+    python manage.py scrape_minerals_ru --skip-existing
+"""
+import re
+import time
+
+import requests
+from bs4 import BeautifulSoup
+from django.core.management.base import BaseCommand
+
+from dailystone.models import Mineral
+
+SESSION = None
+
+
+def get_session():
+    """Return the shared requests session, creating it on first use."""
+    global SESSION
+    if SESSION is None:
+        SESSION = requests.Session()
+        SESSION.headers.update({
+            'User-Agent': 'DailyStoneBot/1.0 (k-boris.tech; educational mineral wiki)'
+        })
+    return SESSION
+
+
+def _request_with_backoff(session, url, params, timeout=30, max_retries=5):
+    """GET ``url``, retrying with back-off while the server answers HTTP 429.
+
+    The wait is taken from a numeric ``Retry-After`` header when present,
+    otherwise it doubles per attempt starting at 10 seconds; either way it
+    is capped at 120 seconds. Raises ``requests.HTTPError`` for an error
+    status, including a 429 that persists through the final attempt.
+    """
+    if max_retries < 1:
+        raise ValueError('max_retries must be at least 1')
+    for attempt in range(max_retries):
+        resp = session.get(url, params=params, timeout=timeout)
+        # Stop on any non-429 answer, and do not sleep after the final try.
+        if resp.status_code != 429 or attempt == max_retries - 1:
+            break
+        retry_after = resp.headers.get('Retry-After')
+        if retry_after and retry_after.isdigit():
+            wait = int(retry_after) + 1
+        else:
+            wait = 10 * (2 ** attempt)
+        time.sleep(min(wait, 120))
+    resp.raise_for_status()
+    return resp
+
+
+def _clean_text(text):
+    """Strip citation markers and normalise whitespace in scraped text."""
+    text = re.sub(r'\[[\d,\s]+\]', '', text)
+    text = re.sub(r'\[citation needed\]', '', text, flags=re.IGNORECASE)
+    text = re.sub(r'\[уточнить\]', '', text, flags=re.IGNORECASE)
+    text = re.sub(r'\s+', ' ', text)
+    text = re.sub(r'\s+([.,;:!?)])', r'\1', text)
+    text = re.sub(r'(\()\s+', r'\1', text)
+    return text.strip()
+
+
+def get_russian_title(english_title):
+    """Get the Russian Wikipedia article title via langlinks API.
+
+    Returns ``None`` when the English page has no Russian language link.
+    """
+    session = get_session()
+    resp = _request_with_backoff(session, 'https://en.wikipedia.org/w/api.php', params={
+        'action': 'query',
+        'titles': english_title,
+        'prop': 'langlinks',
+        'lllang': 'ru',
+        'redirects': 1,
+        'format': 'json',
+    })
+    data = resp.json()
+    pages = data.get('query', {}).get('pages', {})
+    for page_data in pages.values():
+        langlinks = page_data.get('langlinks', [])
+        if langlinks:
+            return langlinks[0]['*']
+    return None
+
+
+def get_russian_page(title):
+    """Fetch parsed Russian Wikipedia page.
+
+    Returns the ``parse`` object of the API response, or ``None`` when the
+    API reports an error or the response carries no ``parse`` object.
+    """
+    session = get_session()
+    resp = _request_with_backoff(session, 'https://ru.wikipedia.org/w/api.php', params={
+        'action': 'parse',
+        'page': title,
+        'prop': 'text',
+        'format': 'json',
+        'redirects': 1,
+    })
+    data = resp.json()
+    if 'error' in data:
+        return None
+    return data.get('parse')
+
+
+def _find_heading_wrapper(tag):
+    """Return the ``div.mw-heading`` wrapping ``tag``, or ``tag`` itself."""
+    parent = tag.parent
+    if parent and parent.name == 'div' and 'mw-heading' in (parent.get('class') or []):
+        return parent
+    return tag
+
+
+def _collect_section_paragraphs(start_element, max_paras=2):
+    """Collect cleaned paragraphs following ``start_element``.
+
+    Walks the following siblings until the next heading, keeping up to
+    ``max_paras`` paragraphs longer than 30 characters.
+    """
+    parts = []
+    heading_classes = {'mw-heading', 'mw-heading2', 'mw-heading3'}
+    sibling = start_element.find_next_sibling()
+    while sibling:
+        if sibling.name in ['h2', 'h3']:
+            break
+        if sibling.name == 'div' and heading_classes & set(sibling.get('class') or []):
+            break
+        if sibling.name == 'p':
+            text = sibling.get_text(' ', strip=True)
+            if len(text) > 30:
+                parts.append(_clean_text(text))
+            if len(parts) >= max_paras:
+                break
+        sibling = sibling.find_next_sibling()
+    return parts
+
+
+def extract_description(soup):
+    """Return the first three paragraphs over 50 characters, joined by blank lines."""
+    paragraphs = []
+    for p in soup.find_all('p'):
+        text = p.get_text(' ', strip=True)
+        if len(text) > 50:
+            paragraphs.append(_clean_text(text))
+        if len(paragraphs) >= 3:
+            break
+    return '\n\n'.join(paragraphs)
+
+
+def extract_history(soup):
+    """Return paragraphs of the first matching history-like section, or ''."""
+    history_headers = [
+        'история', 'этимология', 'открытие', 'происхождение названия',
+        'название', 'нахождение', 'месторождения',
+    ]
+    for header_tag in soup.find_all(['h2', 'h3']):
+        header_text = header_tag.get_text(strip=True).lower()
+        header_text = re.sub(r'\[править[^\]]*\]', '', header_text).strip()
+        if any(h in header_text for h in history_headers):
+            wrapper = _find_heading_wrapper(header_tag)
+            parts = _collect_section_paragraphs(wrapper)
+            if parts:
+                return '\n\n'.join(parts)
+    return ''
+
+
+def extract_infobox_color(soup):
+    """Try to extract color description from Russian infobox."""
+    table = soup.find('table', class_='infobox')
+    if not table:
+        return ''
+    for row in table.find_all('tr'):
+        th = row.find('th')
+        td = row.find('td')
+        if th and td:
+            key = th.get_text(strip=True).lower()
+            if 'цвет' in key or 'окраска' in key:
+                # Clean like the other extractors so citation markers
+                # such as [1] do not end up in the stored color text.
+                return _clean_text(td.get_text(' ', strip=True))
+    return ''
+
+
+class Command(BaseCommand):
+    """Management command that fills the Russian fields of Mineral rows."""
+    help = 'Scrape Russian Wikipedia translations for existing minerals'
+
+    def add_arguments(self, parser):
+        parser.add_argument('--limit', type=int, default=0)
+        parser.add_argument('--skip-existing', action='store_true',
+                            help='Skip minerals that already have Russian name')
+
+    def handle(self, *args, **options):
+        """Translate the selected minerals and print a summary of results."""
+        limit = options['limit']
+        skip_existing = options['skip_existing']
+
+        minerals = Mineral.objects.all()
+        skipped = 0
+        if skip_existing:
+            # Filter before applying --limit so that the limit counts
+            # minerals still lacking a translation, not ones skipped anyway.
+            skipped = minerals.exclude(name_ru='').count()
+            minerals = minerals.filter(name_ru='')
+        if limit:
+            minerals = minerals[:limit]
+
+        total = minerals.count()
+        self.stdout.write(f'Processing {total} minerals...\n')
+
+        success = 0
+        failed = 0
+
+        for i, mineral in enumerate(minerals, 1):
+            self.stdout.write(f'[{i}/{total}] {mineral.name}... ', ending='')
+
+            try:
+                # Extract English Wikipedia title from URL or use name
+                if mineral.wikipedia_url:
+                    en_title = mineral.wikipedia_url.split('/wiki/')[-1]
+                    en_title = requests.utils.unquote(en_title)
+                else:
+                    en_title = mineral.name
+
+                # Find Russian article
+                ru_title = get_russian_title(en_title)
+                if not ru_title:
+                    self.stdout.write('no Russian article')
+                    failed += 1
+                    time.sleep(2)
+                    continue
+
+                # Fetch Russian page
+                parse_data = get_russian_page(ru_title)
+                if not parse_data:
+                    self.stdout.write(f'failed to fetch {ru_title}')
+                    failed += 1
+                    time.sleep(2)
+                    continue
+
+                html = parse_data['text']['*']
+                soup = BeautifulSoup(html, 'html.parser')
+
+                # Remove reference sections, navboxes, etc.
+                for tag in soup.find_all(['table', 'div'], class_=['navbox', 'metadata']):
+                    tag.decompose()
+
+                mineral.name_ru = ru_title
+                mineral.description_ru = extract_description(soup)
+                mineral.history_ru = extract_history(soup)
+                mineral.wikipedia_url_ru = f'https://ru.wikipedia.org/wiki/{requests.utils.quote(ru_title)}'
+
+                color = extract_infobox_color(soup)
+                if color:
+                    mineral.color_description_ru = color[:300]
+
+                mineral.save()
+                success += 1
+                self.stdout.write(f'{ru_title}')
+
+            except Exception as e:
+                # Best-effort batch: report the failure and move on.
+                self.stdout.write(f'ERROR: {e}')
+                failed += 1
+
+            time.sleep(3)
+
+        self.stdout.write(
+            f'\nDone: {success} translated, {skipped} skipped, {failed} failed'
+        )
diff --git a/dailystone/migrations/0002_add_russian_fields.py b/dailystone/migrations/0002_add_russian_fields.py new file mode 100644 index 0000000..4aeff41 --- /dev/null +++ b/dailystone/migrations/0002_add_russian_fields.py @@ -0,0 +1,38 @@ +# Generated by Django 6.0.3 on 2026-03-30 19:51 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('dailystone', '0001_initial'), + ] + + operations = [ + migrations.AddField( + model_name='mineral', + name='color_description_ru', + field=models.CharField(blank=True, max_length=300), + ), +
migrations.AddField( + model_name='mineral', + name='description_ru', + field=models.TextField(blank=True), + ), + migrations.AddField( + model_name='mineral', + name='history_ru', + field=models.TextField(blank=True), + ), + migrations.AddField( + model_name='mineral', + name='name_ru', + field=models.CharField(blank=True, max_length=200), + ), + migrations.AddField( + model_name='mineral', + name='wikipedia_url_ru', + field=models.URLField(blank=True, max_length=500), + ), + ] diff --git a/dailystone/models.py b/dailystone/models.py index bbc2c28..198954a 100644 --- a/dailystone/models.py +++ b/dailystone/models.py @@ -18,6 +18,13 @@ class Mineral(models.Model): wikipedia_url = models.URLField(max_length=500, blank=True) day_of_year = models.IntegerField(unique=True, null=True, blank=True) + # Russian translations + name_ru = models.CharField(max_length=200, blank=True) + description_ru = models.TextField(blank=True) + history_ru = models.TextField(blank=True) + color_description_ru = models.CharField(max_length=300, blank=True) + wikipedia_url_ru = models.URLField(max_length=500, blank=True) + class Meta: ordering = ['day_of_year'] diff --git a/dailystone/templates/dailystone/stone.html b/dailystone/templates/dailystone/stone.html index 10d6ccc..3ab20fc 100644 --- a/dailystone/templates/dailystone/stone.html +++ b/dailystone/templates/dailystone/stone.html @@ -69,13 +69,15 @@ align-items: center; } - .search-toggle { + .header-btn { display: inline-flex; align-items: center; justify-content: center; - width: 34px; height: 34px; + min-width: 34px; + padding: 0 0.5rem; font-family: 'Georgia', serif; + font-size: 0.8rem; color: var(--stone-text); background: var(--stone-muted); border: 1px solid var(--border); @@ -85,15 +87,55 @@ transition: background 0.2s; } - .search-toggle:hover { + .header-btn:hover { background: color-mix(in srgb, var(--stone-color) 20%, #f5f5f0); } - .search-toggle svg { + .header-btn svg { width: 14px; height: 14px; } + .header-btn.active { 
+ background: var(--stone-color); + color: #fff; + border-color: var(--stone-color); + } + + .random-btn { + gap: 0.35rem; + padding: 0 0.65rem; + } + + .lang-toggle { + display: flex; + border: 1px solid var(--border); + border-radius: 3px; + overflow: hidden; + height: 34px; + } + + .lang-toggle button { + padding: 0 0.5rem; + font-family: 'Georgia', serif; + font-size: 0.75rem; + font-weight: bold; + border: none; + cursor: pointer; + background: var(--stone-muted); + color: var(--stone-text); + transition: background 0.2s, color 0.2s; + } + + .lang-toggle button:first-child { + border-right: 1px solid var(--border); + } + + .lang-toggle button.active { + background: var(--stone-color); + color: #fff; + } + .search-bar { display: none; margin: -0.5rem 0 1.5rem; @@ -139,43 +181,51 @@ background: color-mix(in srgb, var(--stone-color) 20%, #f5f5f0); } - .random-btn { - display: inline-flex; - align-items: center; - gap: 0.35rem; - padding: 0.4rem 0.65rem; - font-family: 'Georgia', serif; - font-size: 0.8rem; - color: var(--stone-text); - background: var(--stone-muted); - border: 1px solid var(--border); - border-radius: 3px; - text-decoration: none; - transition: background 0.2s; - } - - .random-btn:hover { - background: color-mix(in srgb, var(--stone-color) 20%, #f5f5f0); - } - - .random-btn svg { - width: 14px; - height: 14px; - } - .page-header .date { font-size: 0.85rem; color: var(--text-secondary); } + .mineral-name-row { + display: flex; + align-items: center; + justify-content: center; + gap: 0.5rem; + margin: 0.5rem 0 0.25rem; + } + .mineral-name { font-size: 2.4rem; font-weight: normal; color: var(--stone-text); - margin: 0.5rem 0 0.25rem; line-height: 1.2; } + .speak-btn { + display: inline-flex; + align-items: center; + justify-content: center; + width: 30px; + height: 30px; + background: none; + border: 1px solid transparent; + border-radius: 50%; + cursor: pointer; + color: var(--text-secondary); + transition: color 0.2s, border-color 0.2s; + 
flex-shrink: 0; + } + + .speak-btn:hover { + color: var(--stone-text); + border-color: var(--border); + } + + .speak-btn svg { + width: 18px; + height: 18px; + } + .formula { font-size: 1.15rem; font-family: 'Georgia', serif; @@ -191,6 +241,16 @@ top: 0.3em; } + /* Language content */ + .lang-ru { display: none; } + + .mineral-name-ru { + font-size: 1.1rem; + color: var(--text-secondary); + margin-top: -0.15rem; + margin-bottom: 0.25rem; + } + /* Photo gallery */ .gallery { margin: 1.5rem 0; @@ -397,12 +457,18 @@ @@ -449,52 +533,57 @@
+ {% if mineral.color_description %} {{ mineral.color_description }} {% else %} Typical color {% endif %} + + {% if mineral.color_description_ru %} + {{ mineral.color_description_ru }} + {% endif %}
{{ mineral.color_hex }}
-

Properties

+

PropertiesСвойства

{% if mineral.category %}
- Category + CategoryКатегория {{ mineral.category }}
{% endif %} {% if mineral.crystal_system %}
- Crystal System + Crystal SystemСингония {{ mineral.crystal_system }}
{% endif %} {% if mineral.mohs_hardness %}
- Hardness (Mohs) + Hardness (Mohs)Твёрдость (Моос) {{ mineral.mohs_hardness }}
{% endif %} {% if mineral.luster %}
- Luster + LusterБлеск {{ mineral.luster }}
{% endif %} {% if mineral.streak %}
- Streak + StreakЧерта {{ mineral.streak }}
{% endif %} {% if mineral.specific_gravity %}
- Specific Gravity + Specific GravityПлотность {{ mineral.specific_gravity }}
{% endif %} @@ -504,32 +593,66 @@ {% if mineral.description %}
-

About

+

AboutОписание

+
{% for para in mineral.description.splitlines %} {% if para %}

{{ para }}

{% endif %} {% endfor %} +
+ {% if mineral.description_ru %} +
+ {% for para in mineral.description_ru.splitlines %} + {% if para %}

{{ para }}

{% endif %} + {% endfor %} +
+ {% endif %}
{% endif %} {% if mineral.history %}
-

History & Etymology

+

History & EtymologyИстория и этимология

+
{% for para in mineral.history.splitlines %} {% if para %}

{{ para }}

{% endif %} {% endfor %} +
+ {% if mineral.history_ru %} +
+ {% for para in mineral.history_ru.splitlines %} + {% if para %}

{{ para }}

{% endif %} + {% endfor %} +
+ {% endif %}
{% endif %}