Quellen-Badges (OT/WF), Legende verschoben, Footer mit Emojis, A11y-Verbesserungen; Generator: Merge OpenThesaurus+wordfreq; Dockerfile/Gunicorn hinzugefügt

2025-08-19 11:26:02 +02:00
parent d6d23a230e
commit 916f6510d8
5 changed files with 26823 additions and 60 deletions
--- a/app.py
+++ b/app.py
@@ -1,24 +1,37 @@
 from pathlib import Path
+import json
+from typing import Tuple, Dict, List
 from flask import Flask, render_template, request

 app = Flask(__name__)


-def load_words() -> list[str]:
-    data_path = Path(__file__).parent / "data" / "words_de_5.txt"
-    if not data_path.exists():
-        return []
-    words: list[str] = []
-    with data_path.open("r", encoding="utf-8") as f:
-        for line in f:
-            word = line.strip().lower()
-            if len(word) == 5 and word.isalpha():
-                words.append(word)
-    return words
+def load_words() -> Tuple[List[str], Dict[str, List[str]]]:
+    data_dir = Path(__file__).parent / "data"
+    txt_path = data_dir / "words_de_5.txt"
+    json_path = data_dir / "words_de_5_sources.json"
+
+    words: List[str] = []
+    sources_map: Dict[str, List[str]] = {}
+
+    if txt_path.exists():
+        with txt_path.open("r", encoding="utf-8") as f:
+            for line in f:
+                word = line.strip().lower()
+                if len(word) == 5 and word.isalpha():
+                    words.append(word)
+
+    if json_path.exists():
+        try:
+            sources_map = json.loads(json_path.read_text(encoding="utf-8"))
+        except Exception:
+            sources_map = {}
+
+    return words, sources_map


-def filter_words(words: list[str], position_letters: list[str], includes_text: str, excludes_text: str) -> list[str]:
-    results: list[str] = []
+def filter_words(words: List[str], position_letters: List[str], includes_text: str, excludes_text: str) -> List[str]:
+    results: List[str] = []
    includes_letters = [ch for ch in includes_text.lower() if ch.isalpha()]
    excludes_letters = [ch for ch in excludes_text.lower() if ch.isalpha()]
    for word in words:
@@ -37,9 +50,9 @@ def filter_words(words: list[str], position_letters: list[str], includes_text: s

@app.route("/", methods=["GET", "POST"])
 def index():
-    all_words = load_words()
-    results: list[str] | None = None
-    pos: list[str] = ["", "", "", "", ""]
+    all_words, sources_map = load_words()
+    results: List[str] | None = None
+    pos: List[str] = ["", "", "", "", ""]
    includes: str = ""
    excludes: str = ""
    if request.method == "POST":
@@ -60,6 +73,7 @@ def index():
        includes=includes,
        excludes=excludes,
        words_count=len(all_words),
+        sources_map=sources_map,
    )


--- a/data/words_de_5.txt
+++ b/data/words_de_5.txt
--- a/data/words_de_5_sources.json
+++ b/data/words_de_5_sources.json
--- a/scripts/generate_wordlist.py
+++ b/scripts/generate_wordlist.py
@@ -1,11 +1,12 @@
 from __future__ import annotations

+import json
 import re
 from pathlib import Path
 from typing import Iterable

 try:
-    # Optional, nur Fallback
+    # Optional: wordfreq als zusätzliche Quelle
    from wordfreq import top_n_list  # type: ignore
 except Exception:  # pragma: no cover
    top_n_list = None  # type: ignore
@@ -57,21 +58,49 @@ def extract_from_wordfreq(limit: int = 500_000) -> list[str]:
 def main() -> None:
    root = Path(__file__).resolve().parents[1]
    source_ot = root / "data" / "openthesaurus.txt"
-    out_path = root / "data" / "words_de_5.txt"
-    out_path.parent.mkdir(parents=True, exist_ok=True)
+    out_txt = root / "data" / "words_de_5.txt"
+    out_json = root / "data" / "words_de_5_sources.json"
+    out_txt.parent.mkdir(parents=True, exist_ok=True)

+    ot_words: list[str] = []
    if source_ot.exists():
-        words = extract_from_openthesaurus(source_ot)
-        source = "OpenThesaurus"
-    else:
-        words = extract_from_wordfreq()
-        source = "wordfreq"
+        ot_words = extract_from_openthesaurus(source_ot)

-    with out_path.open("w", encoding="utf-8") as f:
-        for w in words:
+    wf_words = extract_from_wordfreq()
+
+    ot_set = set(ot_words)
+    wf_set = set(wf_words)
+
+    merged = sorted(ot_set | wf_set)
+
+    # Textliste schreiben
+    with out_txt.open("w", encoding="utf-8") as f:
+        for w in merged:
            f.write(w + "\n")

-    print(f"Gespeichert: {len(words)} Wörter (Quelle: {source}) -> {out_path}")
+    # Quellen-Map schreiben
+    sources_map: dict[str, list[str]] = {}
+    for w in merged:
+        srcs: list[str] = []
+        if w in ot_set:
+            srcs.append("ot")
+        if w in wf_set:
+            srcs.append("wf")
+        sources_map[w] = srcs
+
+    with out_json.open("w", encoding="utf-8") as jf:
+        json.dump(sources_map, jf, ensure_ascii=False)
+
+    print(
+        " | ".join(
+            [
+                f"OpenThesaurus: {len(ot_set)}",
+                f"wordfreq: {len(wf_set)}",
+                f"gesamt (dedupliziert): {len(merged)}",
+                f"→ {out_txt} / {out_json}",
+            ]
+        )
+    )


 if __name__ == "__main__":
--- a/templates/index.html
+++ b/templates/index.html
@@ -11,51 +11,95 @@
        .grid input { text-align: center; font-size: 1.25rem; padding: .4rem; }
        label { font-weight: 600; display: block; margin-top: 1rem; margin-bottom: .25rem; }
        .results { margin-top: 1.5rem; }
-        .badge { display: inline-block; padding: .25rem .5rem; background: #f3f4f6; border-radius: .375rem; margin-right: .25rem; margin-bottom: .25rem; }
+        .badge { display: inline-block; padding: .25rem .5rem; background: #e5e7eb; color: #111827; border-radius: .375rem; margin-right: .25rem; margin-bottom: .25rem; }
+        .source { font-size: .75rem; padding: .1rem .35rem; border-radius: .25rem; margin-left: .25rem; }
+        .source.ot { background: #dbeafe; color: #1e40af; }
+        .source.wf { background: #dcfce7; color: #065f46; }
        button { margin-top: 1rem; padding: .5rem 1rem; font-size: 1rem; }
        summary { cursor: pointer; }
+        .footer { margin-top: 2rem; font-size: .9rem; color: #6b7280; }
+        .footer a { color: inherit; text-decoration: underline; }
+        .sr-only { position: absolute; width: 1px; height: 1px; padding: 0; margin: -1px; overflow: hidden; clip: rect(0, 0, 1px, 1px); white-space: nowrap; border: 0; }
+        .skip-link { position: absolute; left: -9999px; top: auto; width: 1px; height: 1px; overflow: hidden; }
+        .skip-link:focus { position: static; width: auto; height: auto; padding: .5rem .75rem; background: #111827; color: #ffffff; border-radius: .25rem; }
+        .hint { margin-top: .25rem; color: #374151; font-size: .9rem; }
+        .word-list { list-style: none; padding: 0; margin: 0; }
+        .word-list li { display: inline-block; margin: 0 .25rem .25rem 0; }
+        fieldset { border: 1px solid #e5e7eb; border-radius: .5rem; padding: .75rem; }
+        legend { font-weight: 700; padding: 0 .25rem; }
    </style>
 </head>
 <body>
+    <a href="#results" class="skip-link">Zum Ergebnisbereich springen</a>
    <div class="container">
        <h1>Wordle‑Cheater (Deutsch)</h1>
        <p>Wortliste geladen: <strong>{{ words_count }}</strong> Wörter</p>
-        <form method="post">
-            <label for="pos1">Buchstaben mit korrekter Position</label>
-            <div class="grid">
-                <input id="pos1" name="pos1" maxlength="1" value="{{ pos[0] }}" />
-                <input id="pos2" name="pos2" maxlength="1" value="{{ pos[1] }}" />
-                <input id="pos3" name="pos3" maxlength="1" value="{{ pos[2] }}" />
-                <input id="pos4" name="pos4" maxlength="1" value="{{ pos[3] }}" />
-                <input id="pos5" name="pos5" maxlength="1" value="{{ pos[4] }}" />
-            </div>

-            <label for="includes">Weitere enthaltene Buchstaben (beliebige Reihenfolge)</label>
-            <input id="includes" name="includes" value="{{ includes }}" />
+        <main id="main" role="main">
+            <form method="post" aria-describedby="form-hint">
+                <p id="form-hint" class="hint">Gib bekannte Buchstaben ein. Leere Felder werden ignoriert.</p>

-            <label for="excludes">Ausgeschlossene Buchstaben</label>
-            <input id="excludes" name="excludes" value="{{ excludes }}" />
+                <fieldset>
+                    <legend>Buchstaben mit korrekter Position</legend>
+                    <div class="grid" aria-describedby="pos-hint">
+                        <input id="pos1" name="pos1" maxlength="1" aria-label="Position 1" inputmode="text" autocomplete="off" pattern="[A-Za-zÄÖÜäöüß]" value="{{ pos[0] }}" />
+                        <input id="pos2" name="pos2" maxlength="1" aria-label="Position 2" inputmode="text" autocomplete="off" pattern="[A-Za-zÄÖÜäöüß]" value="{{ pos[1] }}" />
+                        <input id="pos3" name="pos3" maxlength="1" aria-label="Position 3" inputmode="text" autocomplete="off" pattern="[A-Za-zÄÖÜäöüß]" value="{{ pos[2] }}" />
+                        <input id="pos4" name="pos4" maxlength="1" aria-label="Position 4" inputmode="text" autocomplete="off" pattern="[A-Za-zÄÖÜäöüß]" value="{{ pos[3] }}" />
+                        <input id="pos5" name="pos5" maxlength="1" aria-label="Position 5" inputmode="text" autocomplete="off" pattern="[A-Za-zÄÖÜäöüß]" value="{{ pos[4] }}" />
+                    </div>
+                    <p id="pos-hint" class="hint">Je Feld genau ein Buchstabe. Umlaute (ä, ö, ü) und ß sind erlaubt.</p>
+                </fieldset>

-            <button type="submit">Suchen</button>
-        </form>
+                <label for="includes">Weitere enthaltene Buchstaben (beliebige Reihenfolge)</label>
+                <input id="includes" name="includes" aria-describedby="includes-hint" inputmode="text" autocomplete="off" value="{{ includes }}" />
+                <p id="includes-hint" class="hint">Mehrere Buchstaben ohne Trennzeichen eingeben (z. B. „aei“).</p>

-        {% if results is not none %}
-            <div class="results">
-                <h2>Vorschläge ({{ results|length }})</h2>
-                {% if results|length == 0 %}
-                    <p>Keine Treffer. Bitte Bedingungen anpassen.</p>
-                {% else %}
-                    <details open>
-                        <summary>Liste anzeigen</summary>
-                        <p>
-                            {% for w in results %}
-                                <span class="badge">{{ w }}</span>
-                            {% endfor %}
-                        </p>
-                    </details>
-                {% endif %}
-            </div>
-        {% endif %}
+                <label for="excludes">Ausgeschlossene Buchstaben</label>
+                <input id="excludes" name="excludes" aria-describedby="excludes-hint" inputmode="text" autocomplete="off" value="{{ excludes }}" />
+                <p id="excludes-hint" class="hint">Buchstaben, die nicht vorkommen (z. B. „rst“).</p>
+
+                <button type="submit">Suchen</button>
+            </form>
+
+            {% if results is not none %}
+                <div class="results" id="results" role="region" aria-labelledby="results-title">
+                    <h2 id="results-title">Vorschläge ({{ results|length }})</h2>
+                    {% if results|length == 0 %}
+                        <p>Keine Treffer. Bitte Bedingungen anpassen.</p>
+                    {% else %}
+                        <details open>
+                            <summary>Liste anzeigen</summary>
+                            <ul class="word-list">
+                                {% for w in results %}
+                                    <li>
+                                        <span class="badge">{{ w }}
+                                            {% set srcs = sources_map.get(w, []) %}
+                                            {% for s in srcs %}
+                                                {% if s == 'ot' %}
+                                                    <span class="source ot">OT</span>
+                                                {% elif s == 'wf' %}
+                                                    <span class="source wf">WF</span>
+                                                {% endif %}
+                                            {% endfor %}
+                                        </span>
+                                    </li>
+                                {% endfor %}
+                            </ul>
+                        </details>
+                    {% endif %}
+                    <p>
+                        <strong>Legende:</strong>
+                        <span class="source wf">WF</span> = Wordfreq,
+                        <span class="source ot">OT</span> = OpenThesaurus
+                    </p>
+                </div>
+            {% endif %}
+        </main>
+
+        <footer class="footer">
+            Made in 2025 with ❤️ and ☕ by <a href="mailto:elpatron@mailbox.org">Markus F.J. Busche</a>
+        </footer>
    </div>
 </body>
 </html>