From acc2b5377b63c830f470f51fdd826210ee4422e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Tue, 3 Mar 2026 01:32:37 +0100 Subject: [PATCH 1/7] simplify code with JSONPath package --- PythonScripts/audit_translations/parsers.py | 170 +++++++------------- PythonScripts/pyproject.toml | 1 + PythonScripts/uv.lock | 11 ++ 3 files changed, 71 insertions(+), 111 deletions(-) diff --git a/PythonScripts/audit_translations/parsers.py b/PythonScripts/audit_translations/parsers.py index 7f1c3165..140851dd 100644 --- a/PythonScripts/audit_translations/parsers.py +++ b/PythonScripts/audit_translations/parsers.py @@ -5,8 +5,10 @@ """ import os -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, Iterator, List, Optional, Tuple +from jsonpath_ng.ext import parse +from jsonpath_ng.jsonpath import Fields from ruamel.yaml import YAML from ruamel.yaml.scanner import ScannerError @@ -71,6 +73,32 @@ def build_raw_blocks(lines: List[str], starts: List[int]) -> List[str]: return blocks +def mapping_key_line(mapping: Any, key: str) -> Optional[int]: + """ + - 'lc' is line and column in YAML file: https://yaml.dev/doc/ruamel.yaml/detail/ + """ + if hasattr(mapping, "lc") and hasattr(mapping.lc, "data"): + line_info = mapping.lc.data.get(key) + return line_info[0] + 1 + return None + + +def iter_field_matches(node: Any) -> Iterator[Tuple[str, Any, Any]]: + """ + Iterate nested mapping fields using jsonpath. + + Returns tuples of (key, child_value, parent_mapping) in traversal order. + """ + all_fields_expr = parse('$..*') # '..' is recursive descent + + for match in all_fields_expr.find(node): + path = match.path + if isinstance(path, Fields) and len(path.fields) == 1: + key = path.fields[0] + parent = match.context.value if match.context is not None else None + yield key, match.value, parent + + def parse_rules_file(content: str, data: Any) -> List[RuleInfo]: """Parse a standard rules file with name/tag entries""" if not isinstance(data, list): @@ -162,31 +190,7 @@ def find_untranslated_text_values(node: Any) -> List[str]: Find lowercase text keys (t, ot, ct, spell, pronounce, ifthenelse) that should be uppercase in translations. Returns list of the untranslated text values found. """ - untranslated: List[str] = [] - translation_keys = {"t", "ot", "ct", "spell", "pronounce", "ifthenelse"} - - def should_add(text: str) -> bool: - if not text.strip(): - return False - if len(text) == 1 and not text.isalpha(): - return False - if text.startswith('$') or text.startswith('@'): - return False - return True - - def walk(value: Any) -> None: - if isinstance(value, dict): - for key, child in value.items(): - if isinstance(key, str) and key.lower() in translation_keys and not key.isupper() and isinstance(child, str): - if should_add(child): - untranslated.append(child) - walk(child) - elif isinstance(value, list): - for item in value: - walk(item) - - walk(node) - return untranslated + return [entry[1] for entry in find_untranslated_text_entries(node)] def find_untranslated_text_entries(node: Any) -> List[Tuple[str, str, Optional[int]]]: @@ -206,30 +210,14 @@ def should_add(text: str) -> bool: return False return True - def key_line(mapping: Any, key: str) -> Optional[int]: - if hasattr(mapping, "lc") and hasattr(mapping.lc, "data"): - line_info = mapping.lc.data.get(key) - if line_info: - return line_info[0] + 1 - return None - - def walk(value: Any) -> None: - if isinstance(value, dict): - for key, child in value.items(): - if ( - isinstance(key, str) - and key.lower() in translation_keys - and not key.isupper() - and isinstance(child, str) - ): - if should_add(child): - entries.append((key, child, key_line(value, key))) - walk(child) - elif isinstance(value, list): - for item in value: - walk(item) - - walk(node) + for key, child, parent in iter_field_matches(node): + if ( + key.lower() in translation_keys + and not key.isupper() + and isinstance(child, str) + ): + if should_add(child): + entries.append((key, child, mapping_key_line(parent, key))) return entries @@ -257,31 +245,15 @@ def add_line(kind: str, line: Optional[int]) -> None: return line_map.setdefault(kind, []).append(line) - def key_line(mapping: Any, key: str) -> Optional[int]: - if hasattr(mapping, "lc") and hasattr(mapping.lc, "data"): - line_info = mapping.lc.data.get(key) - if line_info: - return line_info[0] + 1 - return None - - def walk(value: Any) -> None: - if isinstance(value, dict): - for key, child in value.items(): - if isinstance(key, str): - if key == "match": - add_line("match", key_line(value, key)) - if key in ("if", "else_if"): - add_line("condition", key_line(value, key)) - if key == "variables": - add_line("variables", key_line(value, key)) - if key in structure_tokens: - add_line(f"structure:{key}", key_line(value, key)) - walk(child) - elif isinstance(value, list): - for item in value: - walk(item) - - walk(node) + for key, _, parent in iter_field_matches(node): + if key == "match": + add_line("match", mapping_key_line(parent, key)) + if key in ("if", "else_if"): + add_line("condition", mapping_key_line(parent, key)) + if key == "variables": + add_line("variables", mapping_key_line(parent, key)) + if key in structure_tokens: + add_line(f"structure:{key}", mapping_key_line(parent, key)) return line_map @@ -312,25 +284,18 @@ def dedup_list(values: List[str]) -> List[str]: def extract_match_pattern(rule_data: Any) -> str: if isinstance(rule_data, dict): - return normalize_match(rule_data.get("match")) + matches = parse('$.match').find(rule_data) + if matches: + return normalize_match(matches[0].value) return "" def extract_conditions(rule_data: Any) -> List[str]: """Extract all if/else conditions from a rule""" conditions: List[str] = [] - - def walk(value: Any) -> None: - if isinstance(value, dict): - for key, child in value.items(): - if key in ("if", "else_if") and isinstance(child, str): - conditions.append(child) - walk(child) - elif isinstance(value, list): - for item in value: - walk(item) - - walk(rule_data) + for key, child, _ in iter_field_matches(rule_data): + if key in ("if", "else_if") and isinstance(child, str): + conditions.append(child) return conditions @@ -348,17 +313,9 @@ def add_from_value(value: Any) -> None: for name, expr in item.items(): variables.append((str(name), str(expr))) - def walk(value: Any) -> None: - if isinstance(value, dict): - for key, child in value.items(): - if key == "variables": - add_from_value(child) - walk(child) - elif isinstance(value, list): - for item in value: - walk(item) - - walk(rule_data) + for key, child, _ in iter_field_matches(rule_data): + if key == "variables": + add_from_value(child) return variables @@ -366,18 +323,9 @@ def extract_structure_elements(rule_data: Any) -> List[str]: """Extract structural elements (test, with, replace blocks) ignoring text content""" elements: List[str] = [] tokens = {"test", "if", "else_if", "then", "else", "then_test", "else_test", "with", "replace", "intent"} - - def walk(value: Any) -> None: - if isinstance(value, dict): - for key, child in value.items(): - if key in tokens: - elements.append(f"{key}:") - walk(child) - elif isinstance(value, list): - for item in value: - walk(item) - - walk(rule_data) + for key, _, _ in iter_field_matches(rule_data): + if key in tokens: + elements.append(f"{key}:") return elements diff --git a/PythonScripts/pyproject.toml b/PythonScripts/pyproject.toml index 96655c09..eb2d4102 100644 --- a/PythonScripts/pyproject.toml +++ b/PythonScripts/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "deepl", "googletrans", "html-table-extractor", + "jsonpath-ng>=1.8.0", "pyyaml", "rich", "ruamel.yaml", diff --git a/PythonScripts/uv.lock b/PythonScripts/uv.lock index 96137df6..bddb870a 100644 --- a/PythonScripts/uv.lock +++ b/PythonScripts/uv.lock @@ -205,6 +205,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, ] +[[package]] +name = "jsonpath-ng" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/32/58/250751940d75c8019659e15482d548a4aa3b6ce122c515102a4bfdac50e3/jsonpath_ng-1.8.0.tar.gz", hash = "sha256:54252968134b5e549ea5b872f1df1168bd7defe1a52fed5a358c194e1943ddc3", size = 74513, upload-time = "2026-02-24T14:42:06.182Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/99/33c7d78a3fb70d545fd5411ac67a651c81602cc09c9cf0df383733f068c5/jsonpath_ng-1.8.0-py3-none-any.whl", hash = "sha256:b8dde192f8af58d646fc031fac9c99fe4d00326afc4148f1f043c601a8cfe138", size = 67844, upload-time = "2026-02-28T00:53:19.637Z" }, +] + [[package]] name = "markdown-it-py" version = "4.0.0" @@ -279,6 +288,7 @@ dependencies = [ { name = "deepl" }, { name = "googletrans" }, { name = "html-table-extractor" }, + { name = "jsonpath-ng" }, { name = "pyyaml" }, { name = "rich" }, { name = "ruamel-yaml" }, @@ -296,6 +306,7 @@ requires-dist = [ { name = "deepl" }, { name = "googletrans" }, { name = "html-table-extractor" }, + { name = "jsonpath-ng", specifier = ">=1.8.0" }, { name = "pyyaml" }, { name = "rich" }, { name = "ruamel-yaml" }, From cbc5db8fcefa9fca5c9e88343212cbe9fb695a84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Tue, 3 Mar 2026 02:44:51 +0100 Subject: [PATCH 2/7] refactor audit_translations: split auditor.py and remove redundant fields --- PythonScripts/audit_translations/auditor.py | 447 +----------------- .../audit_translations/dataclasses.py | 22 +- .../audit_translations/line_resolver.py | 163 +++++++ PythonScripts/audit_translations/parsers.py | 12 +- PythonScripts/audit_translations/renderer.py | 268 +++++++++++ .../audit_translations/tests/test_auditor.py | 3 +- .../tests/test_line_resolver.py | 53 +++ .../tests/test_output_jsonl.py | 3 +- 8 files changed, 513 insertions(+), 458 deletions(-) create mode 100644 PythonScripts/audit_translations/line_resolver.py create mode 100644 PythonScripts/audit_translations/renderer.py create mode 100644 PythonScripts/audit_translations/tests/test_line_resolver.py diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py index de98f3cc..bcb85f15 100644 --- a/PythonScripts/audit_translations/auditor.py +++ b/PythonScripts/audit_translations/auditor.py @@ -5,20 +5,19 @@ and for performing full language audits. """ -import json -import os import sys from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, TextIO, Tuple +from typing import List, Optional, TextIO, Tuple -from rich.console import Console -from rich.markup import escape from rich.panel import Panel from rich.table import Table -from .dataclasses import RuleInfo, RuleDifference, ComparisonResult -from .parsers import parse_yaml_file, diff_rules, extract_structure_elements -console = Console() +from .dataclasses import RuleInfo, ComparisonResult +from .parsers import parse_yaml_file, diff_rules +from .renderer import IssueWriter, collect_issues, console, print_warnings + +# Re-export console so existing `from .auditor import console` callers keep working. +__all__ = ["console"] def normalize_language(language: str) -> str: @@ -76,7 +75,7 @@ def compare_files( """Compare English and translated YAML files""" def load_rules(path: Optional[str]) -> List[RuleInfo]: - if path and os.path.exists(path): + if path and Path(path).exists(): rules, _ = parse_yaml_file(path) return rules return [] @@ -150,432 +149,6 @@ def merge_rules(base_rules: List[RuleInfo], region_rules: List[RuleInfo]) -> Lis ) -def rule_label(rule: RuleInfo) -> str: - if rule.name is None: - return f"[yellow]\"{escape(rule.key)}\"[/]" - tag = rule.tag or "unknown" - return f"[cyan]{escape(rule.name)}[/] [dim]({escape(tag)})[/]" - - -def issue_type_sort_key(issue_type: str) -> Tuple[int, str]: - """ - Stable ordering for per-rule issue groups. - - The first tuple element defines user-facing priority (missing/untranslated/ - match/condition/variables/structure/extra). The second element keeps sorting - deterministic for unknown keys. - """ - order = { - "missing_rule": 0, - "untranslated_text": 1, - "rule_difference:match": 2, - "rule_difference:condition": 3, - "rule_difference:variables": 4, - "rule_difference:structure": 5, - "extra_rule": 6, - } - return order.get(issue_type, 99), issue_type - - -def issue_type_label(issue_type: str) -> str: - """ - Return the display label used in rich grouped output. - - Unknown issue types fall back to their raw key so renderer behavior remains - robust when new categories are introduced. - """ - labels = { - "missing_rule": "Missing in Translation", - "untranslated_text": "Untranslated Text", - "rule_difference:match": "Match Pattern Differences", - "rule_difference:condition": "Condition Differences", - "rule_difference:variables": "Variable Differences", - "rule_difference:structure": "Structure Differences", - "extra_rule": "Extra in Translation", - } - return labels.get(issue_type, issue_type) - - -def issue_base(rule: RuleInfo, file_name: str, language: str) -> dict: - return { - "language": language, - "file": Path(file_name).as_posix(), - "rule_name": rule.name or "", - "rule_tag": rule.tag or "", - "rule_key": rule.key, - "issue_line_en": None, - "issue_line_tr": None, - "rule_line_en": None, - "rule_line_tr": None, - } - - -def first_structure_mismatch( - english_tokens: List[str], - translated_tokens: List[str], -) -> Tuple[Optional[str], Optional[str], int]: - """ - Find the first structural mismatch between two token lists. - - Returns (en_token, tr_token, mismatch_position). - Position is the index in the token list where they first differ. - """ - min_len = min(len(english_tokens), len(translated_tokens)) - for idx in range(min_len): - if english_tokens[idx] != translated_tokens[idx]: - return english_tokens[idx], translated_tokens[idx], idx - if len(english_tokens) > min_len: - return english_tokens[min_len], None, min_len - if len(translated_tokens) > min_len: - return None, translated_tokens[min_len], min_len - return None, None, -1 - - -def resolve_issue_line_at_position( - rule: RuleInfo, - kind: str, - token: Optional[str] = None, - position: int = 0 -) -> Optional[int]: - """ - Resolve the line number for a specific occurrence of an element within a rule. - - Args: - rule: The rule to search in - kind: The kind of element ('match', 'condition', 'variables', 'structure') - token: For structure kind, the specific token to find - position: The occurrence index (0 for first, 1 for second, etc.) - - Returns: - The line number if found, None if the element doesn't exist at that position. - """ - if kind == "match": - lines = rule.line_map.get("match", []) - elif kind == "condition": - lines = rule.line_map.get("condition", []) - elif kind == "variables": - lines = rule.line_map.get("variables", []) - elif kind == "structure" and token: - token_key = f"structure:{token.rstrip(':')}" - lines = rule.line_map.get(token_key, []) - else: - lines = [] - - if position < len(lines): - return lines[position] - return None - - -def resolve_issue_line(rule: RuleInfo, kind: str, token: Optional[str] = None) -> Optional[int]: - """ - Resolve the line number for an issue within a rule. - - Returns the line number if found, None if the element doesn't exist in the rule. - For 'structure' kind with a missing token, returns None instead of falling back - to rule.line_number to avoid misleading line numbers when elements are missing. - """ - if kind == "match": - lines = rule.line_map.get("match", []) - elif kind == "condition": - lines = rule.line_map.get("condition", []) - elif kind == "variables": - lines = rule.line_map.get("variables", []) - elif kind == "structure" and token: - token_key = f"structure:{token.rstrip(':')}" - lines = rule.line_map.get(token_key, []) - # For structure differences, if the token doesn't exist, return None - # rather than falling back to rule.line_number which is misleading - return lines[0] if lines else None - else: - lines = [] - return lines[0] if lines else rule.line_number - - -def structure_token_occurrence_index(tokens: List[str], position: int) -> Optional[int]: - """ - Return which occurrence of a token appears at a given absolute token position. - - Example: for ["test:", "if:", "test:"], position 2 returns 1. - """ - if position < 0 or position >= len(tokens): - return None - token = tokens[position] - return sum(1 for current in tokens[:position] if current == token) - - -def resolve_structure_issue_lines(diff: RuleDifference) -> Optional[Tuple[int, int]]: - """ - Resolve stable line anchors for a structural rule difference. - - Strategy: - - Use position-aware token occurrence matching when possible. - - For insert/delete cases (one side missing token), anchor to the previous - shared structural token; if unavailable, anchor to `replace:`. - """ - en_tokens = extract_structure_elements(diff.english_rule.data) - tr_tokens = extract_structure_elements(diff.translated_rule.data) - en_token, tr_token, mismatch_pos = first_structure_mismatch(en_tokens, tr_tokens) - - if mismatch_pos < 0: - return None - - # Insertion/deletion: anchor to the previous shared token if possible. - if en_token is None or tr_token is None: - anchor_pos = mismatch_pos - 1 - if ( - anchor_pos >= 0 - and anchor_pos < len(en_tokens) - and anchor_pos < len(tr_tokens) - and en_tokens[anchor_pos] == tr_tokens[anchor_pos] - ): - anchor_token = en_tokens[anchor_pos] - en_occ = structure_token_occurrence_index(en_tokens, anchor_pos) - tr_occ = structure_token_occurrence_index(tr_tokens, anchor_pos) - if en_occ is not None and tr_occ is not None: - line_en = resolve_issue_line_at_position(diff.english_rule, "structure", anchor_token, en_occ) - line_tr = resolve_issue_line_at_position(diff.translated_rule, "structure", anchor_token, tr_occ) - if line_en is not None and line_tr is not None: - return line_en, line_tr - - # Fallback: anchor both sides to replace, which is the rule body entrypoint. - line_en = resolve_issue_line(diff.english_rule, "structure", "replace:") or diff.english_rule.line_number - line_tr = resolve_issue_line(diff.translated_rule, "structure", "replace:") or diff.translated_rule.line_number - return line_en, line_tr - - # Exact token available on both sides: resolve by occurrence index at mismatch. - en_occ = structure_token_occurrence_index(en_tokens, mismatch_pos) - tr_occ = structure_token_occurrence_index(tr_tokens, mismatch_pos) - if en_occ is not None and tr_occ is not None: - line_en = resolve_issue_line_at_position(diff.english_rule, "structure", en_token, en_occ) - line_tr = resolve_issue_line_at_position(diff.translated_rule, "structure", tr_token, tr_occ) - if line_en is not None and line_tr is not None: - return line_en, line_tr - - line_en = resolve_issue_line(diff.english_rule, "structure", en_token) - line_tr = resolve_issue_line(diff.translated_rule, "structure", tr_token) - if line_en is None or line_tr is None: - return None - return line_en, line_tr - - -def collect_issues( - result: ComparisonResult, - file_name: str, - language: str, -) -> List[dict]: - """ - Flatten a ComparisonResult into one normalized dictionary per issue. - - This is the canonical bridge from parser/diff objects to serializable - records consumed by JSONL output, snapshot tests, and line-level assertions. - """ - issues = [] - - for rule in result.missing_rules: - issue = issue_base(rule, file_name, language) - issue.update( - issue_type="missing_rule", - diff_type="", - issue_line_en=rule.line_number, - rule_line_en=rule.line_number, - description="Rule present in English but missing in translation", - english_snippet="", - translated_snippet="", - untranslated_texts=[], - ) - issues.append(issue) - - for rule in result.extra_rules: - issue = issue_base(rule, file_name, language) - issue.update( - issue_type="extra_rule", - diff_type="", - issue_line_tr=rule.line_number, - rule_line_tr=rule.line_number, - description="Rule present in translation but missing in English", - english_snippet="", - translated_snippet="", - untranslated_texts=[], - ) - issues.append(issue) - - for rule, entries in result.untranslated_text: - for key, text, line in entries: - issue = issue_base(rule, file_name, language) - issue.update( - issue_type="untranslated_text", - diff_type="", - issue_line_tr=line or rule.line_number, - rule_line_tr=rule.line_number, - description="Lowercase t/ot/ct keys indicate untranslated text", - english_snippet="", - translated_snippet="", - untranslated_texts=[text], - ) - issues.append(issue) - - for diff in result.rule_differences: - rule = diff.english_rule - issue = issue_base(rule, file_name, language) - if diff.diff_type == "structure": - structure_lines = resolve_structure_issue_lines(diff) - if structure_lines is None: - continue - issue_line_en, issue_line_tr = structure_lines - else: - issue_line_en = resolve_issue_line(diff.english_rule, diff.diff_type) - issue_line_tr = resolve_issue_line(diff.translated_rule, diff.diff_type) - issue.update( - issue_type="rule_difference", - diff_type=diff.diff_type, - issue_line_en=issue_line_en, - issue_line_tr=issue_line_tr, - rule_line_en=diff.english_rule.line_number, - rule_line_tr=diff.translated_rule.line_number, - description=diff.description, - english_snippet=diff.english_snippet, - translated_snippet=diff.translated_snippet, - untranslated_texts=[], - ) - issues.append(issue) - - return issues - - -class IssueWriter: - def __init__(self, output_format: str, stream: TextIO): - if output_format != "jsonl": - raise ValueError(f"Unsupported output format: {output_format}") - self.stream = stream - - def write(self, issue: dict) -> None: - self.stream.write(json.dumps(issue, ensure_ascii=False) + "\n") - - -def print_warnings( - result: ComparisonResult, - file_name: str, - verbose: bool = False, - target_language: str = "tr", -) -> int: - """Print warnings to console. Returns count of issues found.""" - issues = 0 - display_name = Path(file_name).as_posix() - target_label = normalize_language(target_language) - - has_issues = result.missing_rules or result.untranslated_text or result.extra_rules or result.rule_differences - if not has_issues: - return issues - - style, icon = ("green", "✓") if result.translated_rule_count == result.english_rule_count else \ - ("red", "✗") if result.translated_rule_count == 0 else ("yellow", "⚠") - console.print() - console.rule(style="cyan") - console.print(f"[{style}]{icon}[/] [bold]{escape(display_name)}[/]") - console.print(f" [dim]English: {result.english_rule_count} rules → Translated: {result.translated_rule_count} rules[/]") - console.rule(style="cyan") - - grouped_issues: Dict[str, Dict[str, Any]] = {} - - def add_issue(rule: RuleInfo, issue_type: str, payload: Dict[str, Any]) -> None: - if rule.key not in grouped_issues: - grouped_issues[rule.key] = { - "rule": rule, - "by_type": {}, - } - type_map: Dict[str, List[Dict[str, Any]]] = grouped_issues[rule.key]["by_type"] - type_map.setdefault(issue_type, []).append(payload) - - for rule in result.missing_rules: - add_issue( - rule, - "missing_rule", - {"line_en": rule.line_number}, - ) - - for rule, entries in result.untranslated_text: - for _, text, line in entries: - issue_line = line or rule.line_number - add_issue( - rule, - "untranslated_text", - {"line_tr": issue_line, "text": text}, - ) - - for diff in result.rule_differences: - if diff.diff_type == "structure": - structure_lines = resolve_structure_issue_lines(diff) - if structure_lines is None: - continue - line_en, line_tr = structure_lines - else: - line_en = resolve_issue_line(diff.english_rule, diff.diff_type) - line_tr = resolve_issue_line(diff.translated_rule, diff.diff_type) - - add_issue( - diff.english_rule, - f"rule_difference:{diff.diff_type}", - {"line_en": line_en, "line_tr": line_tr, "diff": diff}, - ) - - for rule in result.extra_rules: - add_issue( - rule, - "extra_rule", - {"line_tr": rule.line_number}, - ) - - if grouped_issues: - total_grouped_issues = sum( - len(entries) - for group in grouped_issues.values() - for entries in group["by_type"].values() - ) - console.print( - f"\n [magenta]≠[/] [bold]Rule Issues[/] " - f"[[magenta]{total_grouped_issues}[/]] [dim](grouped by rule and issue type)[/]" - ) - for group in grouped_issues.values(): - rule = group["rule"] - by_type: Dict[str, List[Dict[str, Any]]] = group["by_type"] - console.print(f" [dim]•[/] {rule_label(rule)}") - for issue_type in sorted(by_type.keys(), key=issue_type_sort_key): - entries = by_type[issue_type] - console.print( - f" [dim]{issue_type_label(issue_type)} " - f"[{len(entries)}][/]" - ) - for entry in entries: - if issue_type == "missing_rule": - console.print( - f" [dim]•[/] [dim](line {entry['line_en']} in English)[/]" - ) - issues += 1 - elif issue_type == "extra_rule": - console.print( - f" [dim]•[/] [dim](line {entry['line_tr']} in {target_label})[/]" - ) - issues += 1 - elif issue_type == "untranslated_text": - console.print( - f" [dim]•[/] [dim](line {entry['line_tr']} {target_label})[/] " - f"[yellow]\"{escape(entry['text'])}\"[/]" - ) - issues += 1 - else: - diff: RuleDifference = entry["diff"] - console.print( - f" [dim]•[/] [dim](line {entry['line_en']} en, {entry['line_tr']} {target_label})[/]" - ) - console.print(f" [dim]{diff.description}[/]") - if verbose: - console.print(f" [green]en:[/] {escape(diff.english_snippet)}") - console.print(f" [red]{target_label}:[/] {escape(diff.translated_snippet)}") - issues += 1 - - return issues - - def audit_language( language: str, specific_file: Optional[str] = None, @@ -647,10 +220,8 @@ def audit_language( str(english_region_path) if english_region_path and english_region_path.exists() else None, ) - # check for issues - has_issues = result.missing_rules or result.untranslated_text or result.extra_rules or result.rule_differences if output_format == "rich": - if has_issues: + if result.has_issues: issues = print_warnings(result, file_name, verbose, language) if issues > 0: files_with_issues += 1 diff --git a/PythonScripts/audit_translations/dataclasses.py b/PythonScripts/audit_translations/dataclasses.py index 9cd4b0e9..87955153 100644 --- a/PythonScripts/audit_translations/dataclasses.py +++ b/PythonScripts/audit_translations/dataclasses.py @@ -27,10 +27,6 @@ class RuleInfo: Raw YAML block for this rule (used for reporting/snippets). data : Optional[Any] Parsed YAML node for the rule; used for structural diffs. - has_untranslated_text : bool - True if the rule contains lowercase t/ot/ct/etc. values. - untranslated_keys : List[str] - List of untranslated text values (used for summary counts). untranslated_entries : List[Tuple[str, str, Optional[int]]] List of (key, text, line) entries extracted from lowercase translation keys. This drives per-issue JSONL output so each untranslated string can report @@ -48,12 +44,18 @@ class RuleInfo: line_number: int raw_content: str data: Optional[Any] = None - has_untranslated_text: bool = False - untranslated_keys: List[str] = field(default_factory=list) - untranslated_entries: List[Tuple[str, str, Optional[int]]] = field(default_factory=list) # (key, text, line) for JSONL output - line_map: Dict[str, List[int]] = field(default_factory=dict) # Element-type -> line numbers for precise diff locations + untranslated_entries: List[Tuple[str, str, Optional[int]]] = field(default_factory=list) + line_map: Dict[str, List[int]] = field(default_factory=dict) audit_ignore: bool = False + @property + def has_untranslated_text(self) -> bool: + return bool(self.untranslated_entries) + + @property + def untranslated_keys(self) -> List[str]: + return [entry[1] for entry in self.untranslated_entries] + @dataclass class RuleDifference: @@ -76,3 +78,7 @@ class ComparisonResult: english_rule_count: int translated_rule_count: int rule_differences: List[RuleDifference] = field(default_factory=list) # Fine-grained diffs + + @property + def has_issues(self) -> bool: + return bool(self.missing_rules or self.untranslated_text or self.extra_rules or self.rule_differences) diff --git a/PythonScripts/audit_translations/line_resolver.py b/PythonScripts/audit_translations/line_resolver.py new file mode 100644 index 00000000..f8331d38 --- /dev/null +++ b/PythonScripts/audit_translations/line_resolver.py @@ -0,0 +1,163 @@ +""" +Line number resolution for rule differences. + +Maps rule diff types and structure tokens to precise YAML source line numbers. +""" + +from typing import List, Optional, Tuple + +from .dataclasses import RuleInfo, RuleDifference +from .parsers import extract_structure_elements + + +def _get_line_map_lines(rule: RuleInfo, kind: str, token: Optional[str] = None) -> List[int]: + """Return the line-number list for a given element kind from the rule's line map.""" + if kind == "match": + return rule.line_map.get("match", []) + if kind == "condition": + return rule.line_map.get("condition", []) + if kind == "variables": + return rule.line_map.get("variables", []) + if kind == "structure" and token: + return rule.line_map.get(f"structure:{token.rstrip(':')}", []) + return [] + + +def first_structure_mismatch( + english_tokens: List[str], + translated_tokens: List[str], +) -> Tuple[Optional[str], Optional[str], int]: + """ + Find the first structural mismatch between two token lists. + + Returns (en_token, tr_token, mismatch_position). + Position is the index in the token list where they first differ. + """ + min_len = min(len(english_tokens), len(translated_tokens)) + for idx in range(min_len): + if english_tokens[idx] != translated_tokens[idx]: + return english_tokens[idx], translated_tokens[idx], idx + if len(english_tokens) > min_len: + return english_tokens[min_len], None, min_len + if len(translated_tokens) > min_len: + return None, translated_tokens[min_len], min_len + return None, None, -1 + + +def resolve_issue_line_at_position( + rule: RuleInfo, + kind: str, + token: Optional[str] = None, + position: int = 0, +) -> Optional[int]: + """ + Resolve the line number for a specific occurrence of an element within a rule. + + Args: + rule: The rule to search in + kind: The kind of element ('match', 'condition', 'variables', 'structure') + token: For structure kind, the specific token to find + position: The occurrence index (0 for first, 1 for second, etc.) + + Returns: + The line number if found, None if the element doesn't exist at that position. + """ + lines = _get_line_map_lines(rule, kind, token) + return lines[position] if position < len(lines) else None + + +def resolve_issue_line(rule: RuleInfo, kind: str, token: Optional[str] = None) -> Optional[int]: + """ + Resolve the line number for an issue within a rule. + + Returns the line number if found, None if the element doesn't exist in the rule. + For 'structure' kind with a missing token, returns None instead of falling back + to rule.line_number to avoid misleading line numbers when elements are missing. + """ + lines = _get_line_map_lines(rule, kind, token) + if kind == "structure" and token: + return lines[0] if lines else None + return lines[0] if lines else rule.line_number + + +def structure_token_occurrence_index(tokens: List[str], position: int) -> Optional[int]: + """ + Return which occurrence of a token appears at a given absolute token position. + + Example: for ["test:", "if:", "test:"], position 2 returns 1. + """ + if position < 0 or position >= len(tokens): + return None + token = tokens[position] + return sum(1 for current in tokens[:position] if current == token) + + +def resolve_structure_issue_lines(diff: RuleDifference) -> Optional[Tuple[int, int]]: + """ + Resolve stable line anchors for a structural rule difference. + + Strategy: + - Use position-aware token occurrence matching when possible. + - For insert/delete cases (one side missing token), anchor to the previous + shared structural token; if unavailable, anchor to `replace:`. + """ + en_tokens = extract_structure_elements(diff.english_rule.data) + tr_tokens = extract_structure_elements(diff.translated_rule.data) + en_token, tr_token, mismatch_pos = first_structure_mismatch(en_tokens, tr_tokens) + + if mismatch_pos < 0: + return None + + # Insertion/deletion: anchor to the previous shared token if possible. + if en_token is None or tr_token is None: + anchor_pos = mismatch_pos - 1 + if ( + anchor_pos >= 0 + and anchor_pos < len(en_tokens) + and anchor_pos < len(tr_tokens) + and en_tokens[anchor_pos] == tr_tokens[anchor_pos] + ): + anchor_token = en_tokens[anchor_pos] + en_occ = structure_token_occurrence_index(en_tokens, anchor_pos) + tr_occ = structure_token_occurrence_index(tr_tokens, anchor_pos) + if en_occ is not None and tr_occ is not None: + line_en = resolve_issue_line_at_position(diff.english_rule, "structure", anchor_token, en_occ) + line_tr = resolve_issue_line_at_position(diff.translated_rule, "structure", anchor_token, tr_occ) + if line_en is not None and line_tr is not None: + return line_en, line_tr + + # Fallback: anchor both sides to replace, which is the rule body entrypoint. + line_en = resolve_issue_line(diff.english_rule, "structure", "replace:") or diff.english_rule.line_number + line_tr = resolve_issue_line(diff.translated_rule, "structure", "replace:") or diff.translated_rule.line_number + return line_en, line_tr + + # Exact token available on both sides: resolve by occurrence index at mismatch. + en_occ = structure_token_occurrence_index(en_tokens, mismatch_pos) + tr_occ = structure_token_occurrence_index(tr_tokens, mismatch_pos) + if en_occ is not None and tr_occ is not None: + line_en = resolve_issue_line_at_position(diff.english_rule, "structure", en_token, en_occ) + line_tr = resolve_issue_line_at_position(diff.translated_rule, "structure", tr_token, tr_occ) + if line_en is not None and line_tr is not None: + return line_en, line_tr + + line_en = resolve_issue_line(diff.english_rule, "structure", en_token) + line_tr = resolve_issue_line(diff.translated_rule, "structure", tr_token) + if line_en is None or line_tr is None: + return None + return line_en, line_tr + + +def resolve_diff_lines(diff: RuleDifference) -> Optional[Tuple[Optional[int], Optional[int]]]: + """ + Resolve issue line numbers for a rule difference. + + Returns (line_en, line_tr), or None only for unresolvable structure diffs. + This is the single entry point used by both collect_issues and print_warnings + to avoid duplicating the structure vs non-structure branching logic. + """ + if diff.diff_type == "structure": + return resolve_structure_issue_lines(diff) + return ( + resolve_issue_line(diff.english_rule, diff.diff_type), + resolve_issue_line(diff.translated_rule, diff.diff_type), + ) diff --git a/PythonScripts/audit_translations/parsers.py b/PythonScripts/audit_translations/parsers.py index 140851dd..cea57504 100644 --- a/PythonScripts/audit_translations/parsers.py +++ b/PythonScripts/audit_translations/parsers.py @@ -120,8 +120,6 @@ def parse_rules_file(content: str, data: Any) -> List[RuleInfo]: for item, raw_content, line_idx in zip(rule_items, raw_blocks, start_lines): rule_name = str(item.get("name")) tag = format_tag(item.get("tag")) - untranslated_entries = find_untranslated_text_entries(item) - untranslated = [entry[1] for entry in untranslated_entries] rule_key = f"{rule_name}|{tag or 'unknown'}" rules.append(RuleInfo( name=rule_name, @@ -130,9 +128,7 @@ def parse_rules_file(content: str, data: Any) -> List[RuleInfo]: line_number=line_idx + 1, raw_content=raw_content, data=item, - has_untranslated_text=len(untranslated) > 0, - untranslated_keys=untranslated, - untranslated_entries=untranslated_entries, + untranslated_entries=find_untranslated_text_entries(item), line_map=build_line_map(item), audit_ignore=has_audit_ignore(raw_content) )) @@ -161,8 +157,6 @@ def parse_unicode_file(content: str, data: Any) -> List[RuleInfo]: raw_blocks = build_raw_blocks(lines, start_lines) for (char_key, value), raw_content, line_idx in zip(entries, raw_blocks, start_lines): - untranslated_entries = find_untranslated_text_entries(value) - untranslated = [entry[1] for entry in untranslated_entries] rules.append(RuleInfo( name=None, tag=None, @@ -170,9 +164,7 @@ def parse_unicode_file(content: str, data: Any) -> List[RuleInfo]: line_number=line_idx + 1, raw_content=raw_content, data=value, - has_untranslated_text=len(untranslated) > 0, - untranslated_keys=untranslated, - untranslated_entries=untranslated_entries, + untranslated_entries=find_untranslated_text_entries(value), line_map=build_line_map(value), audit_ignore=has_audit_ignore(raw_content) )) diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py new file mode 100644 index 00000000..8b0cee56 --- /dev/null +++ b/PythonScripts/audit_translations/renderer.py @@ -0,0 +1,268 @@ +""" +Rich console rendering and issue serialization. + +Handles all output concerns: rich terminal display, JSONL issue normalization, +and the IssueWriter interface. +""" + +import json +from pathlib import Path +from typing import Any, Dict, List, TextIO, Tuple + +from rich.console import Console +from rich.markup import escape + +from .dataclasses import ComparisonResult, RuleDifference, RuleInfo +from .line_resolver import resolve_diff_lines + +console = Console() + + +def rule_label(rule: RuleInfo) -> str: + if rule.name is None: + return f"[yellow]\"{escape(rule.key)}\"[/]" + tag = rule.tag or "unknown" + return f"[cyan]{escape(rule.name)}[/] [dim]({escape(tag)})[/]" + + +def issue_type_sort_key(issue_type: str) -> Tuple[int, str]: + """ + Stable ordering for per-rule issue groups. + + The first tuple element defines user-facing priority (missing/untranslated/ + match/condition/variables/structure/extra). The second element keeps sorting + deterministic for unknown keys. + """ + order = { + "missing_rule": 0, + "untranslated_text": 1, + "rule_difference:match": 2, + "rule_difference:condition": 3, + "rule_difference:variables": 4, + "rule_difference:structure": 5, + "extra_rule": 6, + } + return order.get(issue_type, 99), issue_type + + +def issue_type_label(issue_type: str) -> str: + """ + Return the display label used in rich grouped output. + + Unknown issue types fall back to their raw key so renderer behavior remains + robust when new categories are introduced. + """ + labels = { + "missing_rule": "Missing in Translation", + "untranslated_text": "Untranslated Text", + "rule_difference:match": "Match Pattern Differences", + "rule_difference:condition": "Condition Differences", + "rule_difference:variables": "Variable Differences", + "rule_difference:structure": "Structure Differences", + "extra_rule": "Extra in Translation", + } + return labels.get(issue_type, issue_type) + + +def issue_base(rule: RuleInfo, file_name: str, language: str) -> dict: + return { + "language": language, + "file": Path(file_name).as_posix(), + "rule_name": rule.name or "", + "rule_tag": rule.tag or "", + "rule_key": rule.key, + "issue_line_en": None, + "issue_line_tr": None, + "rule_line_en": None, + "rule_line_tr": None, + } + + +def collect_issues( + result: ComparisonResult, + file_name: str, + language: str, +) -> List[dict]: + """ + Flatten a ComparisonResult into one normalized dictionary per issue. + + This is the canonical bridge from parser/diff objects to serializable + records consumed by JSONL output, snapshot tests, and line-level assertions. + """ + issues = [] + + for rule in result.missing_rules: + issue = issue_base(rule, file_name, language) + issue.update( + issue_type="missing_rule", + diff_type="", + issue_line_en=rule.line_number, + rule_line_en=rule.line_number, + description="Rule present in English but missing in translation", + english_snippet="", + translated_snippet="", + untranslated_texts=[], + ) + issues.append(issue) + + for rule in result.extra_rules: + issue = issue_base(rule, file_name, language) + issue.update( + issue_type="extra_rule", + diff_type="", + issue_line_tr=rule.line_number, + rule_line_tr=rule.line_number, + description="Rule present in translation but missing in English", + english_snippet="", + translated_snippet="", + untranslated_texts=[], + ) + issues.append(issue) + + for rule, entries in result.untranslated_text: + for key, text, line in entries: + issue = issue_base(rule, file_name, language) + issue.update( + issue_type="untranslated_text", + diff_type="", + issue_line_tr=line or rule.line_number, + rule_line_tr=rule.line_number, + description="Lowercase t/ot/ct keys indicate untranslated text", + english_snippet="", + translated_snippet="", + untranslated_texts=[text], + ) + issues.append(issue) + + for diff in result.rule_differences: + lines = resolve_diff_lines(diff) + if lines is None: + continue + issue_line_en, issue_line_tr = lines + issue = issue_base(diff.english_rule, file_name, language) + issue.update( + issue_type="rule_difference", + diff_type=diff.diff_type, + issue_line_en=issue_line_en, + issue_line_tr=issue_line_tr, + rule_line_en=diff.english_rule.line_number, + rule_line_tr=diff.translated_rule.line_number, + description=diff.description, + english_snippet=diff.english_snippet, + translated_snippet=diff.translated_snippet, + untranslated_texts=[], + ) + issues.append(issue) + + return issues + + +class IssueWriter: + def __init__(self, output_format: str, stream: TextIO): + if output_format != "jsonl": + raise ValueError(f"Unsupported output format: {output_format}") + self.stream = stream + + def write(self, issue: dict) -> None: + self.stream.write(json.dumps(issue, ensure_ascii=False) + "\n") + + +def print_warnings( + result: ComparisonResult, + file_name: str, + verbose: bool = False, + target_language: str = "tr", +) -> int: + """Print warnings to console. Returns count of issues found.""" + issues = 0 + display_name = Path(file_name).as_posix() + target_label = target_language.lower().replace("_", "-") + + if not result.has_issues: + return issues + + style, icon = ("green", "✓") if result.translated_rule_count == result.english_rule_count else \ + ("red", "✗") if result.translated_rule_count == 0 else ("yellow", "⚠") + console.print() + console.rule(style="cyan") + console.print(f"[{style}]{icon}[/] [bold]{escape(display_name)}[/]") + console.print(f" [dim]English: {result.english_rule_count} rules → Translated: {result.translated_rule_count} rules[/]") + console.rule(style="cyan") + + grouped_issues: Dict[str, Dict[str, Any]] = {} + + def add_issue(rule: RuleInfo, issue_type: str, payload: Dict[str, Any]) -> None: + if rule.key not in grouped_issues: + grouped_issues[rule.key] = {"rule": rule, "by_type": {}} + grouped_issues[rule.key]["by_type"].setdefault(issue_type, []).append(payload) + + for rule in result.missing_rules: + add_issue(rule, "missing_rule", {"line_en": rule.line_number}) + + for rule, entries in result.untranslated_text: + for _, text, line in entries: + add_issue(rule, "untranslated_text", {"line_tr": line or rule.line_number, "text": text}) + + for diff in result.rule_differences: + lines = resolve_diff_lines(diff) + if lines is None: + continue + line_en, line_tr = lines + add_issue( + diff.english_rule, + f"rule_difference:{diff.diff_type}", + {"line_en": line_en, "line_tr": line_tr, "diff": diff}, + ) + + for rule in result.extra_rules: + add_issue(rule, "extra_rule", {"line_tr": rule.line_number}) + + if grouped_issues: + total_grouped_issues = sum( + len(entries) + for group in grouped_issues.values() + for entries in group["by_type"].values() + ) + console.print( + f"\n [magenta]≠[/] [bold]Rule Issues[/] " + f"[[magenta]{total_grouped_issues}[/]] [dim](grouped by rule and issue type)[/]" + ) + for group in grouped_issues.values(): + rule = group["rule"] + by_type: Dict[str, List[Dict[str, Any]]] = group["by_type"] + console.print(f" [dim]•[/] {rule_label(rule)}") + for issue_type in sorted(by_type.keys(), key=issue_type_sort_key): + entries = by_type[issue_type] + console.print( + f" [dim]{issue_type_label(issue_type)} " + f"[{len(entries)}][/]" + ) + for entry in entries: + if issue_type == "missing_rule": + console.print( + f" [dim]•[/] [dim](line {entry['line_en']} in English)[/]" + ) + issues += 1 + elif issue_type == "extra_rule": + console.print( + f" [dim]•[/] [dim](line {entry['line_tr']} in {target_label})[/]" + ) + issues += 1 + elif issue_type == "untranslated_text": + console.print( + f" [dim]•[/] [dim](line {entry['line_tr']} {target_label})[/] " + f"[yellow]\"{escape(entry['text'])}\"[/]" + ) + issues += 1 + else: + diff: RuleDifference = entry["diff"] + console.print( + f" [dim]•[/] [dim](line {entry['line_en']} en, {entry['line_tr']} {target_label})[/]" + ) + console.print(f" [dim]{diff.description}[/]") + if verbose: + console.print(f" [green]en:[/] {escape(diff.english_snippet)}") + console.print(f" [red]{target_label}:[/] {escape(diff.translated_snippet)}") + issues += 1 + + return issues diff --git a/PythonScripts/audit_translations/tests/test_auditor.py b/PythonScripts/audit_translations/tests/test_auditor.py index e416cd29..fa60b143 100644 --- a/PythonScripts/audit_translations/tests/test_auditor.py +++ b/PythonScripts/audit_translations/tests/test_auditor.py @@ -6,8 +6,9 @@ import pytest -from ..auditor import collect_issues, compare_files, console, get_yaml_files, list_languages, print_warnings +from ..auditor import compare_files, console, get_yaml_files, list_languages from ..dataclasses import ComparisonResult, RuleDifference, RuleInfo +from ..renderer import collect_issues, print_warnings @pytest.fixture() diff --git a/PythonScripts/audit_translations/tests/test_line_resolver.py b/PythonScripts/audit_translations/tests/test_line_resolver.py new file mode 100644 index 00000000..49d57cdc --- /dev/null +++ b/PythonScripts/audit_translations/tests/test_line_resolver.py @@ -0,0 +1,53 @@ +""" +Unit tests for line_resolver.py. +""" + +from ..dataclasses import RuleInfo, RuleDifference +from ..line_resolver import first_structure_mismatch, resolve_diff_lines + + +def _make_rule(name: str, line_map: dict, line_number: int = 1) -> RuleInfo: + return RuleInfo( + name=name, + tag="mo", + key=f"{name}|mo", + line_number=line_number, + raw_content="", + line_map=line_map, + ) + + +def test_first_structure_mismatch_marks_missing_token_as_none(): + """ + When English has a token that the translation omits, the mismatch position + points to the extra English token and tr_token is None. + """ + en = ["test:", "if:", "then:", "else:"] + tr = ["test:", "if:", "then:"] + + en_token, tr_token, pos = first_structure_mismatch(en, tr) + + assert pos == 3 + assert en_token == "else:" + assert tr_token is None + + +def test_resolve_diff_lines_uses_line_map_and_falls_back_to_rule_line(): + """ + For non-structure diffs, resolve_diff_lines reads from the line_map when + the element is present, and falls back to rule.line_number when it is not. + """ + en_with_map = _make_rule("r", {"match": [10]}, line_number=5) + tr_with_map = _make_rule("r", {"match": [20]}, line_number=15) + en_no_map = _make_rule("r", {}, line_number=5) + tr_no_map = _make_rule("r", {}, line_number=15) + + def make_match_diff(en, tr): + return RuleDifference( + english_rule=en, translated_rule=tr, + diff_type="match", description="Match differs", + english_snippet="a", translated_snippet="b", + ) + + assert resolve_diff_lines(make_match_diff(en_with_map, tr_with_map)) == (10, 20) + assert resolve_diff_lines(make_match_diff(en_no_map, tr_no_map)) == (5, 15) diff --git a/PythonScripts/audit_translations/tests/test_output_jsonl.py b/PythonScripts/audit_translations/tests/test_output_jsonl.py index 1c2ae008..1235f479 100644 --- a/PythonScripts/audit_translations/tests/test_output_jsonl.py +++ b/PythonScripts/audit_translations/tests/test_output_jsonl.py @@ -4,7 +4,8 @@ from io import StringIO from pathlib import Path -from ..auditor import IssueWriter, collect_issues, compare_files +from ..auditor import compare_files +from ..renderer import IssueWriter, collect_issues def load_jsonl(text: str) -> list[dict]: From 9ded115fa5da949d692cb36a04f3e0186b503cd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Tue, 3 Mar 2026 02:54:29 +0100 Subject: [PATCH 3/7] inline unnecessary IssueWriter class --- PythonScripts/audit_translations/auditor.py | 7 +++---- PythonScripts/audit_translations/renderer.py | 12 +----------- .../audit_translations/tests/test_output_jsonl.py | 8 +++----- 3 files changed, 7 insertions(+), 20 deletions(-) diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py index bcb85f15..1a3ce1c9 100644 --- a/PythonScripts/audit_translations/auditor.py +++ b/PythonScripts/audit_translations/auditor.py @@ -5,6 +5,7 @@ and for performing full language audits. """ +import json import sys from pathlib import Path from typing import List, Optional, TextIO, Tuple @@ -14,7 +15,7 @@ from .dataclasses import RuleInfo, ComparisonResult from .parsers import parse_yaml_file, diff_rules -from .renderer import IssueWriter, collect_issues, console, print_warnings +from .renderer import collect_issues, console, print_warnings # Re-export console so existing `from .auditor import console` callers keep working. __all__ = ["console"] @@ -192,8 +193,6 @@ def audit_language( if output_path: out_stream = open(output_path, "w", encoding="utf-8", newline="") - writer = IssueWriter(output_format, out_stream) if output_format != "rich" else None - total_issues = 0 total_missing = 0 total_untranslated = 0 @@ -231,7 +230,7 @@ def audit_language( else: issues_list = collect_issues(result, file_name, language) for issue in issues_list: - writer.write(issue) + out_stream.write(json.dumps(issue, ensure_ascii=False) + "\n") if issues_list: files_with_issues += 1 total_issues += len(issues_list) diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py index 8b0cee56..0aaae5d8 100644 --- a/PythonScripts/audit_translations/renderer.py +++ b/PythonScripts/audit_translations/renderer.py @@ -7,7 +7,7 @@ import json from pathlib import Path -from typing import Any, Dict, List, TextIO, Tuple +from typing import Any, Dict, List, Tuple from rich.console import Console from rich.markup import escape @@ -157,16 +157,6 @@ def collect_issues( return issues -class IssueWriter: - def __init__(self, output_format: str, stream: TextIO): - if output_format != "jsonl": - raise ValueError(f"Unsupported output format: {output_format}") - self.stream = stream - - def write(self, issue: dict) -> None: - self.stream.write(json.dumps(issue, ensure_ascii=False) + "\n") - - def print_warnings( result: ComparisonResult, file_name: str, diff --git a/PythonScripts/audit_translations/tests/test_output_jsonl.py b/PythonScripts/audit_translations/tests/test_output_jsonl.py index 1235f479..e636fb27 100644 --- a/PythonScripts/audit_translations/tests/test_output_jsonl.py +++ b/PythonScripts/audit_translations/tests/test_output_jsonl.py @@ -5,7 +5,7 @@ from pathlib import Path from ..auditor import compare_files -from ..renderer import IssueWriter, collect_issues +from ..renderer import collect_issues def load_jsonl(text: str) -> list[dict]: @@ -27,16 +27,14 @@ def test_jsonl_output_matches_golden(): files = sorted(path.name for path in english_dir.glob("*.yaml")) stream = StringIO() - writer = IssueWriter("jsonl", stream) for file_name in files: result = compare_files( str(english_dir / file_name), str(translated_dir / file_name), ) - issues = collect_issues(result, file_name, "de") - for issue in issues: - writer.write(issue) + for issue in collect_issues(result, file_name, "de"): + stream.write(json.dumps(issue, ensure_ascii=False) + "\n") actual = load_jsonl(stream.getvalue()) From 251fd6f628f064d9fb18323404ee99219936d606 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Tue, 3 Mar 2026 03:03:31 +0100 Subject: [PATCH 4/7] modernize type hints --- PythonScripts/audit_translations/auditor.py | 28 ++++----- .../audit_translations/dataclasses.py | 34 +++++------ .../audit_translations/line_resolver.py | 22 +++---- PythonScripts/audit_translations/parsers.py | 61 +++++++++---------- PythonScripts/audit_translations/renderer.py | 10 +-- 5 files changed, 76 insertions(+), 79 deletions(-) diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py index 1a3ce1c9..5d49cbf2 100644 --- a/PythonScripts/audit_translations/auditor.py +++ b/PythonScripts/audit_translations/auditor.py @@ -8,7 +8,7 @@ import json import sys from pathlib import Path -from typing import List, Optional, TextIO, Tuple +from typing import TextIO from rich.panel import Panel from rich.table import Table @@ -26,7 +26,7 @@ def normalize_language(language: str) -> str: return language.lower().replace("_", "-") -def split_language(language: str) -> Tuple[str, Optional[str]]: +def split_language(language: str) -> tuple[str, str | None]: """Split a language code into base and optional region.""" normalized = normalize_language(language) if "-" in normalized: @@ -35,7 +35,7 @@ def split_language(language: str) -> Tuple[str, Optional[str]]: return normalized, None -def get_rules_dir(rules_dir: Optional[str] = None) -> Path: +def get_rules_dir(rules_dir: str | None = None) -> Path: """Get the Rules/Languages directory path""" if rules_dir: return Path(rules_dir).expanduser() @@ -44,7 +44,7 @@ def get_rules_dir(rules_dir: Optional[str] = None) -> Path: return package_dir.parent.parent / "Rules" / "Languages" -def get_yaml_files(lang_dir: Path, region_dir: Optional[Path] = None) -> List[Path]: +def get_yaml_files(lang_dir: Path, region_dir: Path | None = None) -> list[Path]: """Get all YAML files to audit for a language, including region overrides.""" files: set[Path] = set() @@ -69,19 +69,19 @@ def collect_from(directory: Path, root: Path) -> None: def compare_files( english_path: str, translated_path: str, - issue_filter: Optional[set[str]] = None, - translated_region_path: Optional[str] = None, - english_region_path: Optional[str] = None, + issue_filter: set[str] | None = None, + translated_region_path: str | None = None, + english_region_path: str | None = None, ) -> ComparisonResult: """Compare English and translated YAML files""" - def load_rules(path: Optional[str]) -> List[RuleInfo]: + def load_rules(path: str | None) -> list[RuleInfo]: if path and Path(path).exists(): rules, _ = parse_yaml_file(path) return rules return [] - def merge_rules(base_rules: List[RuleInfo], region_rules: List[RuleInfo]) -> List[RuleInfo]: + def merge_rules(base_rules: list[RuleInfo], region_rules: list[RuleInfo]) -> list[RuleInfo]: if not region_rules: return base_rules merged = {r.key: r for r in base_rules} @@ -152,11 +152,11 @@ def merge_rules(base_rules: List[RuleInfo], region_rules: List[RuleInfo]) -> Lis def audit_language( language: str, - specific_file: Optional[str] = None, + specific_file: str | None = None, output_format: str = "rich", - output_path: Optional[str] = None, - rules_dir: Optional[str] = None, - issue_filter: Optional[set[str]] = None, + output_path: str | None = None, + rules_dir: str | None = None, + issue_filter: set[str] | None = None, verbose: bool = False, ) -> int: """Audit translations for a specific language. Returns total issue count.""" @@ -264,7 +264,7 @@ def audit_language( return total_issues -def list_languages(rules_dir: Optional[str] = None): +def list_languages(rules_dir: str | None = None): """List available languages for auditing""" console.print(Panel("Available Languages", style="bold cyan")) diff --git a/PythonScripts/audit_translations/dataclasses.py b/PythonScripts/audit_translations/dataclasses.py index 87955153..cdd2ca88 100644 --- a/PythonScripts/audit_translations/dataclasses.py +++ b/PythonScripts/audit_translations/dataclasses.py @@ -5,7 +5,7 @@ """ from dataclasses import dataclass, field -from typing import Any, List, Tuple, Optional, Dict +from typing import Any @dataclass @@ -15,9 +15,9 @@ class RuleInfo: Attributes ---------- - name : Optional[str] + name : str | None Rule name for standard rule files; None for unicode entries. - tag : Optional[str] + tag : str | None Rule tag (normalized string); None for unicode entries. key : str Stable identifier used for matching; for unicode entries this is the character or range key. @@ -25,27 +25,27 @@ class RuleInfo: 1-based line number where the rule starts in the source file. raw_content : str Raw YAML block for this rule (used for reporting/snippets). - data : Optional[Any] + data : Any | None Parsed YAML node for the rule; used for structural diffs. - untranslated_entries : List[Tuple[str, str, Optional[int]]] + untranslated_entries : list[tuple[str, str, int | None]] List of (key, text, line) entries extracted from lowercase translation keys. This drives per-issue JSONL output so each untranslated string can report the specific YAML line number where it appears. - line_map : Dict[str, List[int]] + line_map : dict[str, list[int]] Mapping of element type to line numbers for rule components like match, conditions, variables, and structural tokens. This is used to point structural diffs at a precise line rather than the top of the rule. audit_ignore : bool True if the raw content contains an audit-ignore marker. """ - name: Optional[str] # None for unicode entries - tag: Optional[str] # None for unicode entries - key: str # For unicode entries, this is the character/range + name: str | None # None for unicode entries + tag: str | None # None for unicode entries + key: str # For unicode entries, this is the character/range line_number: int raw_content: str - data: Optional[Any] = None - untranslated_entries: List[Tuple[str, str, Optional[int]]] = field(default_factory=list) - line_map: Dict[str, List[int]] = field(default_factory=dict) + data: Any | None = None + untranslated_entries: list[tuple[str, str, int | None]] = field(default_factory=list) + line_map: dict[str, list[int]] = field(default_factory=dict) audit_ignore: bool = False @property @@ -53,7 +53,7 @@ def has_untranslated_text(self) -> bool: return bool(self.untranslated_entries) @property - def untranslated_keys(self) -> List[str]: + def untranslated_keys(self) -> list[str]: return [entry[1] for entry in self.untranslated_entries] @@ -71,13 +71,13 @@ class RuleDifference: @dataclass class ComparisonResult: """Results from comparing English and translated files""" - missing_rules: List[RuleInfo] # Rules in English but not in translation - extra_rules: List[RuleInfo] # Rules in translation but not in English - untranslated_text: List[Tuple[RuleInfo, List[Tuple[str, str, Optional[int]]]]] # Rules with lowercase t/ot/ct + missing_rules: list[RuleInfo] # Rules in English but not in translation + extra_rules: list[RuleInfo] # Rules in translation but not in English + untranslated_text: list[tuple[RuleInfo, list[tuple[str, str, int | None]]]] # Rules with lowercase t/ot/ct file_path: str english_rule_count: int translated_rule_count: int - rule_differences: List[RuleDifference] = field(default_factory=list) # Fine-grained diffs + rule_differences: list[RuleDifference] = field(default_factory=list) # Fine-grained diffs @property def has_issues(self) -> bool: diff --git a/PythonScripts/audit_translations/line_resolver.py b/PythonScripts/audit_translations/line_resolver.py index f8331d38..e440c5f6 100644 --- a/PythonScripts/audit_translations/line_resolver.py +++ b/PythonScripts/audit_translations/line_resolver.py @@ -4,13 +4,11 @@ Maps rule diff types and structure tokens to precise YAML source line numbers. """ -from typing import List, Optional, Tuple - from .dataclasses import RuleInfo, RuleDifference from .parsers import extract_structure_elements -def _get_line_map_lines(rule: RuleInfo, kind: str, token: Optional[str] = None) -> List[int]: +def _get_line_map_lines(rule: RuleInfo, kind: str, token: str | None = None) -> list[int]: """Return the line-number list for a given element kind from the rule's line map.""" if kind == "match": return rule.line_map.get("match", []) @@ -24,9 +22,9 @@ def _get_line_map_lines(rule: RuleInfo, kind: str, token: Optional[str] = None) def first_structure_mismatch( - english_tokens: List[str], - translated_tokens: List[str], -) -> Tuple[Optional[str], Optional[str], int]: + english_tokens: list[str], + translated_tokens: list[str], +) -> tuple[str | None, str | None, int]: """ Find the first structural mismatch between two token lists. @@ -47,9 +45,9 @@ def first_structure_mismatch( def resolve_issue_line_at_position( rule: RuleInfo, kind: str, - token: Optional[str] = None, + token: str | None = None, position: int = 0, -) -> Optional[int]: +) -> int | None: """ Resolve the line number for a specific occurrence of an element within a rule. @@ -66,7 +64,7 @@ def resolve_issue_line_at_position( return lines[position] if position < len(lines) else None -def resolve_issue_line(rule: RuleInfo, kind: str, token: Optional[str] = None) -> Optional[int]: +def resolve_issue_line(rule: RuleInfo, kind: str, token: str | None = None) -> int | None: """ Resolve the line number for an issue within a rule. @@ -80,7 +78,7 @@ def resolve_issue_line(rule: RuleInfo, kind: str, token: Optional[str] = None) - return lines[0] if lines else rule.line_number -def structure_token_occurrence_index(tokens: List[str], position: int) -> Optional[int]: +def structure_token_occurrence_index(tokens: list[str], position: int) -> int | None: """ Return which occurrence of a token appears at a given absolute token position. @@ -92,7 +90,7 @@ def structure_token_occurrence_index(tokens: List[str], position: int) -> Option return sum(1 for current in tokens[:position] if current == token) -def resolve_structure_issue_lines(diff: RuleDifference) -> Optional[Tuple[int, int]]: +def resolve_structure_issue_lines(diff: RuleDifference) -> tuple[int, int] | None: """ Resolve stable line anchors for a structural rule difference. @@ -147,7 +145,7 @@ def resolve_structure_issue_lines(diff: RuleDifference) -> Optional[Tuple[int, i return line_en, line_tr -def resolve_diff_lines(diff: RuleDifference) -> Optional[Tuple[Optional[int], Optional[int]]]: +def resolve_diff_lines(diff: RuleDifference) -> tuple[int | None, int | None] | None: """ Resolve issue line numbers for a rule difference. diff --git a/PythonScripts/audit_translations/parsers.py b/PythonScripts/audit_translations/parsers.py index cea57504..352de209 100644 --- a/PythonScripts/audit_translations/parsers.py +++ b/PythonScripts/audit_translations/parsers.py @@ -5,7 +5,7 @@ """ import os -from typing import Any, Dict, Iterator, List, Optional, Tuple +from typing import Any, Iterator from jsonpath_ng.ext import parse from jsonpath_ng.jsonpath import Fields @@ -21,7 +21,7 @@ def is_unicode_file(file_path: str) -> bool: return basename in ("unicode.yaml", "unicode-full.yaml") -def parse_yaml_file(file_path: str, strict: bool = False) -> Tuple[List[RuleInfo], str]: +def parse_yaml_file(file_path: str, strict: bool = False) -> tuple[list[RuleInfo], str]: """ Parse a YAML file and extract rules. Returns list of RuleInfo and the raw file content. @@ -53,8 +53,7 @@ def parse_yaml_file(file_path: str, strict: bool = False) -> Tuple[List[RuleInfo return rules, content - -def format_tag(tag_value: Any) -> Optional[str]: +def format_tag(tag_value: Any) -> str | None: if tag_value is None: return None if isinstance(tag_value, list): @@ -63,7 +62,7 @@ def format_tag(tag_value: Any) -> Optional[str]: return str(tag_value) -def build_raw_blocks(lines: List[str], starts: List[int]) -> List[str]: +def build_raw_blocks(lines: list[str], starts: list[int]) -> list[str]: blocks = [] if not starts: return blocks @@ -73,7 +72,7 @@ def build_raw_blocks(lines: List[str], starts: List[int]) -> List[str]: return blocks -def mapping_key_line(mapping: Any, key: str) -> Optional[int]: +def mapping_key_line(mapping: Any, key: str) -> int | None: """ - 'lc' is line and column in YAML file: https://yaml.dev/doc/ruamel.yaml/detail/ """ @@ -83,7 +82,7 @@ def mapping_key_line(mapping: Any, key: str) -> Optional[int]: return None -def iter_field_matches(node: Any) -> Iterator[Tuple[str, Any, Any]]: +def iter_field_matches(node: Any) -> Iterator[tuple[str, Any, Any]]: """ Iterate nested mapping fields using jsonpath. @@ -99,16 +98,16 @@ def iter_field_matches(node: Any) -> Iterator[Tuple[str, Any, Any]]: yield key, match.value, parent -def parse_rules_file(content: str, data: Any) -> List[RuleInfo]: +def parse_rules_file(content: str, data: Any) -> list[RuleInfo]: """Parse a standard rules file with name/tag entries""" if not isinstance(data, list): return [] - rules: List[RuleInfo] = [] + rules: list[RuleInfo] = [] lines = content.splitlines() - start_lines: List[int] = [] - rule_items: List[Any] = [] + start_lines: list[int] = [] + rule_items: list[Any] = [] for idx, item in enumerate(data): if isinstance(item, dict) and "name" in item: line = data.lc.item(idx)[0] if hasattr(data, "lc") else 0 @@ -136,16 +135,16 @@ def parse_rules_file(content: str, data: Any) -> List[RuleInfo]: return rules -def parse_unicode_file(content: str, data: Any) -> List[RuleInfo]: +def parse_unicode_file(content: str, data: Any) -> list[RuleInfo]: """Parse a unicode file with character/range keys""" if not isinstance(data, list): return [] - rules: List[RuleInfo] = [] + rules: list[RuleInfo] = [] lines = content.splitlines() - start_lines: List[int] = [] - entries: List[Tuple[str, Any]] = [] + start_lines: list[int] = [] + entries: list[tuple[str, Any]] = [] for idx, item in enumerate(data): if isinstance(item, dict) and len(item) == 1: key = next(iter(item.keys())) @@ -177,7 +176,7 @@ def has_audit_ignore(content: str) -> bool: return '# audit-ignore' in content -def find_untranslated_text_values(node: Any) -> List[str]: +def find_untranslated_text_values(node: Any) -> list[str]: """ Find lowercase text keys (t, ot, ct, spell, pronounce, ifthenelse) that should be uppercase in translations. Returns list of the untranslated text values found. @@ -185,12 +184,12 @@ def find_untranslated_text_values(node: Any) -> List[str]: return [entry[1] for entry in find_untranslated_text_entries(node)] -def find_untranslated_text_entries(node: Any) -> List[Tuple[str, str, Optional[int]]]: +def find_untranslated_text_entries(node: Any) -> list[tuple[str, str, int | None]]: """ Find lowercase text keys (t, ot, ct, spell, pronounce, ifthenelse) and their line numbers. Returns list of (key, text, line_number) entries. Line number is 1-based when available. """ - entries: List[Tuple[str, str, Optional[int]]] = [] + entries: list[tuple[str, str, int | None]] = [] translation_keys = {"t", "ot", "ct", "spell", "pronounce", "ifthenelse"} def should_add(text: str) -> bool: @@ -213,12 +212,12 @@ def should_add(text: str) -> bool: return entries -def build_line_map(node: Any) -> Dict[str, List[int]]: +def build_line_map(node: Any) -> dict[str, list[int]]: """ Build a mapping of rule element types to line numbers. Line numbers are 1-based. Missing elements are omitted. """ - line_map: Dict[str, List[int]] = {} + line_map: dict[str, list[int]] = {} structure_tokens = { "test", "if", @@ -232,7 +231,7 @@ def build_line_map(node: Any) -> Dict[str, List[int]]: "intent", } - def add_line(kind: str, line: Optional[int]) -> None: + def add_line(kind: str, line: int | None) -> None: if line is None: return line_map.setdefault(kind, []).append(line) @@ -260,7 +259,8 @@ def normalize_match(value: Any) -> str: def normalize_xpath(value: str) -> str: return " ".join(value.split()) -def dedup_list(values: List[str]) -> List[str]: + +def dedup_list(values: list[str]) -> list[str]: """ Return a list without duplicates while preserving first-seen order. Originally, rule differences were stored as sets, losing their original order, @@ -270,8 +270,7 @@ def dedup_list(values: List[str]) -> List[str]: >>> dedup_list(["if:a", "if:b", "if:a"]) ['if:a', 'if:b'] """ - - return list(dict.fromkeys(values)) # dict preserves insertion order (guaranteed in Python 3.7+) + return list(dict.fromkeys(values)) # dict preserves insertion order (guaranteed in Python 3.7+) def extract_match_pattern(rule_data: Any) -> str: @@ -282,18 +281,18 @@ def extract_match_pattern(rule_data: Any) -> str: return "" -def extract_conditions(rule_data: Any) -> List[str]: +def extract_conditions(rule_data: Any) -> list[str]: """Extract all if/else conditions from a rule""" - conditions: List[str] = [] + conditions: list[str] = [] for key, child, _ in iter_field_matches(rule_data): if key in ("if", "else_if") and isinstance(child, str): conditions.append(child) return conditions -def extract_variables(rule_data: Any) -> List[Tuple[str, str]]: +def extract_variables(rule_data: Any) -> list[tuple[str, str]]: """Extract variable definitions from a rule""" - variables: List[Tuple[str, str]] = [] + variables: list[tuple[str, str]] = [] def add_from_value(value: Any) -> None: if isinstance(value, dict): @@ -311,9 +310,9 @@ def add_from_value(value: Any) -> None: return variables -def extract_structure_elements(rule_data: Any) -> List[str]: +def extract_structure_elements(rule_data: Any) -> list[str]: """Extract structural elements (test, with, replace blocks) ignoring text content""" - elements: List[str] = [] + elements: list[str] = [] tokens = {"test", "if", "else_if", "then", "else", "then_test", "else_test", "with", "replace", "intent"} for key, _, _ in iter_field_matches(rule_data): if key in tokens: @@ -321,7 +320,7 @@ def extract_structure_elements(rule_data: Any) -> List[str]: return elements -def diff_rules(english_rule: RuleInfo, translated_rule: RuleInfo) -> List[RuleDifference]: +def diff_rules(english_rule: RuleInfo, translated_rule: RuleInfo) -> list[RuleDifference]: """ Compare two rules and return fine-grained differences. Ignores text content differences (T/t values) but catches structural changes. diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py index 0aaae5d8..ef19bf73 100644 --- a/PythonScripts/audit_translations/renderer.py +++ b/PythonScripts/audit_translations/renderer.py @@ -7,7 +7,7 @@ import json from pathlib import Path -from typing import Any, Dict, List, Tuple +from typing import Any from rich.console import Console from rich.markup import escape @@ -25,7 +25,7 @@ def rule_label(rule: RuleInfo) -> str: return f"[cyan]{escape(rule.name)}[/] [dim]({escape(tag)})[/]" -def issue_type_sort_key(issue_type: str) -> Tuple[int, str]: +def issue_type_sort_key(issue_type: str) -> tuple[int, str]: """ Stable ordering for per-rule issue groups. @@ -179,9 +179,9 @@ def print_warnings( console.print(f" [dim]English: {result.english_rule_count} rules → Translated: {result.translated_rule_count} rules[/]") console.rule(style="cyan") - grouped_issues: Dict[str, Dict[str, Any]] = {} + grouped_issues: dict[str, dict[str, Any]] = {} - def add_issue(rule: RuleInfo, issue_type: str, payload: Dict[str, Any]) -> None: + def add_issue(rule: RuleInfo, issue_type: str, payload: dict[str, Any]) -> None: if rule.key not in grouped_issues: grouped_issues[rule.key] = {"rule": rule, "by_type": {}} grouped_issues[rule.key]["by_type"].setdefault(issue_type, []).append(payload) @@ -219,7 +219,7 @@ def add_issue(rule: RuleInfo, issue_type: str, payload: Dict[str, Any]) -> None: ) for group in grouped_issues.values(): rule = group["rule"] - by_type: Dict[str, List[Dict[str, Any]]] = group["by_type"] + by_type: dict[str, list[dict[str, Any]]] = group["by_type"] console.print(f" [dim]•[/] {rule_label(rule)}") for issue_type in sorted(by_type.keys(), key=issue_type_sort_key): entries = by_type[issue_type] From 500ce8744769a2df08adf468c62ec503acdbf65b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Tue, 3 Mar 2026 03:14:33 +0100 Subject: [PATCH 5/7] . --- PythonScripts/audit_translations/renderer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py index ef19bf73..8728eeaf 100644 --- a/PythonScripts/audit_translations/renderer.py +++ b/PythonScripts/audit_translations/renderer.py @@ -82,7 +82,7 @@ def collect_issues( result: ComparisonResult, file_name: str, language: str, -) -> List[dict]: +) -> list[dict]: """ Flatten a ComparisonResult into one normalized dictionary per issue. From 06b5a0748f7e90a28ec9bb4d9f8403c09432391b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Tue, 3 Mar 2026 03:22:27 +0100 Subject: [PATCH 6/7] . --- PythonScripts/audit_translations/auditor.py | 11 +++------- PythonScripts/audit_translations/parsers.py | 22 +++++++++++--------- PythonScripts/audit_translations/renderer.py | 1 - 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py index 5d49cbf2..845f46e4 100644 --- a/PythonScripts/audit_translations/auditor.py +++ b/PythonScripts/audit_translations/auditor.py @@ -21,14 +21,9 @@ __all__ = ["console"] -def normalize_language(language: str) -> str: - """Return a normalized language code (lowercase, '-' separators).""" - return language.lower().replace("_", "-") - - -def split_language(language: str) -> tuple[str, str | None]: +def split_language_into_base_and_region(language: str) -> tuple[str, str | None]: """Split a language code into base and optional region.""" - normalized = normalize_language(language) + normalized = language.lower().replace("_", "-") if "-" in normalized: base, region = normalized.split("-", 1) return base, region or None @@ -163,7 +158,7 @@ def audit_language( rules_dir_path = get_rules_dir(rules_dir) english_dir = rules_dir_path / "en" - base_language, region = split_language(language) + base_language, region = split_language_into_base_and_region(language) translated_dir = rules_dir_path / base_language translated_region_dir = translated_dir / region if region else None english_region_dir = english_dir / region if region else None diff --git a/PythonScripts/audit_translations/parsers.py b/PythonScripts/audit_translations/parsers.py index 352de209..7c7e4669 100644 --- a/PythonScripts/audit_translations/parsers.py +++ b/PythonScripts/audit_translations/parsers.py @@ -4,7 +4,7 @@ Handles parsing of rule files and unicode files to extract rule information. """ -import os +from pathlib import Path from typing import Any, Iterator from jsonpath_ng.ext import parse @@ -14,10 +14,16 @@ from .dataclasses import RuleInfo, RuleDifference +_yaml = YAML() +_yaml.preserve_quotes = True + +_ALL_FIELDS_EXPR = parse('$..*') # '..' is recursive descent +_MATCH_EXPR = parse('$.match') + def is_unicode_file(file_path: str) -> bool: """Check if this is a unicode.yaml or unicode-full.yaml file""" - basename = os.path.basename(file_path) + basename = Path(file_path).name return basename in ("unicode.yaml", "unicode-full.yaml") @@ -32,16 +38,14 @@ def parse_yaml_file(file_path: str, strict: bool = False) -> tuple[list[RuleInfo with open(file_path, 'r', encoding='utf-8') as f: content = f.read() - yaml = YAML() - yaml.preserve_quotes = True try: - data = yaml.load(content) + data = _yaml.load(content) except ScannerError as exc: if strict: raise exc if "\t" in content: sanitized = content.replace("\t", " ") - data = yaml.load(sanitized) + data = _yaml.load(sanitized) else: raise exc @@ -88,9 +92,7 @@ def iter_field_matches(node: Any) -> Iterator[tuple[str, Any, Any]]: Returns tuples of (key, child_value, parent_mapping) in traversal order. """ - all_fields_expr = parse('$..*') # '..' is recursive descent - - for match in all_fields_expr.find(node): + for match in _ALL_FIELDS_EXPR.find(node): path = match.path if isinstance(path, Fields) and len(path.fields) == 1: key = path.fields[0] @@ -275,7 +277,7 @@ def dedup_list(values: list[str]) -> list[str]: def extract_match_pattern(rule_data: Any) -> str: if isinstance(rule_data, dict): - matches = parse('$.match').find(rule_data) + matches = _MATCH_EXPR.find(rule_data) if matches: return normalize_match(matches[0].value) return "" diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py index 8728eeaf..399d2d07 100644 --- a/PythonScripts/audit_translations/renderer.py +++ b/PythonScripts/audit_translations/renderer.py @@ -5,7 +5,6 @@ and the IssueWriter interface. """ -import json from pathlib import Path from typing import Any From 0c0a77c22eb943bc0e50c835f2eb0842e2fb9bd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Wed, 4 Mar 2026 02:32:13 +0100 Subject: [PATCH 7/7] apply ruff formatting --- PythonScripts/audit_translations/__init__.py | 6 +- PythonScripts/audit_translations/auditor.py | 2 +- PythonScripts/audit_translations/cli.py | 7 +- .../audit_translations/dataclasses.py | 11 +- PythonScripts/audit_translations/parsers.py | 136 +++++++++--------- PythonScripts/audit_translations/renderer.py | 32 ++--- .../audit_translations/tests/test_auditor.py | 11 +- .../tests/test_cli_end_to_end.py | 8 +- .../tests/test_line_resolver.py | 9 +- .../tests/test_output_jsonl.py | 3 +- .../audit_translations/tests/test_parsers.py | 1 + PythonScripts/pyproject.toml | 12 +- PythonScripts/uv.lock | 31 +++- 13 files changed, 154 insertions(+), 115 deletions(-) diff --git a/PythonScripts/audit_translations/__init__.py b/PythonScripts/audit_translations/__init__.py index 194b7acf..d17aae02 100644 --- a/PythonScripts/audit_translations/__init__.py +++ b/PythonScripts/audit_translations/__init__.py @@ -7,10 +7,12 @@ Read README.md for more details. """ + import sys -sys.stdout.reconfigure(encoding='utf-8') + +sys.stdout.reconfigure(encoding="utf-8") from .cli import main __all__ = [ - 'main', + "main", ] diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py index 845f46e4..7ad8a216 100644 --- a/PythonScripts/audit_translations/auditor.py +++ b/PythonScripts/audit_translations/auditor.py @@ -141,7 +141,7 @@ def merge_rules(base_rules: list[RuleInfo], region_rules: list[RuleInfo]) -> lis rule_differences=rule_differences, file_path=translated_path, english_rule_count=len(english_rules), - translated_rule_count=len(translated_rules) + translated_rule_count=len(translated_rules), ) diff --git a/PythonScripts/audit_translations/cli.py b/PythonScripts/audit_translations/cli.py index 06c3dd7a..edeb190d 100644 --- a/PythonScripts/audit_translations/cli.py +++ b/PythonScripts/audit_translations/cli.py @@ -21,7 +21,7 @@ def main(): uv run audit-translations es uv run audit-translations de --file SharedRules/default.yaml uv run audit-translations --list - """ + """, ) parser.add_argument("language", nargs="?", help="Language code to audit (e.g., 'es', 'de', 'fi')") @@ -61,10 +61,7 @@ def main(): allowed = {"missing", "untranslated", "extra", "diffs"} unknown = set(tokens) - allowed if unknown: - console.print( - "\n[red]Error:[/] Unknown issue types: " - + ", ".join(sorted(unknown)) - ) + console.print("\n[red]Error:[/] Unknown issue types: " + ", ".join(sorted(unknown))) sys.exit(1) issue_filter = set(tokens) diff --git a/PythonScripts/audit_translations/dataclasses.py b/PythonScripts/audit_translations/dataclasses.py index cdd2ca88..98a7098e 100644 --- a/PythonScripts/audit_translations/dataclasses.py +++ b/PythonScripts/audit_translations/dataclasses.py @@ -38,9 +38,10 @@ class RuleInfo: audit_ignore : bool True if the raw content contains an audit-ignore marker. """ + name: str | None # None for unicode entries - tag: str | None # None for unicode entries - key: str # For unicode entries, this is the character/range + tag: str | None # None for unicode entries + key: str # For unicode entries, this is the character/range line_number: int raw_content: str data: Any | None = None @@ -60,6 +61,7 @@ def untranslated_keys(self) -> list[str]: @dataclass class RuleDifference: """Fine-grained difference between English and translated rule""" + english_rule: RuleInfo translated_rule: RuleInfo diff_type: str # 'match', 'condition', 'structure', 'variables' @@ -71,8 +73,9 @@ class RuleDifference: @dataclass class ComparisonResult: """Results from comparing English and translated files""" - missing_rules: list[RuleInfo] # Rules in English but not in translation - extra_rules: list[RuleInfo] # Rules in translation but not in English + + missing_rules: list[RuleInfo] # Rules in English but not in translation + extra_rules: list[RuleInfo] # Rules in translation but not in English untranslated_text: list[tuple[RuleInfo, list[tuple[str, str, int | None]]]] # Rules with lowercase t/ot/ct file_path: str english_rule_count: int diff --git a/PythonScripts/audit_translations/parsers.py b/PythonScripts/audit_translations/parsers.py index 7c7e4669..eda0a375 100644 --- a/PythonScripts/audit_translations/parsers.py +++ b/PythonScripts/audit_translations/parsers.py @@ -17,8 +17,8 @@ _yaml = YAML() _yaml.preserve_quotes = True -_ALL_FIELDS_EXPR = parse('$..*') # '..' is recursive descent -_MATCH_EXPR = parse('$.match') +_ALL_FIELDS_EXPR = parse("$..*") # '..' is recursive descent +_MATCH_EXPR = parse("$.match") def is_unicode_file(file_path: str) -> bool: @@ -35,7 +35,7 @@ def parse_yaml_file(file_path: str, strict: bool = False) -> tuple[list[RuleInfo For standard rule files: extracts rules with name/tag For unicode files: extracts entries with character/range keys """ - with open(file_path, 'r', encoding='utf-8') as f: + with open(file_path, "r", encoding="utf-8") as f: content = f.read() try: @@ -122,17 +122,19 @@ def parse_rules_file(content: str, data: Any) -> list[RuleInfo]: rule_name = str(item.get("name")) tag = format_tag(item.get("tag")) rule_key = f"{rule_name}|{tag or 'unknown'}" - rules.append(RuleInfo( - name=rule_name, - tag=tag, - key=rule_key, - line_number=line_idx + 1, - raw_content=raw_content, - data=item, - untranslated_entries=find_untranslated_text_entries(item), - line_map=build_line_map(item), - audit_ignore=has_audit_ignore(raw_content) - )) + rules.append( + RuleInfo( + name=rule_name, + tag=tag, + key=rule_key, + line_number=line_idx + 1, + raw_content=raw_content, + data=item, + untranslated_entries=find_untranslated_text_entries(item), + line_map=build_line_map(item), + audit_ignore=has_audit_ignore(raw_content), + ) + ) return rules @@ -158,24 +160,26 @@ def parse_unicode_file(content: str, data: Any) -> list[RuleInfo]: raw_blocks = build_raw_blocks(lines, start_lines) for (char_key, value), raw_content, line_idx in zip(entries, raw_blocks, start_lines): - rules.append(RuleInfo( - name=None, - tag=None, - key=char_key, - line_number=line_idx + 1, - raw_content=raw_content, - data=value, - untranslated_entries=find_untranslated_text_entries(value), - line_map=build_line_map(value), - audit_ignore=has_audit_ignore(raw_content) - )) + rules.append( + RuleInfo( + name=None, + tag=None, + key=char_key, + line_number=line_idx + 1, + raw_content=raw_content, + data=value, + untranslated_entries=find_untranslated_text_entries(value), + line_map=build_line_map(value), + audit_ignore=has_audit_ignore(raw_content), + ) + ) return rules def has_audit_ignore(content: str) -> bool: """Check if the rule content contains an audit-ignore comment""" - return '# audit-ignore' in content + return "# audit-ignore" in content def find_untranslated_text_values(node: Any) -> list[str]: @@ -199,16 +203,12 @@ def should_add(text: str) -> bool: return False if len(text) == 1 and not text.isalpha(): return False - if text.startswith('$') or text.startswith('@'): + if text.startswith("$") or text.startswith("@"): return False return True for key, child, parent in iter_field_matches(node): - if ( - key.lower() in translation_keys - and not key.isupper() - and isinstance(child, str) - ): + if key.lower() in translation_keys and not key.isupper() and isinstance(child, str): if should_add(child): entries.append((key, child, mapping_key_line(parent, key))) return entries @@ -334,14 +334,16 @@ def diff_rules(english_rule: RuleInfo, translated_rule: RuleInfo) -> list[RuleDi en_match = normalize_xpath(en_match_raw) translated_match = normalize_xpath(translated_match_raw) if en_match != translated_match and en_match and translated_match: - differences.append(RuleDifference( - english_rule=english_rule, - translated_rule=translated_rule, - diff_type='match', - description='Match pattern differs', - english_snippet=en_match, - translated_snippet=translated_match - )) + differences.append( + RuleDifference( + english_rule=english_rule, + translated_rule=translated_rule, + diff_type="match", + description="Match pattern differs", + english_snippet=en_match, + translated_snippet=translated_match, + ) + ) # Check condition differences en_conditions_raw = extract_conditions(english_rule.data) @@ -352,14 +354,16 @@ def diff_rules(english_rule: RuleInfo, translated_rule: RuleInfo) -> list[RuleDi # Find specific differences en_set, tr_set = set(en_conditions), set(tr_conditions) if en_set != tr_set: - differences.append(RuleDifference( - english_rule=english_rule, - translated_rule=translated_rule, - diff_type='condition', - description='Conditions differ', - english_snippet=', '.join(dedup_list(en_conditions)) or '(none)', - translated_snippet=', '.join(dedup_list(tr_conditions)) or '(none)' - )) + differences.append( + RuleDifference( + english_rule=english_rule, + translated_rule=translated_rule, + diff_type="condition", + description="Conditions differ", + english_snippet=", ".join(dedup_list(en_conditions)) or "(none)", + translated_snippet=", ".join(dedup_list(tr_conditions)) or "(none)", + ) + ) # Check variable differences en_vars = extract_variables(english_rule.data) @@ -368,26 +372,30 @@ def diff_rules(english_rule: RuleInfo, translated_rule: RuleInfo) -> list[RuleDi en_var_names = {v[0] for v in en_vars} tr_var_names = {v[0] for v in tr_vars} if en_var_names != tr_var_names: - differences.append(RuleDifference( - english_rule=english_rule, - translated_rule=translated_rule, - diff_type='variables', - description='Variable definitions differ', - english_snippet=', '.join(sorted(en_var_names)) or '(none)', - translated_snippet=', '.join(sorted(tr_var_names)) or '(none)' - )) + differences.append( + RuleDifference( + english_rule=english_rule, + translated_rule=translated_rule, + diff_type="variables", + description="Variable definitions differ", + english_snippet=", ".join(sorted(en_var_names)) or "(none)", + translated_snippet=", ".join(sorted(tr_var_names)) or "(none)", + ) + ) # Check structural differences (test/if/then/else blocks) en_structure = extract_structure_elements(english_rule.data) tr_structure = extract_structure_elements(translated_rule.data) if en_structure != tr_structure: - differences.append(RuleDifference( - english_rule=english_rule, - translated_rule=translated_rule, - diff_type='structure', - description='Rule structure differs (test/if/then/else blocks)', - english_snippet=' '.join(en_structure), - translated_snippet=' '.join(tr_structure) - )) + differences.append( + RuleDifference( + english_rule=english_rule, + translated_rule=translated_rule, + diff_type="structure", + description="Rule structure differs (test/if/then/else blocks)", + english_snippet=" ".join(en_structure), + translated_snippet=" ".join(tr_structure), + ) + ) return differences diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py index 399d2d07..14779df8 100644 --- a/PythonScripts/audit_translations/renderer.py +++ b/PythonScripts/audit_translations/renderer.py @@ -19,7 +19,7 @@ def rule_label(rule: RuleInfo) -> str: if rule.name is None: - return f"[yellow]\"{escape(rule.key)}\"[/]" + return f'[yellow]"{escape(rule.key)}"[/]' tag = rule.tag or "unknown" return f"[cyan]{escape(rule.name)}[/] [dim]({escape(tag)})[/]" @@ -170,8 +170,13 @@ def print_warnings( if not result.has_issues: return issues - style, icon = ("green", "✓") if result.translated_rule_count == result.english_rule_count else \ - ("red", "✗") if result.translated_rule_count == 0 else ("yellow", "⚠") + style, icon = ( + ("green", "✓") + if result.translated_rule_count == result.english_rule_count + else ("red", "✗") + if result.translated_rule_count == 0 + else ("yellow", "⚠") + ) console.print() console.rule(style="cyan") console.print(f"[{style}]{icon}[/] [bold]{escape(display_name)}[/]") @@ -207,11 +212,7 @@ def add_issue(rule: RuleInfo, issue_type: str, payload: dict[str, Any]) -> None: add_issue(rule, "extra_rule", {"line_tr": rule.line_number}) if grouped_issues: - total_grouped_issues = sum( - len(entries) - for group in grouped_issues.values() - for entries in group["by_type"].values() - ) + total_grouped_issues = sum(len(entries) for group in grouped_issues.values() for entries in group["by_type"].values()) console.print( f"\n [magenta]≠[/] [bold]Rule Issues[/] " f"[[magenta]{total_grouped_issues}[/]] [dim](grouped by rule and issue type)[/]" @@ -222,25 +223,18 @@ def add_issue(rule: RuleInfo, issue_type: str, payload: dict[str, Any]) -> None: console.print(f" [dim]•[/] {rule_label(rule)}") for issue_type in sorted(by_type.keys(), key=issue_type_sort_key): entries = by_type[issue_type] - console.print( - f" [dim]{issue_type_label(issue_type)} " - f"[{len(entries)}][/]" - ) + console.print(f" [dim]{issue_type_label(issue_type)} [{len(entries)}][/]") for entry in entries: if issue_type == "missing_rule": - console.print( - f" [dim]•[/] [dim](line {entry['line_en']} in English)[/]" - ) + console.print(f" [dim]•[/] [dim](line {entry['line_en']} in English)[/]") issues += 1 elif issue_type == "extra_rule": - console.print( - f" [dim]•[/] [dim](line {entry['line_tr']} in {target_label})[/]" - ) + console.print(f" [dim]•[/] [dim](line {entry['line_tr']} in {target_label})[/]") issues += 1 elif issue_type == "untranslated_text": console.print( f" [dim]•[/] [dim](line {entry['line_tr']} {target_label})[/] " - f"[yellow]\"{escape(entry['text'])}\"[/]" + f'[yellow]"{escape(entry["text"])}"[/]' ) issues += 1 else: diff --git a/PythonScripts/audit_translations/tests/test_auditor.py b/PythonScripts/audit_translations/tests/test_auditor.py index fa60b143..7e00ae3f 100644 --- a/PythonScripts/audit_translations/tests/test_auditor.py +++ b/PythonScripts/audit_translations/tests/test_auditor.py @@ -337,8 +337,7 @@ def test_missing_else_block_is_still_reported() -> None: # CRITICAL: This legitimate difference should still be reported # One file has else:, the other doesn't - a clear missing element assert len(structure_issues) == 1, ( - "Expected missing else block to be reported, " - f"but found {len(structure_issues)} structure issues" + f"Expected missing else block to be reported, but found {len(structure_issues)} structure issues" ) # Verify the issue anchors to the last shared structure token ('then:') @@ -475,9 +474,7 @@ def test_print_warnings_shows_misaligned_structures() -> None: output = capture.get() # Misaligned structure differences should be rendered. - assert "Rule structure differs" in output, ( - "Expected misaligned structure differences to be shown in display" - ) + assert "Rule structure differs" in output, "Expected misaligned structure differences to be shown in display" # The issues count should include both condition + structure differences. condition_diffs = [diff for diff in result.rule_differences if diff.diff_type == "condition"] @@ -504,9 +501,7 @@ def test_print_warnings_still_shows_missing_else() -> None: output = capture.get() # CRITICAL: This legitimate difference should appear in output - assert "Rule structure differs" in output, ( - "Expected missing else block to be shown in output" - ) + assert "Rule structure differs" in output, "Expected missing else block to be shown in output" # Should report exactly 1 issue (the structure difference) assert issues_count == 1, f"Expected 1 issue but got {issues_count}" diff --git a/PythonScripts/audit_translations/tests/test_cli_end_to_end.py b/PythonScripts/audit_translations/tests/test_cli_end_to_end.py index b9b49790..5fa6f137 100644 --- a/PythonScripts/audit_translations/tests/test_cli_end_to_end.py +++ b/PythonScripts/audit_translations/tests/test_cli_end_to_end.py @@ -58,9 +58,7 @@ def test_cli_module_jsonl_output_matches_fixture() -> None: python_scripts_dir = Path(__file__).resolve().parents[2] env = os.environ.copy() - env["PYTHONPATH"] = os.pathsep.join( - [str(python_scripts_dir), env.get("PYTHONPATH", "")] - ).strip(os.pathsep) + env["PYTHONPATH"] = os.pathsep.join([str(python_scripts_dir), env.get("PYTHONPATH", "")]).strip(os.pathsep) result = subprocess.run( [sys.executable, "-m", "audit_translations", *args], @@ -234,9 +232,7 @@ def test_cli_module_rich_output_groups_by_rule_and_type() -> None: python_scripts_dir = Path(__file__).resolve().parents[2] env = os.environ.copy() - env["PYTHONPATH"] = os.pathsep.join( - [str(python_scripts_dir), env.get("PYTHONPATH", "")] - ).strip(os.pathsep) + env["PYTHONPATH"] = os.pathsep.join([str(python_scripts_dir), env.get("PYTHONPATH", "")]).strip(os.pathsep) env["COLUMNS"] = "80" result = subprocess.run( diff --git a/PythonScripts/audit_translations/tests/test_line_resolver.py b/PythonScripts/audit_translations/tests/test_line_resolver.py index 49d57cdc..9c48ccee 100644 --- a/PythonScripts/audit_translations/tests/test_line_resolver.py +++ b/PythonScripts/audit_translations/tests/test_line_resolver.py @@ -44,9 +44,12 @@ def test_resolve_diff_lines_uses_line_map_and_falls_back_to_rule_line(): def make_match_diff(en, tr): return RuleDifference( - english_rule=en, translated_rule=tr, - diff_type="match", description="Match differs", - english_snippet="a", translated_snippet="b", + english_rule=en, + translated_rule=tr, + diff_type="match", + description="Match differs", + english_snippet="a", + translated_snippet="b", ) assert resolve_diff_lines(make_match_diff(en_with_map, tr_with_map)) == (10, 20) diff --git a/PythonScripts/audit_translations/tests/test_output_jsonl.py b/PythonScripts/audit_translations/tests/test_output_jsonl.py index e636fb27..a3baa3d4 100644 --- a/PythonScripts/audit_translations/tests/test_output_jsonl.py +++ b/PythonScripts/audit_translations/tests/test_output_jsonl.py @@ -11,10 +11,11 @@ def load_jsonl(text: str) -> list[dict]: return [json.loads(line) for line in text.splitlines() if line.strip()] + def load_json_array(path: Path) -> list[dict]: items = json.loads(path.read_text(encoding="utf-8")) for item in items: - item.pop("_explanation", None) # only for humans + item.pop("_explanation", None) # only for humans return items diff --git a/PythonScripts/audit_translations/tests/test_parsers.py b/PythonScripts/audit_translations/tests/test_parsers.py index 52d3b6aa..13f13dec 100644 --- a/PythonScripts/audit_translations/tests/test_parsers.py +++ b/PythonScripts/audit_translations/tests/test_parsers.py @@ -1,6 +1,7 @@ """ Tests for parsers.py. """ + from typing import List import pytest diff --git a/PythonScripts/pyproject.toml b/PythonScripts/pyproject.toml index eb2d4102..13d0b08e 100644 --- a/PythonScripts/pyproject.toml +++ b/PythonScripts/pyproject.toml @@ -25,7 +25,10 @@ dependencies = [ audit-translations = "audit_translations.cli:main" [dependency-groups] -dev = ["pytest"] +dev = [ + "pytest", + "ruff>=0.15.4", +] [build-system] requires = ["uv_build>=0.9.25,<0.10.0"] @@ -34,3 +37,10 @@ build-backend = "uv_build" [tool.uv.build-backend] module-name = "audit_translations" module-root = "" + +[tool.ruff] +target-version = "py314" +line-length = 130 # easier for some files. maybe decrease in the future + +[tool.ruff.format] +docstring-code-format = true diff --git a/PythonScripts/uv.lock b/PythonScripts/uv.lock index bddb870a..9b09e2b8 100644 --- a/PythonScripts/uv.lock +++ b/PythonScripts/uv.lock @@ -297,6 +297,7 @@ dependencies = [ [package.dev-dependencies] dev = [ { name = "pytest" }, + { name = "ruff" }, ] [package.metadata] @@ -313,7 +314,10 @@ requires-dist = [ ] [package.metadata.requires-dev] -dev = [{ name = "pytest" }] +dev = [ + { name = "pytest" }, + { name = "ruff", specifier = ">=0.15.4" }, +] [[package]] name = "pyyaml" @@ -387,6 +391,31 @@ version = "0.3.4" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/8f/95/9bcc25e84703180c3941062796572e0fc73bd659086efdc4ef9b8af19e36/ruamel_yaml_clibz-0.3.4.tar.gz", hash = "sha256:e99077ac6aa4943af1000161a0cb793a379c5c8cd03ea8dd3803e0b58739b685", size = 231076, upload-time = "2025-12-31T17:11:09.341Z" } +[[package]] +name = "ruff" +version = "0.15.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/da/31/d6e536cdebb6568ae75a7f00e4b4819ae0ad2640c3604c305a0428680b0c/ruff-0.15.4.tar.gz", hash = "sha256:3412195319e42d634470cc97aa9803d07e9d5c9223b99bcb1518f0c725f26ae1", size = 4569550, upload-time = "2026-02-26T20:04:14.959Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/82/c11a03cfec3a4d26a0ea1e571f0f44be5993b923f905eeddfc397c13d360/ruff-0.15.4-py3-none-linux_armv6l.whl", hash = "sha256:a1810931c41606c686bae8b5b9a8072adac2f611bb433c0ba476acba17a332e0", size = 10453333, upload-time = "2026-02-26T20:04:20.093Z" }, + { url = "https://files.pythonhosted.org/packages/ce/5d/6a1f271f6e31dffb31855996493641edc3eef8077b883eaf007a2f1c2976/ruff-0.15.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5a1632c66672b8b4d3e1d1782859e98d6e0b4e70829530666644286600a33992", size = 10853356, upload-time = "2026-02-26T20:04:05.808Z" }, + { url = "https://files.pythonhosted.org/packages/b1/d8/0fab9f8842b83b1a9c2bf81b85063f65e93fb512e60effa95b0be49bfc54/ruff-0.15.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a4386ba2cd6c0f4ff75252845906acc7c7c8e1ac567b7bc3d373686ac8c222ba", size = 10187434, upload-time = "2026-02-26T20:03:54.656Z" }, + { url = "https://files.pythonhosted.org/packages/85/cc/cc220fd9394eff5db8d94dec199eec56dd6c9f3651d8869d024867a91030/ruff-0.15.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2496488bdfd3732747558b6f95ae427ff066d1fcd054daf75f5a50674411e75", size = 10535456, upload-time = "2026-02-26T20:03:52.738Z" }, + { url = "https://files.pythonhosted.org/packages/fa/0f/bced38fa5cf24373ec767713c8e4cadc90247f3863605fb030e597878661/ruff-0.15.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3f1c4893841ff2d54cbda1b2860fa3260173df5ddd7b95d370186f8a5e66a4ac", size = 10287772, upload-time = "2026-02-26T20:04:08.138Z" }, + { url = "https://files.pythonhosted.org/packages/2b/90/58a1802d84fed15f8f281925b21ab3cecd813bde52a8ca033a4de8ab0e7a/ruff-0.15.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:820b8766bd65503b6c30aaa6331e8ef3a6e564f7999c844e9a547c40179e440a", size = 11049051, upload-time = "2026-02-26T20:04:03.53Z" }, + { url = "https://files.pythonhosted.org/packages/d2/ac/b7ad36703c35f3866584564dc15f12f91cb1a26a897dc2fd13d7cb3ae1af/ruff-0.15.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9fb74bab47139c1751f900f857fa503987253c3ef89129b24ed375e72873e85", size = 11890494, upload-time = "2026-02-26T20:04:10.497Z" }, + { url = "https://files.pythonhosted.org/packages/93/3d/3eb2f47a39a8b0da99faf9c54d3eb24720add1e886a5309d4d1be73a6380/ruff-0.15.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f80c98765949c518142b3a50a5db89343aa90f2c2bf7799de9986498ae6176db", size = 11326221, upload-time = "2026-02-26T20:04:12.84Z" }, + { url = "https://files.pythonhosted.org/packages/ff/90/bf134f4c1e5243e62690e09d63c55df948a74084c8ac3e48a88468314da6/ruff-0.15.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:451a2e224151729b3b6c9ffb36aed9091b2996fe4bdbd11f47e27d8f2e8888ec", size = 11168459, upload-time = "2026-02-26T20:04:00.969Z" }, + { url = "https://files.pythonhosted.org/packages/b5/e5/a64d27688789b06b5d55162aafc32059bb8c989c61a5139a36e1368285eb/ruff-0.15.4-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a8f157f2e583c513c4f5f896163a93198297371f34c04220daf40d133fdd4f7f", size = 11104366, upload-time = "2026-02-26T20:03:48.099Z" }, + { url = "https://files.pythonhosted.org/packages/f1/f6/32d1dcb66a2559763fc3027bdd65836cad9eb09d90f2ed6a63d8e9252b02/ruff-0.15.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:917cc68503357021f541e69b35361c99387cdbbf99bd0ea4aa6f28ca99ff5338", size = 10510887, upload-time = "2026-02-26T20:03:45.771Z" }, + { url = "https://files.pythonhosted.org/packages/ff/92/22d1ced50971c5b6433aed166fcef8c9343f567a94cf2b9d9089f6aa80fe/ruff-0.15.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e9737c8161da79fd7cfec19f1e35620375bd8b2a50c3e77fa3d2c16f574105cc", size = 10285939, upload-time = "2026-02-26T20:04:22.42Z" }, + { url = "https://files.pythonhosted.org/packages/e6/f4/7c20aec3143837641a02509a4668fb146a642fd1211846634edc17eb5563/ruff-0.15.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:291258c917539e18f6ba40482fe31d6f5ac023994ee11d7bdafd716f2aab8a68", size = 10765471, upload-time = "2026-02-26T20:03:58.924Z" }, + { url = "https://files.pythonhosted.org/packages/d0/09/6d2f7586f09a16120aebdff8f64d962d7c4348313c77ebb29c566cefc357/ruff-0.15.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3f83c45911da6f2cd5936c436cf86b9f09f09165f033a99dcf7477e34041cbc3", size = 11263382, upload-time = "2026-02-26T20:04:24.424Z" }, + { url = "https://files.pythonhosted.org/packages/1b/fa/2ef715a1cd329ef47c1a050e10dee91a9054b7ce2fcfdd6a06d139afb7ec/ruff-0.15.4-py3-none-win32.whl", hash = "sha256:65594a2d557d4ee9f02834fcdf0a28daa8b3b9f6cb2cb93846025a36db47ef22", size = 10506664, upload-time = "2026-02-26T20:03:50.56Z" }, + { url = "https://files.pythonhosted.org/packages/d0/a8/c688ef7e29983976820d18710f955751d9f4d4eb69df658af3d006e2ba3e/ruff-0.15.4-py3-none-win_amd64.whl", hash = "sha256:04196ad44f0df220c2ece5b0e959c2f37c777375ec744397d21d15b50a75264f", size = 11651048, upload-time = "2026-02-26T20:04:17.191Z" }, + { url = "https://files.pythonhosted.org/packages/3e/0a/9e1be9035b37448ce2e68c978f0591da94389ade5a5abafa4cf99985d1b2/ruff-0.15.4-py3-none-win_arm64.whl", hash = "sha256:60d5177e8cfc70e51b9c5fad936c634872a74209f934c1e79107d11787ad5453", size = 10966776, upload-time = "2026-02-26T20:03:56.908Z" }, +] + [[package]] name = "soupsieve" version = "2.8.3"