Retrieval-based-Voice-Conve.../i18n/scan_i18n.py

import ast
import glob
import json
from collections import OrderedDict


def extract_i18n_strings(node):
    """Collect the string literals passed positionally to ``i18n(...)`` calls.

    The tree rooted at *node* is traversed depth-first in pre-order: a call's
    own literal arguments are recorded before anything nested inside it.

    Only calls through the bare name ``i18n`` are recognised (attribute calls
    such as ``obj.i18n("x")`` are ignored), and only positional arguments that
    are plain ``str`` literals are collected.

    Args:
        node: The AST node to search, e.g. the module returned by ``ast.parse``.

    Returns:
        A list of the collected strings in traversal order; duplicates are kept.
    """
    i18n_strings = []

    # Explicit stack instead of recursion so that very deeply nested
    # expressions cannot exhaust the interpreter's recursion limit.
    pending = [node]
    while pending:
        current = pending.pop()

        if (
            isinstance(current, ast.Call)
            and isinstance(current.func, ast.Name)
            and current.func.id == "i18n"
        ):
            # ast.Str was deprecated in Python 3.8 and removed in 3.12; string
            # literals are ast.Constant nodes whose value is a str.
            i18n_strings.extend(
                arg.value
                for arg in current.args
                if isinstance(arg, ast.Constant) and isinstance(arg.value, str)
            )

        # Push children reversed so they are popped in source order, which
        # keeps the result order identical to a recursive pre-order walk.
        pending.extend(reversed(list(ast.iter_child_nodes(current))))

    return i18n_strings


# scan the directory for all .py files (recursively)
# for each file, parse the code into an AST
# for each AST, extract the i18n strings

# Gather the i18n strings of every Python file below the current directory.
strings = []
for filename in glob.iglob("**/*.py", recursive=True):
    # Decode explicitly as UTF-8 rather than with the platform default
    # encoding: the scanned sources may contain non-ASCII UI strings (the
    # reference locale is zh_CN), which a legacy code page cannot decode.
    with open(filename, "r", encoding="utf-8") as f:
        code = f.read()
    # Cheap textual pre-filter: only files mentioning I18nAuto are parsed.
    if "I18nAuto" in code:
        # Passing the filename makes syntax errors point at the offending file.
        tree = ast.parse(code, filename=filename)
        i18n_strings = extract_i18n_strings(tree)
        print(filename, len(i18n_strings))
        strings.extend(i18n_strings)
code_keys = set(strings)
# NOTE(review): the bare string below looks like sample per-file output from
# an earlier run; as an expression statement it has no runtime effect.
"""
n_i18n.py
gui_v1.py 26
app.py 16
infer-web.py 147
scan_i18n.py 0
i18n.py 0
lib/train/process_ckpt.py 1
"""
print()
print("Total unique:", len(code_keys))


# Reference locale file whose keys are compared with the strings found in code.
standard_file = "i18n/locale/zh_CN.json"
with open(standard_file, "r", encoding="utf-8") as f:
    standard_data = json.load(f, object_pairs_hook=OrderedDict)
standard_keys = set(standard_data.keys())

# Keys present in the locale file but never passed to i18n() in the code.
unused_keys = standard_keys - code_keys
print("Unused keys:", len(unused_keys))
# Sets of str iterate in a hash-randomised order; sort so that the report is
# identical from one run to the next.
for unused_key in sorted(unused_keys):
    print("\t", unused_key)

# Keys passed to i18n() in the code but absent from the locale file.
missing_keys = code_keys - standard_keys
print("Missing keys:", len(missing_keys))
for missing_key in sorted(missing_keys):
    print("\t", missing_key)

# Map every string found in the code to itself (key == value).
code_keys_dict = OrderedDict((text, text) for text in strings)

# Overwrite the reference locale file with the keys found in the code,
# serialised with sorted keys and terminated by a newline.
with open(standard_file, "w", encoding="utf-8") as f:
    serialized = json.dumps(
        code_keys_dict, ensure_ascii=False, indent=4, sort_keys=True
    )
    f.write(serialized + "\n")
add scan_i18n to detect unused and missing keys (#1058) 2023-08-21 10:28:41 +02:00			`import ast`
			`import glob`
			`import json`
			`from collections import OrderedDict`

Format code (#1024) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-21 10:29:27 +02:00
add scan_i18n to detect unused and missing keys (#1058) 2023-08-21 10:28:41 +02:00			`def extract_i18n_strings(node):`
			`i18n_strings = []`

Format code (#1024) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-21 10:29:27 +02:00			`if (`
			`isinstance(node, ast.Call)`
			`and isinstance(node.func, ast.Name)`
			`and node.func.id == "i18n"`
			`):`
add scan_i18n to detect unused and missing keys (#1058) 2023-08-21 10:28:41 +02:00			`for arg in node.args:`
			`if isinstance(arg, ast.Str):`
			`i18n_strings.append(arg.s)`

			`for child_node in ast.iter_child_nodes(node):`
			`i18n_strings.extend(extract_i18n_strings(child_node))`

			`return i18n_strings`

Format code (#1024) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-21 10:29:27 +02:00
add scan_i18n to detect unused and missing keys (#1058) 2023-08-21 10:28:41 +02:00			`# scan the directory for all .py files (recursively)`
			`# for each file, parse the code into an AST`
			`# for each AST, extract the i18n strings`

			`strings = []`
Format code (#1024) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-21 10:29:27 +02:00			`for filename in glob.iglob("**/*.py", recursive=True):`
			`with open(filename, "r") as f:`
add scan_i18n to detect unused and missing keys (#1058) 2023-08-21 10:28:41 +02:00			`code = f.read()`
			`if "I18nAuto" in code:`
			`tree = ast.parse(code)`
			`i18n_strings = extract_i18n_strings(tree)`
			`print(filename, len(i18n_strings))`
			`strings.extend(i18n_strings)`
			`code_keys = set(strings)`
Format code (#1024) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-21 10:29:27 +02:00			`"""`
add scan_i18n to detect unused and missing keys (#1058) 2023-08-21 10:28:41 +02:00			`n_i18n.py`
			`gui_v1.py 26`
			`app.py 16`
			`infer-web.py 147`
			`scan_i18n.py 0`
			`i18n.py 0`
			`lib/train/process_ckpt.py 1`
Format code (#1024) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-21 10:29:27 +02:00			`"""`
add scan_i18n to detect unused and missing keys (#1058) 2023-08-21 10:28:41 +02:00			`print()`
Format code (#1024) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-21 10:29:27 +02:00			`print("Total unique:", len(code_keys))`
add scan_i18n to detect unused and missing keys (#1058) 2023-08-21 10:28:41 +02:00

fix genlocale 2023-08-29 08:41:19 +02:00			`standard_file = "i18n/locale/zh_CN.json"`
			`with open(standard_file, "r", encoding="utf-8") as f:`
add scan_i18n to detect unused and missing keys (#1058) 2023-08-21 10:28:41 +02:00			`standard_data = json.load(f, object_pairs_hook=OrderedDict)`
			`standard_keys = set(standard_data.keys())`

			`# Define the standard file name`
			`unused_keys = standard_keys - code_keys`
Format code (#1024) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-21 10:29:27 +02:00			`print("Unused keys:", len(unused_keys))`
add scan_i18n to detect unused and missing keys (#1058) 2023-08-21 10:28:41 +02:00			`for unused_key in unused_keys:`
Format code (#1024) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-21 10:29:27 +02:00			`print("\t", unused_key)`
add scan_i18n to detect unused and missing keys (#1058) 2023-08-21 10:28:41 +02:00
			`missing_keys = code_keys - standard_keys`
Format code (#1024) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-21 10:29:27 +02:00			`print("Missing keys:", len(missing_keys))`
add scan_i18n to detect unused and missing keys (#1058) 2023-08-21 10:28:41 +02:00			`for missing_key in missing_keys:`
Format code (#1024) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-21 10:29:27 +02:00			`print("\t", missing_key)`
optimize: use scan_i18n -> extract_locale(#1058) 2023-08-26 19:01:43 +02:00
			`code_keys_dict = OrderedDict()`
Format code (#1096) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-26 19:02:39 +02:00			`for s in strings:`
			`code_keys_dict[s] = s`
optimize: use scan_i18n -> extract_locale(#1058) 2023-08-26 19:01:43 +02:00
			`# write back`
fix genlocale 2023-08-29 08:41:19 +02:00			`with open(standard_file, "w", encoding="utf-8") as f:`
optimize: use scan_i18n -> extract_locale(#1058) 2023-08-26 19:01:43 +02:00			`json.dump(code_keys_dict, f, ensure_ascii=False, indent=4, sort_keys=True)`
Format code (#1096) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-26 19:02:39 +02:00			`f.write("\n")`