Retrieval-based-Voice-Conve.../tools/calc_rvc_model_similarity.py

# This code references https://huggingface.co/JosephusCheung/ASimilarityCalculatior/blob/main/qwerty.py
# Fill in the path of the model to be queried and the root directory of the reference models, and this script will return the similarity between the model to be queried and all reference models.
import os
import logging

logger = logging.getLogger(__name__)

import torch
import torch.nn as nn
import torch.nn.functional as F


def cal_cross_attn(to_q, to_k, to_v, rand_input):
    """Run an attention-style computation over ``rand_input``.

    The three 2-D weight matrices are loaded into bias-free linear layers,
    ``rand_input`` is projected through each of them, the query/key scores
    are soft-maxed over the last dimension and then combined with the value
    projection.

    Args:
        to_q: 2-D weight for the query projection.
        to_k: 2-D weight for the key projection.
        to_v: 2-D weight for the value projection.
        rand_input: 2-D tensor that is fed through all three projections.

    Returns:
        The tensor produced by the final ``"ik, jk -> ik"`` contraction.
    """
    hidden_dim, embed_dim = to_q.shape

    def _project(weight):
        # A fresh nn.Linear is built on purpose: its construction draws from
        # the global RNG, and load_state_dict copies (and casts) the weight
        # into the layer's own parameter.
        layer = nn.Linear(hidden_dim, embed_dim, bias=False)
        layer.load_state_dict({"weight": weight})
        return layer(rand_input)

    query = _project(to_q)
    key = _project(to_k)
    value = _project(to_v)

    scores = torch.einsum("ij, kj -> ik", query, key)
    probs = F.softmax(scores, dim=-1)
    return torch.einsum("ik, jk -> ik", probs, value)


def model_hash(filename):
    """Return a short fingerprint of the file at ``filename``.

    A 0x10000-byte window starting at offset 0x100000 is hashed with SHA-256
    and the first eight hex digits of the digest are returned. If the file
    does not exist the string ``"NOFILE"`` is returned instead.
    """
    import hashlib

    window_start = 0x100000
    window_size = 0x10000

    try:
        with open(filename, "rb") as stream:
            stream.seek(window_start)
            digest = hashlib.sha256(stream.read(window_size))
    except FileNotFoundError:
        return "NOFILE"

    return digest.hexdigest()[:8]


def eval(model, n, input):
    """Compute the attention output of encoder layer ``n`` for ``input``.

    The ``conv_q``/``conv_k``/``conv_v`` weights of layer ``n`` are looked up
    in ``model``, index 0 of their last axis is taken to obtain 2-D matrices,
    and the result of ``cal_cross_attn`` on those matrices is returned.

    Note: the function name and the ``input`` parameter shadow Python
    builtins; they are kept because callers use them.
    """
    weight_keys = (
        f"enc_p.encoder.attn_layers.{n}.conv_q.weight",
        f"enc_p.encoder.attn_layers.{n}.conv_k.weight",
        f"enc_p.encoder.attn_layers.{n}.conv_v.weight",
    )
    # Drop the trailing axis by taking its first slice.
    to_q, to_k, to_v = (model[key][:, :, 0] for key in weight_keys)
    return cal_cross_attn(to_q, to_k, to_v, input)


def main(path, root):
    """Log how similar the query model is to every model found in ``root``.

    For each of the six encoder attention layers a random probe input is
    drawn (with a fixed seed, so runs are reproducible) and pushed through
    the query model's attention weights. Every file in ``root`` is then
    loaded, evaluated on the same probe inputs, and the mean cosine
    similarity over all layers is logged as a percentage.

    Args:
        path: Path of the checkpoint to be queried.
        root: Directory whose files are used as reference checkpoints.

    Entries of ``root`` that are not regular files, or that cannot be loaded
    and evaluated as a checkpoint, are skipped with a warning instead of
    aborting the whole comparison.
    """
    num_layers = 6  # attention layers 0..5 are compared

    torch.manual_seed(114514)
    # NOTE(review): torch.load unpickles the file; only point this script at
    # checkpoints from a trusted source.
    model_a = torch.load(path, map_location="cpu")["weight"]

    logger.info("Query:\t\t%s\t%s", path, model_hash(path))

    map_attn_a = {}
    map_rand_input = {}
    # Pure inference: no_grad keeps the stored attention maps from holding on
    # to autograd graphs (and through them the temporary layer parameters).
    with torch.no_grad():
        for n in range(num_layers):
            hidden_dim, embed_dim, _ = model_a[
                f"enc_p.encoder.attn_layers.{n}.conv_v.weight"
            ].shape
            rand_input = torch.randn([embed_dim, hidden_dim])

            # ``eval`` here is this module's function, not the builtin.
            map_attn_a[n] = eval(model_a, n, rand_input)
            map_rand_input[n] = rand_input

    # Release the query weights before loading the reference checkpoints.
    del model_a

    for name in sorted(os.listdir(root)):
        ref_path = os.path.join(root, name)
        if not os.path.isfile(ref_path):
            continue

        try:
            model_b = torch.load(ref_path, map_location="cpu")["weight"]
            with torch.no_grad():
                sims = [
                    torch.mean(
                        torch.cosine_similarity(
                            map_attn_a[n], eval(model_b, n, map_rand_input[n])
                        )
                    )
                    for n in range(num_layers)
                ]
        except Exception as err:
            # Per-file boundary: a single unreadable or incompatible file in
            # the directory must not prevent the remaining comparisons.
            logger.warning("Skipping %s: %s", ref_path, err)
            continue

        logger.info(
            "Reference:\t%s\t%s\t%s",
            ref_path,
            model_hash(ref_path),
            f"{torch.mean(torch.stack(sims)) * 1e2:.2f}%",
        )


if __name__ == "__main__":
    # All results are emitted through ``logger.info``. Unless logging has been
    # configured elsewhere, INFO records are dropped by default, so set up a
    # plain message-only handler when running as a script.
    logging.basicConfig(level=logging.INFO, format="%(message)s")

    # Fill in the model to query and the directory of reference models here.
    # os.path.join keeps the defaults usable on any platform, not only on
    # systems that use a backslash as the path separator.
    reference_root = os.path.join("assets", "weights")
    query_path = os.path.join(reference_root, "mi v3.pth")
    main(query_path, reference_root)
Create calc_rvc_model_similarity.py 2023-08-02 15:20:46 +02:00			`# This code references https://huggingface.co/JosephusCheung/ASimilarityCalculatior/blob/main/qwerty.py`
			`# Fill in the path of the model to be queried and the root directory of the reference models, and this script will return the similarity between the model to be queried and all reference models.`
format 2023-08-28 09:08:31 +02:00			`import os`
fix: 卸载音色省显存顺便将所有print换成了统一的logger 2023-09-01 09:18:08 +02:00			`import logging`
Format code (#1162) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-09-02 05:50:52 +02:00
fix: 卸载音色省显存顺便将所有print换成了统一的logger 2023-09-01 09:18:08 +02:00			`logger = logging.getLogger(__name__)`
format 2023-08-28 09:08:31 +02:00
Create calc_rvc_model_similarity.py 2023-08-02 15:20:46 +02:00			`import torch`
			`import torch.nn as nn`
			`import torch.nn.functional as F`

Format code (#932) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-03 04:25:05 +02:00
Create calc_rvc_model_similarity.py 2023-08-02 15:20:46 +02:00			`def cal_cross_attn(to_q, to_k, to_v, rand_input):`
			`hidden_dim, embed_dim = to_q.shape`
			`attn_to_q = nn.Linear(hidden_dim, embed_dim, bias=False)`
			`attn_to_k = nn.Linear(hidden_dim, embed_dim, bias=False)`
			`attn_to_v = nn.Linear(hidden_dim, embed_dim, bias=False)`
			`attn_to_q.load_state_dict({"weight": to_q})`
			`attn_to_k.load_state_dict({"weight": to_k})`
			`attn_to_v.load_state_dict({"weight": to_v})`

			`return torch.einsum(`
			`"ik, jk -> ik",`
Format code (#932) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-03 04:25:05 +02:00			`F.softmax(`
			`torch.einsum("ij, kj -> ik", attn_to_q(rand_input), attn_to_k(rand_input)),`
			`dim=-1,`
			`),`
			`attn_to_v(rand_input),`
Create calc_rvc_model_similarity.py 2023-08-02 15:20:46 +02:00			`)`

Format code (#932) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-03 04:25:05 +02:00
Create calc_rvc_model_similarity.py 2023-08-02 15:20:46 +02:00			`def model_hash(filename):`
			`try:`
			`with open(filename, "rb") as file:`
			`import hashlib`
Format code (#932) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-03 04:25:05 +02:00
Create calc_rvc_model_similarity.py 2023-08-02 15:20:46 +02:00			`m = hashlib.sha256()`

			`file.seek(0x100000)`
			`m.update(file.read(0x10000))`
			`return m.hexdigest()[0:8]`
			`except FileNotFoundError:`
Format code (#932) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-03 04:25:05 +02:00			`return "NOFILE"`

Create calc_rvc_model_similarity.py 2023-08-02 15:20:46 +02:00
			`def eval(model, n, input):`
			`qk = f"enc_p.encoder.attn_layers.{n}.conv_q.weight"`
			`uk = f"enc_p.encoder.attn_layers.{n}.conv_k.weight"`
			`vk = f"enc_p.encoder.attn_layers.{n}.conv_v.weight"`
Format code (#932) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-03 04:25:05 +02:00			`atoq, atok, atov = model[qk][:, :, 0], model[uk][:, :, 0], model[vk][:, :, 0]`
Create calc_rvc_model_similarity.py 2023-08-02 15:20:46 +02:00
			`attn = cal_cross_attn(atoq, atok, atov, input)`
			`return attn`

Format code (#932) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-03 04:25:05 +02:00
			`def main(path, root):`
Create calc_rvc_model_similarity.py 2023-08-02 15:20:46 +02:00			`torch.manual_seed(114514)`
			`model_a = torch.load(path, map_location="cpu")["weight"]`

fix: 卸载音色省显存顺便将所有print换成了统一的logger 2023-09-01 09:18:08 +02:00			`logger.info("Query:\t\t%s\t%s" % (path, model_hash(path)))`
Create calc_rvc_model_similarity.py 2023-08-02 15:20:46 +02:00
			`map_attn_a = {}`
			`map_rand_input = {}`
			`for n in range(6):`
Format code (#932) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-03 04:25:05 +02:00			`hidden_dim, embed_dim, _ = model_a[`
			`f"enc_p.encoder.attn_layers.{n}.conv_v.weight"`
			`].shape`
Create calc_rvc_model_similarity.py 2023-08-02 15:20:46 +02:00			`rand_input = torch.randn([embed_dim, hidden_dim])`

			`map_attn_a[n] = eval(model_a, n, rand_input)`
			`map_rand_input[n] = rand_input`

			`del model_a`

			`for name in sorted(list(os.listdir(root))):`
Format code (#932) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-03 04:25:05 +02:00			`path = "%s/%s" % (root, name)`
Create calc_rvc_model_similarity.py 2023-08-02 15:20:46 +02:00			`model_b = torch.load(path, map_location="cpu")["weight"]`

			`sims = []`
			`for n in range(6):`
			`attn_a = map_attn_a[n]`
			`attn_b = eval(model_b, n, map_rand_input[n])`

			`sim = torch.mean(torch.cosine_similarity(attn_a, attn_b))`
			`sims.append(sim)`

fix: 卸载音色省显存顺便将所有print换成了统一的logger 2023-09-01 09:18:08 +02:00			`logger.info(`
fix: index_root searching close #1147 2023-09-01 08:11:55 +02:00			`"Reference:\t%s\t%s\t%s"`
Format code (#932) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-03 04:25:05 +02:00			`% (path, model_hash(path), f"{torch.mean(torch.stack(sims)) * 1e2:.2f}%")`
			`)`

Create calc_rvc_model_similarity.py 2023-08-02 15:20:46 +02:00
			`if __name__ == "__main__":`
fix: weights folder 2023-08-29 18:27:23 +02:00			`query_path = r"assets\weights\mi v3.pth"`
			`reference_root = r"assets\weights"`
Format code (#932) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-08-03 04:25:05 +02:00			`main(query_path, reference_root)`