Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 164 additions & 23 deletions everyvoice/.schema/everyvoice-text-to-wav-0.5.json
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,61 @@
"title": "Punctuation",
"type": "object"
},
"StyleTTS2ASRConfig": {
"$schema": "http://json-schema.org/draft-07/schema#",
"additionalProperties": false,
"description": "Source for the ASR text-aligner checkpoint.",
"properties": {
"repo_id": {
"default": "everyvoice/styletts2-asr-aligner",
"description": "HuggingFace repo ID for the ASR text-aligner.",
"title": "Repo Id",
"type": "string"
},
"checkpoint_filename": {
"default": "epoch_00080.pth",
"description": "Filename of the model checkpoint within the HuggingFace repo.",
"title": "Checkpoint Filename",
"type": "string"
},
"config_filename": {
"default": "config.yml",
"description": "Filename of the model config within the HuggingFace repo.",
"title": "Config Filename",
"type": "string"
},
"local_checkpoint": {
"anyOf": [
{
"format": "path",
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Local path to the checkpoint file. If set, overrides repo_id/checkpoint_filename. Warning: this path is expanded (via os.path.expandvars and os.path.expanduser) when the config is loaded, and the resolved absolute path is what gets embedded in saved checkpoints. This may make your checkpoints less portable to other machines. Environment variables (e.g. $HOME or ${MY_MODELS}) and '~' are supported.",
"title": "Local Checkpoint"
},
"local_config": {
"anyOf": [
{
"format": "path",
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Local path to the config file. If set, overrides repo_id/config_filename. Warning: this path is expanded (via os.path.expandvars and os.path.expanduser) when the config is loaded, and the resolved absolute path is what gets embedded in saved checkpoints. This may make your checkpoints less portable to other machines. Environment variables (e.g. $HOME or ${MY_MODELS}) and '~' are supported.",
"title": "Local Config"
}
},
"title": "StyleTTS2ASRConfig",
"type": "object"
},
"StyleTTS2DecoderConfig": {
"$schema": "http://json-schema.org/draft-07/schema#",
"additionalProperties": false,
Expand Down Expand Up @@ -653,6 +708,41 @@
"title": "StyleTTS2DiffusionTransformerConfig",
"type": "object"
},
"StyleTTS2JDCConfig": {
"$schema": "http://json-schema.org/draft-07/schema#",
"additionalProperties": false,
"description": "Source for the JDC F0 extractor checkpoint.",
"properties": {
"repo_id": {
"default": "everyvoice/styletts2-jdc-f0",
"description": "HuggingFace repo ID for the JDC F0 extractor.",
"title": "Repo Id",
"type": "string"
},
"filename": {
"default": "bst.t7",
"description": "Filename of the checkpoint within the HuggingFace repo.",
"title": "Filename",
"type": "string"
},
"local_path": {
"anyOf": [
{
"format": "path",
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Local path to the checkpoint. If set, overrides repo_id/filename. Warning: this path is expanded (via os.path.expandvars and os.path.expanduser) when the config is loaded, and the resolved absolute path is what gets embedded in saved checkpoints. This may make your checkpoints less portable to other machines. Environment variables (e.g. $HOME or ${MY_MODELS}) and '~' are supported.",
"title": "Local Path"
}
},
"title": "StyleTTS2JDCConfig",
"type": "object"
},
"StyleTTS2LossConfig": {
"$schema": "http://json-schema.org/draft-07/schema#",
"additionalProperties": false,
Expand Down Expand Up @@ -845,38 +935,89 @@
"title": "StyleTTS2OptimizerConfig",
"type": "object"
},
"StyleTTS2PretrainedConfig": {
"StyleTTS2PLBERTConfig": {
"$schema": "http://json-schema.org/draft-07/schema#",
"additionalProperties": false,
"description": "Paths to the frozen pretrained models bundled with StyleTTS2.",
"description": "Source for the PLBERT text encoder checkpoint.",
"properties": {
"f0_path": {
"default": "styletts2/pretrained/jdc/bst.t7",
"description": "Path to the JDC F0 extractor checkpoint.",
"format": "path",
"title": "F0 Path",
"repo_id": {
"default": "papercup-ai/multilingual-pl-bert",
"description": "HuggingFace repo ID for the PLBERT text encoder.",
"title": "Repo Id",
"type": "string"
},
"asr_config": {
"default": "styletts2/pretrained/asr/config.yml",
"description": "Path to the ASR model config.",
"format": "path",
"title": "Asr Config",
"checkpoint_filename": {
"default": "step_1100000.t7",
"description": "Filename of the checkpoint within the HuggingFace repo.",
"title": "Checkpoint Filename",
"type": "string"
},
"asr_path": {
"default": "styletts2/pretrained/asr/epoch_00080.pth",
"description": "Path to the ASR model checkpoint.",
"format": "path",
"title": "Asr Path",
"config_filename": {
"default": "config.yml",
"description": "Filename of the model config within the HuggingFace repo.",
"title": "Config Filename",
"type": "string"
},
"plbert_dir": {
"default": "styletts2/pretrained/plbert",
"description": "Directory containing the PLBERT checkpoint and config.",
"format": "path",
"title": "Plbert Dir",
"type": "string"
"local_checkpoint": {
"anyOf": [
{
"format": "path",
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Local path to the checkpoint file. If set, overrides repo_id/checkpoint_filename. Warning: this path is expanded (via os.path.expandvars and os.path.expanduser) when the config is loaded, and the resolved absolute path is what gets embedded in saved checkpoints. This may make your checkpoints less portable to other machines. Environment variables (e.g. $HOME or ${MY_MODELS}) and '~' are supported.",
"title": "Local Checkpoint"
},
"local_config": {
"anyOf": [
{
"format": "path",
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Local path to the config file. If set, overrides repo_id/config_filename. Warning: this path is expanded (via os.path.expandvars and os.path.expanduser) when the config is loaded, and the resolved absolute path is what gets embedded in saved checkpoints. This may make your checkpoints less portable to other machines. Environment variables (e.g. $HOME or ${MY_MODELS}) and '~' are supported.",
"title": "Local Config"
}
},
"title": "StyleTTS2PLBERTConfig",
"type": "object"
},
"StyleTTS2PretrainedConfig": {
"$schema": "http://json-schema.org/draft-07/schema#",
"additionalProperties": false,
"description": "Sources for the frozen pretrained models used by StyleTTS2.",
"properties": {
"f0": {
"allOf": [
{
"$ref": "#/$defs/StyleTTS2JDCConfig"
}
],
"description": "JDC F0 extractor source (HuggingFace repo or local path)."
},
"asr": {
"allOf": [
{
"$ref": "#/$defs/StyleTTS2ASRConfig"
}
],
"description": "ASR text-aligner source (HuggingFace repo or local paths)."
},
"plbert": {
"allOf": [
{
"$ref": "#/$defs/StyleTTS2PLBERTConfig"
}
],
"description": "PLBERT text encoder source (HuggingFace repo or local paths)."
},
"pretrained_symbols": {
"description": "Ordered symbol list that matches the pretrained text-encoder embedding table. The index of each symbol here is its embedding-table row. Override only if you are using a custom pretrained text encoder.",
Expand Down
13 changes: 13 additions & 0 deletions everyvoice/base_cli/checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,17 @@ def summarize_hfgl_generator_model(model_path: Path, checkpoint: dict) -> None:
print(summary(vocoder_model, None, verbose=0))


def summarize_styletts2_model(model_path: Path, checkpoint: dict) -> None:
    """Print a torchinfo architecture summary for a StyleTTS2 checkpoint.

    Args:
        model_path: Location of the checkpoint file; the module is rebuilt
            from it with ``StyleTTS2Module.load_from_checkpoint``.
        checkpoint: The already-loaded checkpoint dictionary. It is not read
            here; presumably it is accepted so this function has the same
            signature as the other ``summarize_*`` helpers.
    """
    # Imported inside the function so these packages are only loaded when a
    # StyleTTS2 summary is actually requested.
    from torchinfo import summary

    from everyvoice.model.e2e.StyleTTS2_lightning.styletts2.lightning import (
        StyleTTS2Module,
    )

    styletts2_module = StyleTTS2Module.load_from_checkpoint(model_path)
    architecture_report = summary(styletts2_module, None, verbose=0)
    print(architecture_report)


def summarize_unknown_model(model_path: Path, checkpoint: dict) -> None:
from tabulate import tabulate

Expand Down Expand Up @@ -194,6 +205,7 @@ def inspect(

if show_architecture:
checkpoint = load_checkpoint(model_path, minimal=False)

if "model_info" in checkpoint:
print(
"Inspecting checkpoint according to its model info:",
Expand All @@ -203,6 +215,7 @@ def inspect(
"FastSpeech2": summarize_fs2_model,
"HiFiGAN": summarize_hfgl_model,
"HiFiGANGenerator": summarize_hfgl_generator_model,
"StyleTTS2Module": summarize_styletts2_model,
}
summarizer = model_summarizers.get(checkpoint["model_info"]["name"], None)
if summarizer:
Expand Down
41 changes: 41 additions & 0 deletions everyvoice/base_cli/prediction_writing_callback.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""Generic base for synthesis-output-writing Lightning callbacks.

Shared by the FS2 and StyleTTS2 prediction-writing callback hierarchies.
Subclasses override ``on_predict_batch_end`` with format-specific logic.
"""

from __future__ import annotations

from pathlib import Path

from pytorch_lightning.callbacks import Callback


class BasePredictionWritingCallback(Callback):
    """Common scaffolding for callbacks that write synthesis output to disk.

    This base class makes sure the output directory exists and builds the
    path each prediction is written to. Concrete subclasses must implement
    ``on_predict_batch_end`` with their own format-specific writing logic.
    """

    def __init__(
        self,
        save_dir: Path,
        file_extension: str,
        global_step: int,
        include_global_step_in_filename: bool = False,
    ) -> None:
        """Store the naming settings and create the output directory.

        Args:
            save_dir: Directory that output paths are built under. It is
                created (with any missing parents) during construction.
            file_extension: Final component of every generated file name.
            global_step: Step count, stored as the tag ``ckpt=<global_step>``.
            include_global_step_in_filename: When true, the ``ckpt=...`` tag
                is placed just before the file extension in generated names.
        """
        super().__init__()
        self.sep = "--"
        self.save_dir = save_dir
        self.file_extension = file_extension
        self.include_global_step_in_filename = include_global_step_in_filename
        self.global_step = f"ckpt={global_step}"
        self.save_dir.mkdir(parents=True, exist_ok=True)

    def get_filename(self, basename: str, speaker: str, language: str) -> str:
        """Return the output path for one prediction as a string.

        The file name is ``basename``, ``speaker``, ``language``, optionally
        the ``ckpt=...`` tag, and the file extension, joined with ``self.sep``
        and placed under ``self.save_dir``.

        Args:
            basename: Identifier of the item being written.
            speaker: Speaker label to embed in the name.
            language: Language label to embed in the name.

        Returns:
            The full output path, converted to ``str``.
        """
        if self.include_global_step_in_filename:
            name_parts = [
                basename,
                speaker,
                language,
                self.global_step,
                self.file_extension,
            ]
        else:
            name_parts = [basename, speaker, language, self.file_extension]
        output_path = self.save_dir / self.sep.join(name_parts)
        # The joined name may itself contain directory components (presumably
        # via ``basename``), so make sure the final parent directory exists.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        return str(output_path)
Loading
Loading