Introduction
Quick Start
Speech-to-Text
Text-to-Speech
Released Models
Demos
API Reference
paddleaudio
paddleaudio.backends
paddleaudio.backends.common
paddleaudio.backends.no_backend
paddleaudio.backends.soundfile_backend
paddleaudio.backends.sox_io_backend
paddleaudio.backends.utils
paddleaudio.compliance
paddleaudio.compliance.kaldi
paddleaudio.compliance.librosa
paddleaudio.datasets
paddleaudio.datasets.dataset
paddleaudio.datasets.esc50
paddleaudio.datasets.gtzan
paddleaudio.datasets.hey_snips
paddleaudio.datasets.rirs_noises
paddleaudio.datasets.tess
paddleaudio.datasets.urban_sound
paddleaudio.datasets.voxceleb
paddleaudio.features
paddleaudio.features.layers
paddleaudio.functional
paddleaudio.functional.functional
paddleaudio.functional.window
paddleaudio.metric
paddleaudio.metric.eer
paddleaudio.utils
paddleaudio.utils.download
paddleaudio.utils.env
paddleaudio.utils.error
paddleaudio.utils.log
paddleaudio.utils.numeric
paddleaudio.utils.tensor_utils
paddleaudio.utils.time
paddlespeech
paddlespeech.cli
paddlespeech.cli.base_commands
paddlespeech.cli.download
paddlespeech.cli.entry
paddlespeech.cli.executor
paddlespeech.cli.kws
paddlespeech.cli.kws.infer
paddlespeech.cli.log
paddlespeech.cli.text
paddlespeech.cli.text.infer
paddlespeech.cli.tts
paddlespeech.cli.tts.infer
paddlespeech.cli.utils
paddlespeech.cli.vector
paddlespeech.cli.vector.infer
paddlespeech.cls
paddlespeech.cls.exps
paddlespeech.cls.exps.panns
paddlespeech.cls.exps.panns.deploy
paddlespeech.cls.models
paddlespeech.cls.models.panns
paddlespeech.cls.models.panns.classifier
paddlespeech.cls.models.panns.panns
paddlespeech.kws
paddlespeech.kws.exps
paddlespeech.kws.exps.mdtc
paddlespeech.kws.exps.mdtc.collate
paddlespeech.kws.exps.mdtc.compute_det
paddlespeech.kws.exps.mdtc.score
paddlespeech.kws.exps.mdtc.train
paddlespeech.kws.models
paddlespeech.kws.models.loss
paddlespeech.kws.models.mdtc
paddlespeech.resource
paddlespeech.resource.model_alias
paddlespeech.resource.pretrained_models
paddlespeech.resource.resource
paddlespeech.s2t
paddlespeech.s2t.decoders
paddlespeech.s2t.decoders.beam_search
paddlespeech.s2t.decoders.beam_search.batch_beam_search
paddlespeech.s2t.decoders.beam_search.beam_search
paddlespeech.s2t.decoders.ctcdecoder
paddlespeech.s2t.decoders.ctcdecoder.decoders_deprecated
paddlespeech.s2t.decoders.ctcdecoder.swig_wrapper
paddlespeech.s2t.decoders.scorers
paddlespeech.s2t.decoders.scorers.ctc
paddlespeech.s2t.decoders.scorers.ctc_prefix_score
paddlespeech.s2t.decoders.scorers.length_bonus
paddlespeech.s2t.decoders.scorers.scorer_interface
paddlespeech.s2t.decoders.utils
paddlespeech.s2t.exps
paddlespeech.s2t.exps.deepspeech2
paddlespeech.s2t.exps.deepspeech2.bin
paddlespeech.s2t.exps.deepspeech2.bin.deploy
paddlespeech.s2t.exps.u2
paddlespeech.s2t.exps.u2.bin
paddlespeech.s2t.exps.u2_kaldi
paddlespeech.s2t.exps.u2_kaldi.bin
paddlespeech.s2t.exps.u2_kaldi.bin.test
paddlespeech.s2t.exps.u2_kaldi.bin.train
paddlespeech.s2t.exps.u2_st
paddlespeech.s2t.exps.u2_st.bin
paddlespeech.s2t.frontend
paddlespeech.s2t.frontend.audio
paddlespeech.s2t.frontend.augmentor
paddlespeech.s2t.frontend.augmentor.augmentation
paddlespeech.s2t.frontend.augmentor.base
paddlespeech.s2t.frontend.augmentor.impulse_response
paddlespeech.s2t.frontend.augmentor.noise_perturb
paddlespeech.s2t.frontend.augmentor.online_bayesian_normalization
paddlespeech.s2t.frontend.augmentor.resample
paddlespeech.s2t.frontend.augmentor.shift_perturb
paddlespeech.s2t.frontend.augmentor.spec_augment
paddlespeech.s2t.frontend.augmentor.speed_perturb
paddlespeech.s2t.frontend.augmentor.volume_perturb
paddlespeech.s2t.frontend.normalizer
paddlespeech.s2t.frontend.speech
paddlespeech.s2t.frontend.utility
paddlespeech.s2t.io
paddlespeech.s2t.io.batchfy
paddlespeech.s2t.io.converter
paddlespeech.s2t.io.dataset
paddlespeech.s2t.io.sampler
paddlespeech.s2t.io.utility
paddlespeech.s2t.models
paddlespeech.s2t.models.asr_interface
paddlespeech.s2t.models.ds2
paddlespeech.s2t.models.ds2.conv
paddlespeech.s2t.models.ds2.deepspeech2
paddlespeech.s2t.models.lm
paddlespeech.s2t.models.lm.transformer
paddlespeech.s2t.models.lm_interface
paddlespeech.s2t.models.st_interface
paddlespeech.s2t.models.u2_st
paddlespeech.s2t.models.u2_st.u2_st
paddlespeech.s2t.modules
paddlespeech.s2t.modules.activation
paddlespeech.s2t.modules.align
paddlespeech.s2t.modules.attention
paddlespeech.s2t.modules.cmvn
paddlespeech.s2t.modules.conformer_convolution
paddlespeech.s2t.modules.crf
paddlespeech.s2t.modules.ctc
paddlespeech.s2t.modules.decoder
paddlespeech.s2t.modules.decoder_layer
paddlespeech.s2t.modules.embedding
paddlespeech.s2t.modules.encoder
paddlespeech.s2t.modules.encoder_layer
paddlespeech.s2t.modules.initializer
paddlespeech.s2t.modules.loss
paddlespeech.s2t.modules.mask
paddlespeech.s2t.modules.positionwise_feed_forward
paddlespeech.s2t.modules.subsampling
paddlespeech.s2t.training
paddlespeech.s2t.training.cli
paddlespeech.s2t.training.extensions
paddlespeech.s2t.training.extensions.evaluator
paddlespeech.s2t.training.extensions.extension
paddlespeech.s2t.training.extensions.plot
paddlespeech.s2t.training.gradclip
paddlespeech.s2t.training.optimizer
paddlespeech.s2t.training.reporter
paddlespeech.s2t.training.scheduler
paddlespeech.s2t.training.timer
paddlespeech.s2t.training.trainer
paddlespeech.s2t.training.triggers
paddlespeech.s2t.training.triggers.compare_value_trigger
paddlespeech.s2t.training.triggers.interval_trigger
paddlespeech.s2t.training.triggers.limit_trigger
paddlespeech.s2t.training.triggers.time_trigger
paddlespeech.s2t.training.triggers.utils
paddlespeech.s2t.training.updaters
paddlespeech.s2t.training.updaters.standard_updater
paddlespeech.s2t.training.updaters.updater
paddlespeech.s2t.utils
paddlespeech.s2t.utils.asr_utils
paddlespeech.s2t.utils.bleu_score
paddlespeech.s2t.utils.check_kwargs
paddlespeech.s2t.utils.checkpoint
paddlespeech.s2t.utils.cli_utils
paddlespeech.s2t.utils.ctc_utils
paddlespeech.s2t.utils.dynamic_import
paddlespeech.s2t.utils.dynamic_pip_install
paddlespeech.s2t.utils.error_rate
paddlespeech.s2t.utils.layer_tools
paddlespeech.s2t.utils.log
paddlespeech.s2t.utils.mp_tools
paddlespeech.s2t.utils.profiler
paddlespeech.s2t.utils.socket_server
paddlespeech.s2t.utils.spec_augment
paddlespeech.s2t.utils.tensor_utils
paddlespeech.s2t.utils.text_grid
paddlespeech.s2t.utils.utility
paddlespeech.server.entry
paddlespeech.t2s
paddlespeech.t2s.audio
paddlespeech.t2s.audio.audio
paddlespeech.t2s.audio.codec
paddlespeech.t2s.audio.spec_normalizer
paddlespeech.t2s.datasets
paddlespeech.t2s.datasets.am_batch_fn
paddlespeech.t2s.datasets.batch
paddlespeech.t2s.datasets.data_table
paddlespeech.t2s.datasets.dataset
paddlespeech.t2s.datasets.get_feats
paddlespeech.t2s.datasets.ljspeech
paddlespeech.t2s.datasets.preprocess_utils
paddlespeech.t2s.datasets.sampler
paddlespeech.t2s.datasets.vocoder_batch_fn
paddlespeech.t2s.exps
paddlespeech.t2s.exps.ernie_sat
paddlespeech.t2s.exps.ernie_sat.align
paddlespeech.t2s.exps.ernie_sat.normalize
paddlespeech.t2s.exps.ernie_sat.preprocess
paddlespeech.t2s.exps.ernie_sat.synthesize
paddlespeech.t2s.exps.ernie_sat.synthesize_e2e
paddlespeech.t2s.exps.ernie_sat.train
paddlespeech.t2s.exps.ernie_sat.utils
paddlespeech.t2s.exps.fastspeech2
paddlespeech.t2s.exps.fastspeech2.gen_gta_mel
paddlespeech.t2s.exps.fastspeech2.normalize
paddlespeech.t2s.exps.fastspeech2.preprocess
paddlespeech.t2s.exps.fastspeech2.train
paddlespeech.t2s.exps.fastspeech2.vc2_infer
paddlespeech.t2s.exps.gan_vocoder
paddlespeech.t2s.exps.gan_vocoder.hifigan
paddlespeech.t2s.exps.gan_vocoder.hifigan.train
paddlespeech.t2s.exps.gan_vocoder.multi_band_melgan
paddlespeech.t2s.exps.gan_vocoder.multi_band_melgan.train
paddlespeech.t2s.exps.gan_vocoder.normalize
paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan
paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan.synthesize_from_wav
paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan.train
paddlespeech.t2s.exps.gan_vocoder.preprocess
paddlespeech.t2s.exps.gan_vocoder.style_melgan
paddlespeech.t2s.exps.gan_vocoder.style_melgan.train
paddlespeech.t2s.exps.gan_vocoder.synthesize
paddlespeech.t2s.exps.inference
paddlespeech.t2s.exps.inference_streaming
paddlespeech.t2s.exps.ort_predict
paddlespeech.t2s.exps.ort_predict_e2e
paddlespeech.t2s.exps.ort_predict_streaming
paddlespeech.t2s.exps.speedyspeech
paddlespeech.t2s.exps.speedyspeech.gen_gta_mel
paddlespeech.t2s.exps.speedyspeech.inference
paddlespeech.t2s.exps.speedyspeech.normalize
paddlespeech.t2s.exps.speedyspeech.preprocess
paddlespeech.t2s.exps.speedyspeech.synthesize_e2e
paddlespeech.t2s.exps.speedyspeech.train
paddlespeech.t2s.exps.syn_utils
paddlespeech.t2s.exps.synthesize
paddlespeech.t2s.exps.synthesize_e2e
paddlespeech.t2s.exps.synthesize_streaming
paddlespeech.t2s.exps.tacotron2
paddlespeech.t2s.exps.tacotron2.normalize
paddlespeech.t2s.exps.tacotron2.preprocess
paddlespeech.t2s.exps.tacotron2.train
paddlespeech.t2s.exps.transformer_tts
paddlespeech.t2s.exps.transformer_tts.normalize
paddlespeech.t2s.exps.transformer_tts.preprocess
paddlespeech.t2s.exps.transformer_tts.synthesize
paddlespeech.t2s.exps.transformer_tts.synthesize_e2e
paddlespeech.t2s.exps.transformer_tts.train
paddlespeech.t2s.exps.vits
paddlespeech.t2s.exps.vits.normalize
paddlespeech.t2s.exps.vits.preprocess
paddlespeech.t2s.exps.vits.synthesize
paddlespeech.t2s.exps.vits.synthesize_e2e
paddlespeech.t2s.exps.vits.train
paddlespeech.t2s.exps.vits.voice_cloning
paddlespeech.t2s.exps.voice_cloning
paddlespeech.t2s.exps.waveflow
paddlespeech.t2s.exps.waveflow.config
paddlespeech.t2s.exps.waveflow.ljspeech
paddlespeech.t2s.exps.waveflow.preprocess
paddlespeech.t2s.exps.waveflow.synthesize
paddlespeech.t2s.exps.waveflow.train
paddlespeech.t2s.exps.wavernn
paddlespeech.t2s.exps.wavernn.synthesize
paddlespeech.t2s.exps.wavernn.train
paddlespeech.t2s.frontend
paddlespeech.t2s.frontend.arpabet
paddlespeech.t2s.frontend.g2pw
paddlespeech.t2s.frontend.g2pw.dataset
paddlespeech.t2s.frontend.g2pw.onnx_api
paddlespeech.t2s.frontend.g2pw.utils
paddlespeech.t2s.frontend.generate_lexicon
paddlespeech.t2s.frontend.mix_frontend
paddlespeech.t2s.frontend.normalizer
paddlespeech.t2s.frontend.normalizer.abbrrviation
paddlespeech.t2s.frontend.normalizer.acronyms
paddlespeech.t2s.frontend.normalizer.normalizer
paddlespeech.t2s.frontend.normalizer.numbers
paddlespeech.t2s.frontend.normalizer.width
paddlespeech.t2s.frontend.phonectic
paddlespeech.t2s.frontend.punctuation
paddlespeech.t2s.frontend.tone_sandhi
paddlespeech.t2s.frontend.vocab
paddlespeech.t2s.frontend.zh_frontend
paddlespeech.t2s.frontend.zh_normalization
paddlespeech.t2s.frontend.zh_normalization.char_convert
paddlespeech.t2s.frontend.zh_normalization.chronology
paddlespeech.t2s.frontend.zh_normalization.constants
paddlespeech.t2s.frontend.zh_normalization.num
paddlespeech.t2s.frontend.zh_normalization.phonecode
paddlespeech.t2s.frontend.zh_normalization.quantifier
paddlespeech.t2s.frontend.zh_normalization.text_normlization
paddlespeech.t2s.models
paddlespeech.t2s.models.ernie_sat
paddlespeech.t2s.models.ernie_sat.ernie_sat
paddlespeech.t2s.models.ernie_sat.ernie_sat_updater
paddlespeech.t2s.models.fastspeech2
paddlespeech.t2s.models.fastspeech2.fastspeech2
paddlespeech.t2s.models.fastspeech2.fastspeech2_updater
paddlespeech.t2s.models.hifigan
paddlespeech.t2s.models.hifigan.hifigan
paddlespeech.t2s.models.hifigan.hifigan_updater
paddlespeech.t2s.models.melgan
paddlespeech.t2s.models.melgan.melgan
paddlespeech.t2s.models.melgan.multi_band_melgan_updater
paddlespeech.t2s.models.melgan.style_melgan
paddlespeech.t2s.models.melgan.style_melgan_updater
paddlespeech.t2s.models.parallel_wavegan
paddlespeech.t2s.models.parallel_wavegan.parallel_wavegan
paddlespeech.t2s.models.parallel_wavegan.parallel_wavegan_updater
paddlespeech.t2s.models.speedyspeech
paddlespeech.t2s.models.speedyspeech.speedyspeech
paddlespeech.t2s.models.speedyspeech.speedyspeech_updater
paddlespeech.t2s.models.tacotron2
paddlespeech.t2s.models.tacotron2.tacotron2
paddlespeech.t2s.models.tacotron2.tacotron2_updater
paddlespeech.t2s.models.transformer_tts
paddlespeech.t2s.models.transformer_tts.transformer_tts
paddlespeech.t2s.models.transformer_tts.transformer_tts_updater
paddlespeech.t2s.models.vits
paddlespeech.t2s.models.vits.duration_predictor
paddlespeech.t2s.models.vits.flow
paddlespeech.t2s.models.vits.generator
paddlespeech.t2s.models.vits.monotonic_align
paddlespeech.t2s.models.vits.posterior_encoder
paddlespeech.t2s.models.vits.residual_coupling
paddlespeech.t2s.models.vits.text_encoder
paddlespeech.t2s.models.vits.transform
paddlespeech.t2s.models.vits.vits
paddlespeech.t2s.models.vits.vits_updater
paddlespeech.t2s.models.vits.wavenet
paddlespeech.t2s.models.vits.wavenet.residual_block
paddlespeech.t2s.models.vits.wavenet.wavenet
paddlespeech.t2s.models.waveflow
paddlespeech.t2s.models.wavernn
paddlespeech.t2s.models.wavernn.wavernn
paddlespeech.t2s.models.wavernn.wavernn_updater
paddlespeech.t2s.modules
paddlespeech.t2s.modules.activation
paddlespeech.t2s.modules.causal_conv
paddlespeech.t2s.modules.conformer
paddlespeech.t2s.modules.conformer.convolution
paddlespeech.t2s.modules.conformer.encoder_layer
paddlespeech.t2s.modules.conv
paddlespeech.t2s.modules.geometry
paddlespeech.t2s.modules.layer_norm
paddlespeech.t2s.modules.losses
paddlespeech.t2s.modules.masked_fill
paddlespeech.t2s.modules.nets_utils
paddlespeech.t2s.modules.normalizer
paddlespeech.t2s.modules.positional_encoding
paddlespeech.t2s.modules.pqmf
paddlespeech.t2s.modules.predictor
paddlespeech.t2s.modules.predictor.duration_predictor
paddlespeech.t2s.modules.predictor.length_regulator
paddlespeech.t2s.modules.predictor.variance_predictor
paddlespeech.t2s.modules.residual_block
paddlespeech.t2s.modules.residual_stack
paddlespeech.t2s.modules.style_encoder
paddlespeech.t2s.modules.tacotron2
paddlespeech.t2s.modules.tacotron2.attentions
paddlespeech.t2s.modules.tacotron2.decoder
paddlespeech.t2s.modules.tacotron2.encoder
paddlespeech.t2s.modules.tade_res_block
paddlespeech.t2s.modules.transformer
paddlespeech.t2s.modules.transformer.attention
paddlespeech.t2s.modules.transformer.decoder
paddlespeech.t2s.modules.transformer.decoder_layer
paddlespeech.t2s.modules.transformer.embedding
paddlespeech.t2s.modules.transformer.encoder
paddlespeech.t2s.modules.transformer.encoder_layer
paddlespeech.t2s.modules.transformer.lightconv
paddlespeech.t2s.modules.transformer.mask
paddlespeech.t2s.modules.transformer.multi_layer_conv
paddlespeech.t2s.modules.transformer.positionwise_feed_forward
paddlespeech.t2s.modules.transformer.repeat
paddlespeech.t2s.modules.transformer.subsampling
paddlespeech.t2s.modules.upsample
paddlespeech.t2s.training
paddlespeech.t2s.training.cli
paddlespeech.t2s.training.default_config
paddlespeech.t2s.training.experiment
paddlespeech.t2s.training.extension
paddlespeech.t2s.training.extensions
paddlespeech.t2s.training.extensions.evaluator
paddlespeech.t2s.training.extensions.snapshot
paddlespeech.t2s.training.extensions.visualizer
paddlespeech.t2s.training.optimizer
paddlespeech.t2s.training.reporter
paddlespeech.t2s.training.seeding
paddlespeech.t2s.training.trainer
paddlespeech.t2s.training.trigger
paddlespeech.t2s.training.triggers
paddlespeech.t2s.training.triggers.interval_trigger
paddlespeech.t2s.training.triggers.limit_trigger
paddlespeech.t2s.training.triggers.time_trigger
paddlespeech.t2s.training.updater
paddlespeech.t2s.training.updaters
paddlespeech.t2s.training.updaters.standard_updater
paddlespeech.t2s.utils
paddlespeech.t2s.utils.checkpoint
paddlespeech.t2s.utils.display
paddlespeech.t2s.utils.error_rate
paddlespeech.t2s.utils.h5_utils
paddlespeech.t2s.utils.internals
paddlespeech.t2s.utils.layer_tools
paddlespeech.t2s.utils.mp_tools
paddlespeech.t2s.utils.profiler
paddlespeech.t2s.utils.scheduler
paddlespeech.text
paddlespeech.text.exps
paddlespeech.text.exps.ernie_linear
paddlespeech.text.exps.ernie_linear.avg_model
paddlespeech.text.exps.ernie_linear.punc_restore
paddlespeech.text.exps.ernie_linear.test
paddlespeech.text.exps.ernie_linear.train
paddlespeech.text.models
paddlespeech.text.models.ernie_crf
paddlespeech.text.models.ernie_crf.model
paddlespeech.text.models.ernie_linear
paddlespeech.text.models.ernie_linear.dataset
paddlespeech.text.models.ernie_linear.ernie_linear
paddlespeech.text.models.ernie_linear.ernie_linear_updater
paddlespeech.utils
paddlespeech.utils.dynamic_import
paddlespeech.utils.env
paddlespeech.vector
paddlespeech.vector.cluster
paddlespeech.vector.cluster.diarization
paddlespeech.vector.cluster.plda
paddlespeech.vector.exps
paddlespeech.vector.exps.ge2e
paddlespeech.vector.exps.ge2e.audio_processor
paddlespeech.vector.exps.ge2e.config
paddlespeech.vector.exps.ge2e.dataset_processors
paddlespeech.vector.exps.ge2e.inference
paddlespeech.vector.exps.ge2e.preprocess
paddlespeech.vector.exps.ge2e.random_cycle
paddlespeech.vector.exps.ge2e.speaker_verification_dataset
paddlespeech.vector.exps.ge2e.train
paddlespeech.vector.io
paddlespeech.vector.io.augment
paddlespeech.vector.io.batch
paddlespeech.vector.io.dataset
paddlespeech.vector.io.dataset_from_json
paddlespeech.vector.io.embedding_norm
paddlespeech.vector.io.signal_processing
paddlespeech.vector.models
paddlespeech.vector.models.ecapa_tdnn
paddlespeech.vector.models.lstm_speaker_encoder
paddlespeech.vector.modules
paddlespeech.vector.modules.layer
paddlespeech.vector.modules.loss
paddlespeech.vector.modules.sid_model
paddlespeech.vector.training
paddlespeech.vector.training.scheduler
paddlespeech.vector.training.seeding
paddlespeech.vector.utils
paddlespeech.vector.utils.time
paddlespeech.vector.utils.vector_utils