# platypush/platypush/plugins/tts/google/__init__.py

import os
import tempfile
from typing import Optional

from platypush.plugins import action
from platypush.plugins.tts import TtsPlugin


class TtsGooglePlugin(TtsPlugin):
    """
    Advanced text-to-speech engine that leverages the Google Cloud TTS API.
    See https://cloud.google.com/text-to-speech/docs/quickstart-client-libraries#client-libraries-install-python
    for how to enable the API on your account and get your credentials.

    The ``google.cloud.texttospeech`` package is imported lazily, only when
    one of its symbols is actually needed.
    """

    def __init__(
        self,
        language: str = 'en-US',
        voice: Optional[str] = None,
        gender: str = 'FEMALE',
        credentials_file: str = '~/.credentials/platypush/google/platypush-tts.json',
        **kwargs,
    ):
        """
        :param language: Language code, see
            https://cloud.google.com/text-to-speech/docs/basics for supported
            languages.
        :param voice: Voice type, see
            https://cloud.google.com/text-to-speech/docs/basics for supported
            voices.
        :param gender: Voice gender (MALE, FEMALE or NEUTRAL).
        :param credentials_file: Where your GCloud credentials for TTS are
            stored, see https://cloud.google.com/text-to-speech/docs/basics.
        :param kwargs: Extra arguments to be passed to the
            :class:`platypush.plugins.tts.TtsPlugin` constructor.
        """
        super().__init__(**kwargs)

        # _parse_language() falls back to self.language when it is given None,
        # so the attribute has to exist before the first call.
        self.language = language
        self.language = self._parse_language(language)
        self.voice = self._parse_voice(self.language, voice)
        self.gender = getattr(self._gender, gender.upper())

        # NOTE: this is a process-wide setting, not a per-instance one.
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.expanduser(
            credentials_file
        )

    def _parse_language(self, language: Optional[str]) -> str:
        """
        Normalize a language code.

        :param language: Language code, or ``None`` to use the configured
            language (``en-US`` if none is configured).
        :return: The language code. Two-letter codes are expanded: ``en``
            becomes ``en-US``, any other ``xx`` becomes ``xx-XX``. Other
            values are returned unchanged.
        """
        if language is None:
            language = self.language or 'en-US'

        if len(language) == 2:
            language = language.lower()
            if language == 'en':
                language = 'en-US'
            else:
                language += '-' + language.upper()

        return language

    @staticmethod
    def _parse_voice(language: str, voice: Optional[str]) -> str:
        """
        Resolve the voice name to be used for a language.

        :param language: Normalized language code.
        :param voice: Explicit voice name, returned unchanged if not ``None``.
        :return: ``voice`` if given, otherwise ``<language>-Wavenet-C`` for
            ``en-US`` and ``<language>-Wavenet-A`` for any other language.
        """
        if voice is not None:
            return voice

        if language == 'en-US':
            return language + '-Wavenet-C'
        return language + '-Wavenet-A'

    @staticmethod
    def _tts_symbol(name: str, legacy_namespace: str):
        """
        Look up a symbol of the ``google.cloud.texttospeech`` package.

        :param name: Name of the symbol (e.g. ``AudioConfig``).
        :param legacy_namespace: Sub-namespace (``enums`` or ``types``) that
            is searched first if the installed package exposes it; otherwise
            the symbol is read from the package itself. (Presumably the
            sub-namespaces only exist on older releases of the client
            library - see its 2.0 migration notes.)
        :return: The requested class/enum.
        """
        from google.cloud import texttospeech

        namespace = getattr(texttospeech, legacy_namespace, texttospeech)
        return getattr(namespace, name)

    @property
    def _gender(self):
        """The ``SsmlVoiceGender`` enum of the installed client library."""
        return self._tts_symbol('SsmlVoiceGender', 'enums')

    @property
    def _voice_selection_params(self):
        """The ``VoiceSelectionParams`` type of the installed client library."""
        return self._tts_symbol('VoiceSelectionParams', 'types')

    @property
    def _synthesis_input(self):
        """The ``SynthesisInput`` type of the installed client library."""
        return self._tts_symbol('SynthesisInput', 'types')

    @property
    def _audio_config(self):
        """The ``AudioConfig`` type of the installed client library."""
        return self._tts_symbol('AudioConfig', 'types')

    @property
    def _audio_encoding(self):
        """The ``AudioEncoding`` enum of the installed client library."""
        return self._tts_symbol('AudioEncoding', 'enums')

    @action
    def say(
        self,
        text: str,
        language: Optional[str] = None,
        voice: Optional[str] = None,
        gender: Optional[str] = None,
        **player_args,
    ):
        """
        Say a phrase.

        :param text: Text to say.
        :param language: Language code override.
        :param voice: Voice type override. If not specified, the configured
            voice is used when the language is the configured one, otherwise
            the default voice for the requested language.
        :param gender: Gender override.
        :param player_args: Extends the additional arguments to be passed to
            :meth:`platypush.plugins.sound.SoundPlugin.play` (like volume,
            duration, channels etc.).
        """

        from google.cloud import texttospeech

        client = texttospeech.TextToSpeechClient()
        synthesis_input = self._synthesis_input(text=text)

        language = self._parse_language(language)

        # Honour the voice configured on the plugin. It is only reused when
        # the language matches, since a voice name is language-specific.
        if voice is None and language == self.language:
            voice = self.voice
        voice_name = self._parse_voice(language, voice)

        ssml_gender = (
            self.gender if gender is None else getattr(self._gender, gender.upper())
        )

        voice_params = self._voice_selection_params(
            language_code=language, ssml_gender=ssml_gender, name=voice_name
        )

        audio_config = self._audio_config(audio_encoding=self._audio_encoding.MP3)
        response = client.synthesize_speech(
            input=synthesis_input, voice=voice_params, audio_config=audio_config
        )

        with tempfile.NamedTemporaryFile() as f:
            f.write(response.audio_content)
            # _playback() only receives the file name, so the buffered bytes
            # must reach the file before it is read back; without the flush
            # the tail of the audio (or all of it) may still be in memory.
            f.flush()
            self._playback(f.name, **player_args)


# vim:sw=4:ts=4:et: