[#348] Added openai.transcribe action.

This API is the foundation for the `assistant.openai` plugin.
2024-06-02 01:00:07 +02:00 · 2024-06-02 01:00:07 +02:00 · 9cca928d4b
commit 9cca928d4b
parent f356fcd844
1 changed files with 79 additions and 1 deletions
--- a/platypush/plugins/openai/__init__.py
+++ b/platypush/plugins/openai/__init__.py
@@ -3,7 +3,7 @@ from dataclasses import dataclass
 from datetime import datetime as dt
 from enum import Enum
 from threading import RLock
-from typing import Iterable, List, Optional
+from typing import IO, Iterable, List, Optional
 import requests
@@ -265,6 +265,84 @@ class OpenaiPlugin(Plugin):
        self._update_context(msg)
        return msg["content"]
    def _process_transcribe_response(self, resp: requests.Response) -> str:
        """
        Extract the transcribed text from a transcription API response.

        :param resp: HTTP response returned by the transcription request.
        :return: The value of the ``text`` field of the JSON payload, or an
            empty string if the payload has no such field or is not a JSON
            object.
        :raises requests.HTTPError: If the response has an error status.
        """
        rs_json = None

        try:
            rs_json = resp.json()
        except ValueError as e:
            # ``Response.json`` raises a ``ValueError`` subclass when the body
            # is not valid JSON. Keep going, so that an error status is still
            # reported through ``raise_for_status`` below.
            self.logger.debug(
                "Could not parse the OpenAI response as JSON: %s", e
            )

        self.logger.debug("OpenAI response: %s", rs_json)
        resp.raise_for_status()

        # A payload that is not a JSON object (e.g. a list or a bare string)
        # carries no transcription and has no ``.get`` method.
        if not isinstance(rs_json, dict):
            return ""

        return rs_json.get("text", "")
    def transcribe_file(
        self,
        f: IO,
        model: Optional[str] = 'whisper-1',
        timeout: Optional[float] = None,
    ) -> str:
        """
        Send an open file object to the audio transcriptions endpoint.

        :param f: File object with the audio content, passed as the ``file``
            entry of the request's ``files``.
        :param model: Model name sent with the request. Default:
            ``whisper-1``. If empty, ``self.model`` is used instead.
        :param timeout: Timeout for the API request. If not set,
            ``self.timeout`` is used instead.
        :return: The transcribed text, as extracted by
            ``_process_transcribe_response``.
        """
        request_timeout = timeout or self.timeout
        auth_headers = {"Authorization": f"Bearer {self._api_key}"}
        upload = {"file": f}
        form_fields = {"model": model or self.model}

        return self._process_transcribe_response(
            requests.post(
                "https://api.openai.com/v1/audio/transcriptions",
                timeout=request_timeout,
                headers=auth_headers,
                files=upload,
                data=form_fields,
            )
        )
    def transcribe_raw(
        self,
        audio: bytes,
        extension: str,
        model: Optional[str] = 'whisper-1',
        timeout: Optional[float] = None,
    ) -> str:
        """
        Send in-memory audio bytes to the audio transcriptions endpoint.

        :param audio: Raw audio content.
        :param extension: File extension used to build the upload file name
            (``audio.<extension>``).
        :param model: Model name sent with the request. Default:
            ``whisper-1``. If empty, ``self.model`` is used instead.
        :param timeout: Timeout for the API request. If not set,
            ``self.timeout`` is used instead.
        :return: The transcribed text, as extracted by
            ``_process_transcribe_response``.
        """
        request_timeout = timeout or self.timeout
        auth_headers = {"Authorization": f"Bearer {self._api_key}"}
        # The bytes are uploaded under a synthetic file name that carries the
        # given extension.
        upload = {"file": (f"audio.{extension}", audio)}
        form_fields = {"model": model or self.model}

        return self._process_transcribe_response(
            requests.post(
                "https://api.openai.com/v1/audio/transcriptions",
                timeout=request_timeout,
                headers=auth_headers,
                files=upload,
                data=form_fields,
            )
        )
    @action
    def transcribe(
        self,
        audio: str,
        model: Optional[str] = 'whisper-1',
        timeout: Optional[float] = None,
    ) -> str:
        """
        Perform speech-to-text on an audio file.

        :param audio: Path of the audio file to transcribe. A leading ``~``
            is expanded to the user's home directory.
        :param model: The model to use for speech-to-text. Default:
            ``whisper-1``. If not set, the configured default model will be
            used.
        :param timeout: Timeout for the API request. If not set, the default
            timeout will be used.
        :return: The transcribed text.
        """
        audio_path = os.path.expanduser(audio)

        # The file stays open only for the duration of the upload.
        with open(audio_path, "rb") as audio_file:
            text = self.transcribe_file(audio_file, model=model, timeout=timeout)

        return text
    def _update_context(self, *entries: dict):
        """
        Update the context with a new entry.