From 9cca928d4be69fe0d3d525ae1925147a80e495d1 Mon Sep 17 00:00:00 2001
From: Fabio Manganiello <fabio@manganiello.tech>
Date: Sun, 2 Jun 2024 01:00:07 +0200
Subject: [PATCH] [#348] Added `openai.transcribe` action.

This API is the foundation for the `assistant.openai` plugin.
---
 platypush/plugins/openai/__init__.py | 80 +++++++++++++++++++++++++++-
 1 file changed, 79 insertions(+), 1 deletion(-)

diff --git a/platypush/plugins/openai/__init__.py b/platypush/plugins/openai/__init__.py
index 70b5538b0..0002796fa 100644
--- a/platypush/plugins/openai/__init__.py
+++ b/platypush/plugins/openai/__init__.py
@@ -3,7 +3,7 @@ from dataclasses import dataclass
 from datetime import datetime as dt
 from enum import Enum
 from threading import RLock
-from typing import Iterable, List, Optional
+from typing import IO, Iterable, List, Optional
 
 import requests
 
@@ -265,6 +265,84 @@ class OpenaiPlugin(Plugin):
         self._update_context(msg)
         return msg["content"]
 
+    def _process_transcribe_response(self, resp: requests.Response) -> str:
+        """Log the decoded reply, raise on HTTP errors and return its text."""
+        try:
+            payload = resp.json()
+        except Exception:
+            # Best effort: an undecodable body is logged as None.
+            payload = None
+
+        self.logger.debug("OpenAI response: %s", payload)
+        resp.raise_for_status()
+        return (payload or {}).get("text", "")
+
+    def transcribe_file(
+        self,
+        f: IO,
+        model: Optional[str] = 'whisper-1',
+        timeout: Optional[float] = None,
+    ) -> str:
+        """Transcribe an open audio file via the OpenAI API and return its text.
+
+        :param f: File object uploaded as the ``file`` multipart field.
+        :param model: Model name; ``self.model`` is used when this is empty.
+        :param timeout: Request timeout; ``self.timeout`` is used when falsy.
+        """
+        response = requests.post(
+            "https://api.openai.com/v1/audio/transcriptions",
+            timeout=timeout or self.timeout,
+            headers={"Authorization": f"Bearer {self._api_key}"},
+            files={"file": f},
+            data={"model": model or self.model},
+        )
+
+        return self._process_transcribe_response(response)
+
+    def transcribe_raw(
+        self,
+        audio: bytes,
+        extension: str,
+        model: Optional[str] = 'whisper-1',
+        timeout: Optional[float] = None,
+    ) -> str:
+        """Transcribe in-memory audio via the OpenAI API and return its text.
+
+        :param audio: Raw audio bytes, uploaded as the ``file`` multipart field.
+        :param extension: Suffix of the upload name (``audio.<extension>``).
+        :param model: Model name; ``self.model`` is used when this is empty.
+        :param timeout: Request timeout; ``self.timeout`` is used when falsy.
+        """
+        response = requests.post(
+            "https://api.openai.com/v1/audio/transcriptions",
+            timeout=timeout or self.timeout,
+            headers={"Authorization": f"Bearer {self._api_key}"},
+            files={"file": (f"audio.{extension}", audio)},
+            data={"model": model or self.model},
+        )
+        return self._process_transcribe_response(response)
+
+    @action
+    def transcribe(
+        self,
+        audio: str,
+        model: Optional[str] = 'whisper-1',
+        timeout: Optional[float] = None,
+    ) -> str:
+        """
+        Transcribe the speech contained in a local audio file.
+
+        :param audio: Path to the audio file; a leading ``~`` is expanded.
+        :param model: Speech-to-text model (default: ``whisper-1``). When
+            empty, the configured default model is used instead.
+        :param timeout: Timeout for the API request. When not set, the
+            default timeout is used.
+        :return: The transcribed text.
+        """
+        audio_path = os.path.expanduser(audio)
+        with open(audio_path, "rb") as audio_file:
+            return self.transcribe_file(audio_file, model=model, timeout=timeout)
+
     def _update_context(self, *entries: dict):
         """
         Update the context with a new entry.