[#348] Added openai.transcribe
action.
This API is the foundation for the `assistant.openai` plugin.
This commit is contained in:
parent
f356fcd844
commit
9cca928d4b
1 changed files with 79 additions and 1 deletions
|
@ -3,7 +3,7 @@ from dataclasses import dataclass
|
|||
from datetime import datetime as dt
|
||||
from enum import Enum
|
||||
from threading import RLock
|
||||
from typing import Iterable, List, Optional
|
||||
from typing import IO, Iterable, List, Optional
|
||||
|
||||
import requests
|
||||
|
||||
|
@ -265,6 +265,84 @@ class OpenaiPlugin(Plugin):
|
|||
self._update_context(msg)
|
||||
return msg["content"]
|
||||
|
||||
def _process_transcribe_response(self, resp: requests.Response) -> str:
|
||||
rs_json = None
|
||||
|
||||
try:
|
||||
rs_json = resp.json()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
self.logger.debug("OpenAI response: %s", rs_json)
|
||||
resp.raise_for_status()
|
||||
return (rs_json or {}).get("text", "")
|
||||
|
||||
def transcribe_file(
|
||||
self,
|
||||
f: IO,
|
||||
model: Optional[str] = 'whisper-1',
|
||||
timeout: Optional[float] = None,
|
||||
) -> str:
|
||||
resp = requests.post(
|
||||
"https://api.openai.com/v1/audio/transcriptions",
|
||||
timeout=timeout or self.timeout,
|
||||
headers={
|
||||
"Authorization": f"Bearer {self._api_key}",
|
||||
},
|
||||
files={
|
||||
"file": f,
|
||||
},
|
||||
data={
|
||||
"model": model or self.model,
|
||||
},
|
||||
)
|
||||
|
||||
return self._process_transcribe_response(resp)
|
||||
|
||||
def transcribe_raw(
|
||||
self,
|
||||
audio: bytes,
|
||||
extension: str,
|
||||
model: Optional[str] = 'whisper-1',
|
||||
timeout: Optional[float] = None,
|
||||
) -> str:
|
||||
resp = requests.post(
|
||||
"https://api.openai.com/v1/audio/transcriptions",
|
||||
timeout=timeout or self.timeout,
|
||||
headers={
|
||||
"Authorization": f"Bearer {self._api_key}",
|
||||
},
|
||||
files={
|
||||
"file": (f"audio.{extension}", audio),
|
||||
},
|
||||
data={
|
||||
"model": model or self.model,
|
||||
},
|
||||
)
|
||||
|
||||
return self._process_transcribe_response(resp)
|
||||
|
||||
@action
|
||||
def transcribe(
|
||||
self,
|
||||
audio: str,
|
||||
model: Optional[str] = 'whisper-1',
|
||||
timeout: Optional[float] = None,
|
||||
) -> str:
|
||||
"""
|
||||
Perform speech-to-text on an audio file.
|
||||
|
||||
:param audio: The audio file to transcribe.
|
||||
:param model: The model to use for speech-to-text. Default:
|
||||
``whisper-1``. If not set, the configured default model will be
|
||||
used.
|
||||
:param timeout: Timeout for the API request. If not set, the default
|
||||
timeout will be used.
|
||||
:return: The transcribed text.
|
||||
"""
|
||||
with open(os.path.expanduser(audio), "rb") as f:
|
||||
return self.transcribe_file(f, model=model, timeout=timeout)
|
||||
|
||||
def _update_context(self, *entries: dict):
|
||||
"""
|
||||
Update the context with a new entry.
|
||||
|
|
Loading…
Reference in a new issue