OpenAI: Audio, Translate into English

OpenAI: Audio, Translate into English

OpenAI: Audio, 英文へ翻訳

Translates audio into English. English text is generated from an audio or video file containing speech in Japanese or another language, using the OpenAI API “whisper-1” model. Abbreviations and technical terms can be set as a PROMPT to achieve a more accurate translation.

Auto Step icon
Configs for this Auto Step
AuthzConfU
U: Select HTTP_Authz Setting (Secret API Key as “Fixed Value”) *
SelectConfA1
A1: Select FILE for Audio *
StrConfA2
A2: Set Request Summary PROMPT#{EL}
StrConfB1
B1: Set Sampling Temperature (default “0”)#{EL}
SelectConfC1
C1: Select STRING that stores Translated English Text (update)
SelectConfC2
C2: Select STRING that stores Translated Eng with LF (update)
SelectConfD1
D1: If to store Response JSON as a whole, Select STRING (update)
SelectConfD2
D2: If to store Translated Language, Select STRING (update)
SelectConfD3
D3: If to store Audio Duration, Select DECIMAL|STRING (update)
SelectConfD4
D4: If to store Audio Segments, Select DECIMAL|STRING (update)
Script (click to open)
// GraalJS Script (engine type: 2)

//////// START "main()" /////////////////////////////////////////////////////////////////

// Entry point. `main` is a function declaration, so it is hoisted and can be
// invoked here, ahead of its definition below.
main();
/**
 * Sends the first attached audio file to the OpenAI "audio/translations"
 * endpoint (model "whisper-1", response_format "verbose_json") and stores the
 * parsed result into the workflow data items selected in the step configs.
 *
 * Relies on the host-provided globals `configs`, `engine`, `httpClient` and
 * `java`.
 *
 * @throws {Error} when no file is attached to the data item selected in A1.
 * @throws {Error} when the API responds with a status code other than 200.
 */
function main(){

  ////// == Config Retrieving ==
  const strAuthzSetting      = configs.get      ( "AuthzConfU" );   /// REQUIRED
  engine.log( " AutomatedTask Config: Authz Setting: " + strAuthzSetting );
  const filesPocketAudio     = configs.getObject( "SelectConfA1" ); /// REQUIRED
  const filesAudio           = engine.findData( filesPocketAudio ); // list of files, or null
  // An empty list must be rejected as well: `filesAudio.get(0)` is used below.
  if ( filesAudio === null || filesAudio.size() === 0 ) {
    throw new Error( "\n AutomatedTask UnexpectedFileError:" +
                     " No File {A1} is attached \n" );
  }
  engine.log( " AutomatedTask FilesArray {A1}: " +
               filesAudio.size() + " file(s)" );
  const strPrompt            = configs.get      ( "StrConfA2" );    // NotRequired
  const strTemperature       = configs.get      ( "StrConfB1" );    // NotRequired
  const strPocketText        = configs.getObject( "SelectConfC1" ); // NotRequired
  const strPocketLfText      = configs.getObject( "SelectConfC2" ); // NotRequired
  const strPocketJson        = configs.getObject( "SelectConfD1" ); // NotRequired
  const strPocketLang        = configs.getObject( "SelectConfD2" ); // NotRequired
  const numstrPocketDuration = configs.getObject( "SelectConfD3" ); // NotRequired
  const numstrPocketSegments = configs.getObject( "SelectConfD4" ); // NotRequired


  ////// == Data Retrieving ==
  // (Nothing. Retrieved via Expression Language in Config Retrieving)


  ////// == Calculating ==
  //// OpenAI API > Documentation > API REFERENCE > AUDIO
  //// https://platform.openai.com/docs/api-reference/audio

  /// prepare request1
  const request1Uri = "https://api.openai.com/v1/audio/translations";
  let request1 = httpClient.begin(); // HttpRequestWrapper
  request1 = request1.authSetting( strAuthzSetting ); // with "Authorization: Bearer XX"
  request1 = request1.multipart( "file", filesAudio.get(0) ); // only the first file is sent
  request1 = request1.multipart( "model", "whisper-1" );
  request1 = request1.multipart( "response_format", "verbose_json" ); // "vtt" to WebVTT
  if ( strPrompt !== "" ) {
    request1 = request1.multipart( "prompt",      strPrompt );
  }
  if ( strTemperature !== "" ) {
    request1 = request1.multipart( "temperature", strTemperature );
  }

  /// try request1 (log the start BEFORE the call so the log reflects the real order)
  engine.log( " AutomatedTask ApiRequest1 Start: " + request1Uri );
  const response1     = request1.post( request1Uri ); // HttpResponseWrapper
  const response1Code = response1.getStatusCode() + ""; // JavaNum to string
  const response1Body = response1.getResponseAsString();
  engine.log( " AutomatedTask ApiResponse1 Status: " + response1Code );
  if ( response1Code !== "200" ) {
    throw new Error( "\n AutomatedTask UnexpectedResponseError: " +
                      response1Code + "\n" + response1Body + "\n" );
  }

  /// parse response1
  /* Shape of a "verbose_json" response (abridged sample):
  {
    "task":"translate",
    "language":"english",
    "duration":19.41,
    "segments":[{
      "id":0, "seek":0, "start":0.0, "end":10.0,
      "text":" Once upon a time, there was an old man and an old woman.",
      "tokens":[3443,3564,257], "temperature":0.0,
      "avg_logprob":-0.44667461940220426,
      "compression_ratio":1.6734693877551021,
      "no_speech_prob":0.002584837144240737,
      "transient":false
    }],
    "text":"Once upon a time, there was an old man and an old woman."
  }
  */
  const response1Obj = JSON.parse( response1Body );

  /// extract text (one line per segment)
  // Treat a missing "segments" array as empty, the same way the other
  // response fields fall back to a default instead of aborting the step.
  const segments  = Array.isArray( response1Obj.segments ) ? response1Obj.segments : [];
  const arrLfText = segments.map( ( segment ) => segment.text );


  ////// == Data Updating ==

  /**
   * Stores a numeric value into a data item that is either STRING or DECIMAL.
   * Does nothing when no data item was selected (`pocket === null`).
   * The type check is made on the very pocket being written.
   */
  const storeNumeric = ( pocket, value ) => {
    if ( pocket === null ) {
      return;
    }
    if ( pocket.matchDataType( "STRING" ) ) {
      // The fallback is applied before the string conversion, so a missing
      // value becomes "" rather than the text "undefined".
      engine.setData( pocket, String( value ?? "" ) );
    } else {
      // Built from the decimal string so the stored number is exactly the
      // value printed in the JSON (no binary floating-point expansion).
      engine.setData( pocket, new java.math.BigDecimal( String( value ?? 0 ) ) );
    }
  };

  if ( strPocketText !== null ) {
    engine.setData( strPocketText, response1Obj.text ?? "" );
  }
  if ( strPocketLfText !== null ) {
    engine.setData( strPocketLfText, arrLfText.join( '\n' ) );
  }
  if ( strPocketJson !== null ) {
    engine.setData( strPocketJson, response1Body );
  }
  if ( strPocketLang !== null ) {
    engine.setData( strPocketLang, response1Obj.language ?? "" );
  }
  storeNumeric( numstrPocketDuration, response1Obj.duration );
  storeNumeric( numstrPocketSegments, segments.length );


} //////// END "main()" /////////////////////////////////////////////////////////////////


/*
Notes:
- If you place this "Automated Step" in the Workflow diagram,
    - the request will be automatically sent every time the process token arrives.
    - A request is automatically sent to the OpenAI API server. (REST API)
    - The response from the OpenAI API server is automatically parsed.
    - You can incorporate "AI assistance" into your business processes.
- Audio File: The file stored in the selected FILE-type data item is used as the audio source.
    - The first file is used as the audio source.
    - The second and subsequent files are not referenced.
    - Audio file to transcribe
        - "mp3", "mp4", "mpeg", "mpga", "m4a", "wav", or "webm"
        - Video files (mp4, etc.) are also supported.
        - Consider the size limit on the API side.
            - Status 413: "Maximum content size limit (26214400)" (about 26MB) (as of 202303)
        - Note: Upload in Questetra BPM Suite is limited to 100MB (as of 202303)
- PROMPT: Text to improve the quality of the generated transcripts
    - The PROMPT should be in English.
    - Summary text for reference in English translation
        - Words or acronyms that the model often misrecognizes in the audio
- An API key is required to use the OpenAI API.
    - Get an API Key in advance.
    - Set "Secret API Key" to "HTTP Authz Setting" (Token Fixed Value)

APPENDIX:
- Sampling temperature
    - range: "[0,1]", default: "0"
- Headers for developers belonging to multiple organizations are not yet supported (as of 202303).
    - `OpenAI-Organization`
- Translated Language
     - Static value "`english`" (as of 202303)
- Original Scenario for PROMPT test
    - "むかしむかし、あるところに、お爺さんとお婆さんがありました。お爺さんは山へ柴刈りに、お婆さんも山へ柴刈りに行きました。なんでやねん。"
    - (Once upon a time, there was an old man and an old woman in a certain place. The old man went to the mountain to gather firewood, and the old woman also went to the mountain to gather firewood. No way!)
    - Translated by DeepL
        - Once upon a time, there was a grandfather and an old woman. The grandfather went to the mountain to harvest bushes, and the grandmother also went to the mountain to harvest bushes. Why did they do that?
    - Translated by Google Translate
        - Once upon a time, there was an old man and an old woman in a certain place. The old man went to the mountains to cut firewood, and the old woman also went to the mountains to cut firewood. why.
- Translated by Whisper-1 (no Prompt)
    - "Once upon a time, there was an old man and an old woman. The old man went to the mountains to hunt deer. The old woman also went to the mountains to hunt deer. Once upon a time, there was an old man and an old woman."
    - (むかしむかし、お爺さんとお婆さんがありました。お爺さんは山に鹿狩りに行きました。お婆さんも山に鹿狩りに行きました。むかしむかし、お爺さんとお婆さんがありました。)
- Translated by Whisper-1 with Prompt
    - Prompt: "The story is about an old man and an old woman going to the mountains to cut small trees." (お爺さんとお婆さんが山へ小さな雑木を刈りに行く話です。)
    - "Once upon a time, there was an old man and an old woman. The old man went to the mountains to cut small trees. The old woman also went to the mountains to cut small trees. And the old woman went to the mountains to cut small trees."
- Translated by Whisper-1 with Prompt
    - Prompt: "The story is about an old man and an old woman going to the mountains to gather firewood." (お爺さんとお婆さんが山へ柴刈りに行く話です。)
    - "Once upon a time, there was an old man and an old woman. The old man went to the mountains to gather firewood. The old woman also went to the mountains to gather firewood. And the old woman was going to the mountains to gather firewood".



Notes-ja:
- この[自動工程]をワークフロー図に配置すれば、案件が到達する度にリクエストが自動送信されます。
    - OpenAI API サーバに対してリクエストが自動送出されます。(REST API通信)
    - OpenAI API サーバからのレスポンスが自動解析されます。
    - "AI による支援" を業務プロセスに組み込むことが出来ます。
- 音声ファイル: ファイル型データの格納ファイルを音源とします。
    - 1つ目に保存されているファイルを音源とします。
    - 2つ目以降のファイルは参照しません。
    - 音声ファイル(会議音声等)フォーマット
        - "mp3", "mp4", "mpeg", "mpga", "m4a", "wav", or "webm"
        - 動画ファイル(mp4 等)にも対応しています。
        - API 側のサイズ制限に配慮が必要です。
            - Status 413: "Maximum content size limit (26214400)" (約26MB) (202303時点)
        - Questetra BPM Suite のアップロード操作が100MBに制限されている点にも注意が必要です(202303時点)
- 概要 PROMPT: 生成文の品質を向上させるためのテキストを登録します。
    - PROMPT設定は英語が推奨。
    - 英文への翻訳の参考となる概要説明テキスト
        - モデルに誤認されがちな略語や熟語など
- OpenAI API の利用には API key が必要です。
    - あらかじめ API Key を取得しておいてください。
    - "Secret API Key" のセット: [HTTP 認証設定]>[トークン直接指定]

APPENDIX-ja:
- サンプリング温度設定(temperature)
    - range:"[0,1]", default:"0"
- 複数組織に所属する開発者向けのヘッダには未対応です(202303時点)
    - `OpenAI-Organization`
- 翻訳文の言語 / Translated Language
    - "`english`" で固定です(202303時点)
- Original Scenario for PROMPT test
    - "むかしむかし、あるところに、お爺さんとお婆さんがありました。お爺さんは山へ柴刈りに、お婆さんも山へ柴刈りに行きました。なんでやねん。"
    - (Once upon a time, there was an old man and an old woman in a certain place. The old man went to the mountain to gather firewood, and the old woman also went to the mountain to gather firewood. No way!)
    - Translated by DeepL
        - Once upon a time, there was a grandfather and an old woman. The grandfather went to the mountain to harvest bushes, and the grandmother also went to the mountain to harvest bushes. Why did they do that?
    - Translated by Google Translate
        - Once upon a time, there was an old man and an old woman in a certain place. The old man went to the mountains to cut firewood, and the old woman also went to the mountains to cut firewood. why.
- Translated by Whisper-1 (no Prompt)
    - "Once upon a time, there was an old man and an old woman. The old man went to the mountains to hunt deer. The old woman also went to the mountains to hunt deer. Once upon a time, there was an old man and an old woman."
    - (むかしむかし、お爺さんとお婆さんがありました。お爺さんは山に鹿狩りに行きました。お婆さんも山に鹿狩りに行きました。むかしむかし、お爺さんとお婆さんがありました。)
- Translated by Whisper-1 with Prompt
    - Prompt: "The story is about an old man and an old woman going to the mountains to cut small trees." (お爺さんとお婆さんが山へ小さな雑木を刈りに行く話です。)
    - "Once upon a time, there was an old man and an old woman. The old man went to the mountains to cut small trees. The old woman also went to the mountains to cut small trees. And the old woman went to the mountains to cut small trees."
- Translated by Whisper-1 with Prompt
    - Prompt: "The story is about an old man and an old woman going to the mountains to gather firewood." (お爺さんとお婆さんが山へ柴刈りに行く話です。)
    - "Once upon a time, there was an old man and an old woman. The old man went to the mountains to gather firewood. The old woman also went to the mountains to gather firewood. And the old woman was going to the mountains to gather firewood".

*/

Download

warning Freely modifiable JavaScript (ECMAScript) code. No warranty of any kind.
(Installing Addon Auto-Steps are available only on the Professional edition.)

Notes

  • If you place this automated step in the Workflow diagram,
    • the request will be automatically sent every time the process token arrives.
    • A request is automatically sent to the OpenAI API server. (REST API)
    • The response from the OpenAI API server is automatically parsed.
    • You can incorporate AI assistance into your business processes.
  • Audio File: The file stored in the selected FILE-type data item is used as the audio source.
    • The first file is used as the audio source.
    • The second and subsequent files are not referenced.
    • Audio file to transcribe
      • mp3, mp4, mpeg, mpga, m4a, wav, or webm
      • Video files (mp4, etc.) are also supported.
      • Consider the size limit on the API side.
        • Status 413: “Maximum content size limit (26214400)” (about 26MB) (as of 202303)
      • Note: Upload in Questetra BPM Suite is limited to 100MB (as of 202303)
  • PROMPT: Text to improve the quality of the generated transcripts
    • The PROMPT should be in English.
    • Summary text for reference in English translation
      • Words or acronyms that the model often misrecognizes in the audio
  • An API key is required to use the OpenAI API.
    • Get an API Key in advance.
    • Set “Secret API Key” to “HTTP Authz Setting” (Token Fixed Value)

Capture

Translates audio into English. English text is generated from an audio or video file containing speech in Japanese or another language, using the OpenAI API "whisper-1" model. Abbreviations and technical terms can be set as a PROMPT to achieve a more accurate translation.

Appendix

  • Sampling temperature
    • range: “[0,1]”, default: “0”
  • Headers for developers belonging to multiple organizations are not yet supported (as of 202303).
    • OpenAI-Organization
  • Translated Language
    • Static value “english” (as of 202303)
  • Original Scenario for PROMPT test
    • “むかしむかし、あるところに、お爺さんとお婆さんがありました。お爺さんは山へ柴刈りに、お婆さんも山へ柴刈りに行きました。なんでやねん。”
    • (Once upon a time, there was an old man and an old woman in a certain place. The old man went to the mountain to gather firewood, and the old woman also went to the mountain to gather firewood. No way!)
    • Translated by DeepL
      • Once upon a time, there was a grandfather and an old woman. The grandfather went to the mountain to harvest bushes, and the grandmother also went to the mountain to harvest bushes. Why did they do that?
    • Translated by Google Translate
      • Once upon a time, there was an old man and an old woman in a certain place. The old man went to the mountains to cut firewood, and the old woman also went to the mountains to cut firewood. why.
  • Translated by Whisper-1 (no Prompt)
    • “Once upon a time, there was an old man and an old woman. The old man went to the mountains to hunt deer. The old woman also went to the mountains to hunt deer. Once upon a time, there was an old man and an old woman.”
    • (むかしむかし、お爺さんとお婆さんがありました。お爺さんは山に鹿狩りに行きました。お婆さんも山に鹿狩りに行きました。むかしむかし、お爺さんとお婆さんがありました。)
  • Translated by Whisper-1 with Prompt
    • Prompt: “The story is about an old man and an old woman going to the mountains to cut small trees.” (お爺さんとお婆さんが山へ小さな雑木を刈りに行く話です。)
    • “Once upon a time, there was an old man and an old woman. The old man went to the mountains to cut small trees. The old woman also went to the mountains to cut small trees. And the old woman went to the mountains to cut small trees.”
  • Translated by Whisper-1 with Prompt
    • Prompt: “The story is about an old man and an old woman going to the mountains to gather firewood.” (お爺さんとお婆さんが山へ柴刈りに行く話です。)
    • “Once upon a time, there was an old man and an old woman. The old man went to the mountains to gather firewood. The old woman also went to the mountains to gather firewood. And the old woman was going to the mountains to gather firewood”.

See Also

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.

%d bloggers like this: