Update README.md
Browse files
README.md
CHANGED
|
@@ -219,6 +219,7 @@ print(result["text"])
|
|
| 219 |
Kotoba-whisper can generate transcription with prompting as below:
|
| 220 |
|
| 221 |
```python
|
|
|
|
| 222 |
import torch
|
| 223 |
from transformers import pipeline
|
| 224 |
from datasets import load_dataset, Audio
|
|
@@ -243,16 +244,17 @@ pipe = pipeline(
|
|
| 243 |
dataset = load_dataset("japanese-asr/ja_asr.reazonspeech_test", split="test")
|
| 244 |
|
| 245 |
# --- Without prompt ---
|
| 246 |
- result = pipe(dataset[10]["audio"], generate_kwargs=generate_kwargs)
|
| 247 |
- print(result["text"])
|
| 248 |
# 81ζ³γεεΌ·γθ΅°γγ«ε€γγ£γ¦γγΎγγ
|
| 249 |
|
| 250 |
# --- With prompt ---: Let's change `81` to `91`.
|
| 251 |
prompt = "91ζ³"
|
| 252 |
generate_kwargs['prompt_ids'] = pipe.tokenizer.get_prompt_ids(prompt, return_tensors="pt").to(device)
|
| 253 |
- result = pipe(dataset[10]["audio"], generate_kwargs=generate_kwargs)
|
| 254 |
- print(result["text"])
|
| 255 |
-
|
|
|
|
| 256 |
# γγ£γΆγ£γγ§γγΉγ«γ¬γγγ91ζ³γεεΌ·γθ΅°γγ«ε€γγ£γ¦γγΎγγ
|
| 257 |
```
|
| 258 |
|
|
|
|
| 219 |
Kotoba-whisper can generate transcription with prompting as below:
|
| 220 |
|
| 221 |
```python
|
| 222 |
+
import re
|
| 223 |
import torch
|
| 224 |
from transformers import pipeline
|
| 225 |
from datasets import load_dataset, Audio
|
|
|
|
| 244 |
dataset = load_dataset("japanese-asr/ja_asr.reazonspeech_test", split="test")
|
| 245 |
|
| 246 |
# --- Without prompt ---
|
| 247 |
+
text = pipe(dataset[10]["audio"], generate_kwargs=generate_kwargs)['text']
|
| 248 |
+
print(text)
|
| 249 |
# 81ζ³γεεΌ·γθ΅°γγ«ε€γγ£γ¦γγΎγγ
|
| 250 |
|
| 251 |
# --- With prompt ---: Let's change `81` to `91`.
|
| 252 |
prompt = "91ζ³"
|
| 253 |
generate_kwargs['prompt_ids'] = pipe.tokenizer.get_prompt_ids(prompt, return_tensors="pt").to(device)
|
| 254 |
+
text = pipe(dataset[10]["audio"], generate_kwargs=generate_kwargs)['text']
|
| 255 |
+
# currently the pipeline for ASR appends the prompt at the beginning of the transcription, so remove it
|
| 256 |
+
text = re.sub(rf"\A\s*{prompt}\s*", "", text)
|
| 257 |
+
print(text)
|
| 258 |
# γγ£γΆγ£γγ§γγΉγ«γ¬γγγ91ζ³γεεΌ·γθ΅°γγ«ε€γγ£γ¦γγΎγγ
|
| 259 |
```
|
| 260 |
|