readme
Browse files
README.md
CHANGED
|
@@ -109,12 +109,11 @@ it inherits the benefit of the improved latency compared to [openai/whisper-larg
|
|
| 109 |
|
| 110 |
## Transformers Usage
|
| 111 |
Kotoba-Whisper is supported in the Hugging Face 🤗 Transformers library from version 4.39 onwards. To run the model, first
|
| 112 |
-
install the latest version of Transformers.
|
| 113 |
-
from the Hugging Face Hub:
|
| 114 |
|
| 115 |
```bash
|
| 116 |
pip install --upgrade pip
|
| 117 |
-
pip install --upgrade transformers accelerate
|
| 118 |
```
|
| 119 |
|
| 120 |
### Short-Form Transcription
|
|
@@ -124,7 +123,7 @@ class to transcribe short-form audio files (< 30 seconds) as follows:
|
|
| 124 |
```python
|
| 125 |
import torch
|
| 126 |
from transformers import pipeline
|
| 127 |
-
from datasets import load_dataset
|
| 128 |
|
| 129 |
# config
|
| 130 |
model_id = "kotoba-tech/kotoba-whisper-v1.0"
|
|
@@ -142,7 +141,7 @@ pipe = pipeline(
|
|
| 142 |
model_kwargs=model_kwargs
|
| 143 |
)
|
| 144 |
|
| 145 |
-
# load sample audio
|
| 146 |
dataset = load_dataset("japanese-asr/ja_asr.reazonspeech_test", split="test")
|
| 147 |
sample = dataset[0]["audio"]
|
| 148 |
|
|
@@ -222,7 +221,7 @@ Kotoba-Whisper can generate transcriptions with prompting as below:
|
|
| 222 |
import re
|
| 223 |
import torch
|
| 224 |
from transformers import pipeline
|
| 225 |
-
from datasets import load_dataset
|
| 226 |
|
| 227 |
# config
|
| 228 |
model_id = "kotoba-tech/kotoba-whisper-v1.0"
|
|
@@ -240,7 +239,7 @@ pipe = pipeline(
|
|
| 240 |
model_kwargs=model_kwargs
|
| 241 |
)
|
| 242 |
|
| 243 |
-
# load sample audio
|
| 244 |
dataset = load_dataset("japanese-asr/ja_asr.reazonspeech_test", split="test")
|
| 245 |
|
| 246 |
# --- Without prompt ---
|
|
@@ -297,11 +296,9 @@ pip install --upgrade transformers datasets[audio] evaluate jiwer
|
|
| 297 |
Evaluation can then be run end-to-end with the following example:
|
| 298 |
|
| 299 |
```python
|
| 300 |
-
from tqdm import tqdm
|
| 301 |
-
|
| 302 |
import torch
|
| 303 |
from transformers import pipeline
|
| 304 |
-
from datasets import load_dataset
|
| 305 |
from evaluate import load
|
| 306 |
|
| 307 |
# model config
|
|
@@ -312,7 +309,6 @@ model_kwargs = {"attn_implementation": "sdpa"} if torch.cuda.is_available() else
|
|
| 312 |
generate_kwargs = {"language": "japanese", "task": "transcribe"}
|
| 313 |
|
| 314 |
# data config
|
| 315 |
-
generate_kwargs = {"language": "japanese", "task": "transcribe"}
|
| 316 |
dataset_name = "japanese-asr/ja_asr.reazonspeech_test"
|
| 317 |
audio_column = 'audio'
|
| 318 |
text_column = 'transcription'
|
|
|
|
| 109 |
|
| 110 |
## Transformers Usage
|
| 111 |
Kotoba-Whisper is supported in the Hugging Face 🤗 Transformers library from version 4.39 onwards. To run the model, first
|
| 112 |
+
install the latest version of Transformers.
|
|
|
|
| 113 |
|
| 114 |
```bash
|
| 115 |
pip install --upgrade pip
|
| 116 |
+
pip install --upgrade transformers accelerate
|
| 117 |
```
|
| 118 |
|
| 119 |
### Short-Form Transcription
|
|
|
|
| 123 |
```python
|
| 124 |
import torch
|
| 125 |
from transformers import pipeline
|
| 126 |
+
from datasets import load_dataset
|
| 127 |
|
| 128 |
# config
|
| 129 |
model_id = "kotoba-tech/kotoba-whisper-v1.0"
|
|
|
|
| 141 |
model_kwargs=model_kwargs
|
| 142 |
)
|
| 143 |
|
| 144 |
+
# load sample audio
|
| 145 |
dataset = load_dataset("japanese-asr/ja_asr.reazonspeech_test", split="test")
|
| 146 |
sample = dataset[0]["audio"]
|
| 147 |
|
|
|
|
| 221 |
import re
|
| 222 |
import torch
|
| 223 |
from transformers import pipeline
|
| 224 |
+
from datasets import load_dataset
|
| 225 |
|
| 226 |
# config
|
| 227 |
model_id = "kotoba-tech/kotoba-whisper-v1.0"
|
|
|
|
| 239 |
model_kwargs=model_kwargs
|
| 240 |
)
|
| 241 |
|
| 242 |
+
# load sample audio
|
| 243 |
dataset = load_dataset("japanese-asr/ja_asr.reazonspeech_test", split="test")
|
| 244 |
|
| 245 |
# --- Without prompt ---
|
|
|
|
| 296 |
Evaluation can then be run end-to-end with the following example:
|
| 297 |
|
| 298 |
```python
|
|
|
|
|
|
|
| 299 |
import torch
|
| 300 |
from transformers import pipeline
|
| 301 |
+
from datasets import load_dataset
|
| 302 |
from evaluate import load
|
| 303 |
|
| 304 |
# model config
|
|
|
|
| 309 |
generate_kwargs = {"language": "japanese", "task": "transcribe"}
|
| 310 |
|
| 311 |
# data config
|
|
|
|
| 312 |
dataset_name = "japanese-asr/ja_asr.reazonspeech_test"
|
| 313 |
audio_column = 'audio'
|
| 314 |
text_column = 'transcription'
|