Update handler.py
Browse files
- handler.py +3 -2
handler.py
CHANGED
|
@@ -5,7 +5,7 @@ from io import BytesIO
|
|
| 5 |
from transformers import AutoModel, AutoTokenizer
|
| 6 |
|
| 7 |
class EndpointHandler:
|
| 8 |
-
def __init__(self, path=""):
|
| 9 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 10 |
|
| 11 |
# Load the model
|
|
@@ -14,7 +14,8 @@ class EndpointHandler:
|
|
| 14 |
trust_remote_code=True,
|
| 15 |
attn_implementation='sdpa',
|
| 16 |
torch_dtype=torch.bfloat16 if self.device.type == "cuda" else torch.float32,
|
| 17 |
-
|
|
|
|
| 18 |
self.model.eval()
|
| 19 |
|
| 20 |
# Load the tokenizer
|
|
|
|
| 5 |
from transformers import AutoModel, AutoTokenizer
|
| 6 |
|
| 7 |
class EndpointHandler:
|
| 8 |
+
def __init__(self, path="/repository"):
|
| 9 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 10 |
|
| 11 |
# Load the model
|
|
|
|
| 14 |
trust_remote_code=True,
|
| 15 |
attn_implementation='sdpa',
|
| 16 |
torch_dtype=torch.bfloat16 if self.device.type == "cuda" else torch.float32,
|
| 17 |
+
device_map="auto",
|
| 18 |
+
)
|
| 19 |
self.model.eval()
|
| 20 |
|
| 21 |
# Load the tokenizer
|