Tags: Token Classification, Transformers, Safetensors, English, bert, ner, named-entity-recognition, text-classification, sequence-labeling, transformer, nlp, pretrained-model, dataset-finetuning, deep-learning, huggingface, conll2025, real-time-inference, efficient-nlp, high-accuracy, gpu-optimized, chatbot, information-extraction, search-enhancement, knowledge-graph, legal-nlp, medical-nlp, financial-nlp
Update README.md
README.md CHANGED
@@ -539,18 +539,33 @@ model = AutoModelForTokenClassification.from_pretrained("boltuix/NeuroBERT-NER")
 texts = ["Barack Obama visited Microsoft in Seattle on January 2025."]
 true_labels = [["B-PERSON", "I-PERSON", "O", "B-ORG", "O", "B-GPE", "O", "B-DATE", "I-DATE", "O"]]
 
-# Predict
 pred_labels = []
+
 for text in texts:
-    inputs = tokenizer(text, return_tensors="pt")
+    inputs = tokenizer(text, return_tensors="pt", is_split_into_words=False, return_attention_mask=True)
     with torch.no_grad():
         outputs = model(**inputs)
-
-
+
+    predictions = outputs.logits.argmax(dim=-1)[0].cpu().numpy()
     tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
-
+    word_ids = inputs.word_ids(batch_index=0)
+
+    # Align prediction to word level (first token of each word)
+    word_preds = []
+    previous_word_idx = None
+    for idx, word_idx in enumerate(word_ids):
+        if word_idx is None or word_idx == previous_word_idx:
+            continue  # Skip special tokens and subwords
+        label = model.config.id2label[predictions[idx]]
+        word_preds.append(label)
+        previous_word_idx = word_idx
+
+    pred_labels.append(word_preds)
 
 # Evaluate
+print("Predicted:", pred_labels)
+print("True :", true_labels)
+print("\n📊 Evaluation Report:\n")
 print(classification_report(true_labels, pred_labels))
 ```
 
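The changed block assumes `tokenizer`, `model`, and `classification_report` were defined earlier in the README. For reference, here is a minimal self-contained sketch of the same first-subword alignment technique. The `seqeval` import is an assumption (the diff never shows where `classification_report` comes from, but seqeval's version is the one that accepts nested word-level label lists); the checkpoint name is taken from the hunk header.

```python
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification
# Assumption: classification_report is seqeval's, which scores
# nested per-sentence label lists like the ones built below.
from seqeval.metrics import classification_report

model_id = "boltuix/NeuroBERT-NER"  # checkpoint named in the hunk header
tokenizer = AutoTokenizer.from_pretrained(model_id)  # fast tokenizer: needed for word_ids()
model = AutoModelForTokenClassification.from_pretrained(model_id)
model.eval()

texts = ["Barack Obama visited Microsoft in Seattle on January 2025."]
true_labels = [["B-PERSON", "I-PERSON", "O", "B-ORG", "O", "B-GPE", "O", "B-DATE", "I-DATE", "O"]]

pred_labels = []
for text in texts:
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Highest-scoring label id for every subword token
    predictions = outputs.logits.argmax(dim=-1)[0].cpu().numpy()
    # Map each subword token back to its source word index
    word_ids = inputs.word_ids(batch_index=0)

    # Keep only the first subword of each word; None marks special tokens
    word_preds = []
    previous_word_idx = None
    for idx, word_idx in enumerate(word_ids):
        if word_idx is None or word_idx == previous_word_idx:
            continue
        word_preds.append(model.config.id2label[int(predictions[idx])])
        previous_word_idx = word_idx
    pred_labels.append(word_preds)

print(classification_report(true_labels, pred_labels))
```

Scoring the first subword of each word mirrors the label-alignment convention commonly used when fine-tuning token-classification models, so the predictions line up one-to-one with the word-level `true_labels`.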