Tags: Token Classification, Transformers, Safetensors, English, bert, ner, named-entity-recognition, text-classification, sequence-labeling, transformer, nlp, pretrained-model, dataset-finetuning, deep-learning, huggingface, conll2025, real-time-inference, efficient-nlp, high-accuracy, gpu-optimized, chatbot, information-extraction, search-enhancement, knowledge-graph, legal-nlp, medical-nlp, financial-nlp
Update README.md
README.md CHANGED
@@ -539,18 +539,33 @@ model = AutoModelForTokenClassification.from_pretrained("boltuix/NeuroBERT-NER")
 texts = ["Barack Obama visited Microsoft in Seattle on January 2025."]
 true_labels = [["B-PERSON", "I-PERSON", "O", "B-ORG", "O", "B-GPE", "O", "B-DATE", "I-DATE", "O"]]
 
-# Predict
 pred_labels = []
+
 for text in texts:
-    inputs = tokenizer(text, return_tensors="pt")
+    inputs = tokenizer(text, return_tensors="pt", is_split_into_words=False, return_attention_mask=True)
     with torch.no_grad():
         outputs = model(**inputs)
-
-
+
+    predictions = outputs.logits.argmax(dim=-1)[0].cpu().numpy()
     tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
-
+    word_ids = inputs.word_ids(batch_index=0)
+
+    # Align prediction to word level (first token of each word)
+    word_preds = []
+    previous_word_idx = None
+    for idx, word_idx in enumerate(word_ids):
+        if word_idx is None or word_idx == previous_word_idx:
+            continue  # Skip special tokens and subwords
+        label = model.config.id2label[predictions[idx]]
+        word_preds.append(label)
+        previous_word_idx = word_idx
+
+    pred_labels.append(word_preds)
 
 # Evaluate
+print("Predicted:", pred_labels)
+print("True :", true_labels)
+print("\n📊 Evaluation Report:\n")
 print(classification_report(true_labels, pred_labels))
 ```
 
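The changed block assumes `tokenizer`, `model`, and `classification_report` were defined earlier in the README. For reference, here is a minimal self-contained sketch of the same first-subword alignment technique. The `seqeval` import is an assumption (the diff never shows where `classification_report` comes from, but seqeval's version is the one that accepts nested word-level label lists); the checkpoint name is taken from the hunk header.

```python
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification
# Assumption: classification_report is seqeval's, which scores
# nested per-sentence label lists like the ones built below.
from seqeval.metrics import classification_report

model_id = "boltuix/NeuroBERT-NER"  # checkpoint named in the hunk header
tokenizer = AutoTokenizer.from_pretrained(model_id)  # fast tokenizer: needed for word_ids()
model = AutoModelForTokenClassification.from_pretrained(model_id)
model.eval()

texts = ["Barack Obama visited Microsoft in Seattle on January 2025."]
true_labels = [["B-PERSON", "I-PERSON", "O", "B-ORG", "O", "B-GPE", "O", "B-DATE", "I-DATE", "O"]]

pred_labels = []
for text in texts:
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Highest-scoring label id for every subword token
    predictions = outputs.logits.argmax(dim=-1)[0].cpu().numpy()
    # Map each subword token back to its source word index
    word_ids = inputs.word_ids(batch_index=0)

    # Keep only the first subword of each word; None marks special tokens
    word_preds = []
    previous_word_idx = None
    for idx, word_idx in enumerate(word_ids):
        if word_idx is None or word_idx == previous_word_idx:
            continue
        word_preds.append(model.config.id2label[int(predictions[idx])])
        previous_word_idx = word_idx
    pred_labels.append(word_preds)

print(classification_report(true_labels, pred_labels))
```

Scoring the first subword of each word mirrors the label-alignment convention commonly used when fine-tuning token-classification models, so the predictions line up one-to-one with the word-level `true_labels`.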