Hey everyone, I'm new to fine-tuning, and I decided to fine-tune a MiniLM model on a triplet dataset.
The problem I'm facing is that my training loss is not decreasing. I have read the sbert.net documentation and followed their sample code, but I still can't find the problem.
Please let me know if I made any silly mistakes, as I'm really new to this domain.
The following is my training code:
```python
def finetune_sentencebert(config: dict, dataset_dict, log_file: str = "./log/experiment_log_v2.csv"):
    """
    Fine-tune a SentenceTransformer model with a configurable loss, evaluate
    before/after training, and append the results to a CSV log.

    Args:
        config (dict): Experiment config. Required keys: "model_name",
            "output_path", "epochs", "batch_size", "loss_type".
            Optional keys: "learning_rate" (default 2e-5), "triplet_margin"
            (default 0.5), "eval_during_training" (default True).
        dataset_dict (DatasetDict): HuggingFace DatasetDict with 'train',
            'validation', and 'test' splits; each split provides 'anchor',
            'positive', and 'negative' string columns.
        log_file (str): Path to the CSV log file (appended to if it exists).

    Returns:
        SentenceTransformer: The fine-tuned model. Because
        load_best_model_at_end=True, this is the best checkpoint by eval_loss.

    Raises:
        ValueError: If config["loss_type"] is not one of
            "triplet", "cosine", or "contrastive".
    """
    # ---------------- Load model ----------------
    model = SentenceTransformer(config["model_name"])

    # ---------------- Dataset preparation ----------------
    train_dataset = dataset_dict["train"]
    eval_dataset = dataset_dict["validation"]
    test_dataset = dataset_dict["test"]

    # ---------------- Training arguments ----------------
    output_dir = config["output_path"]
    os.makedirs(output_dir, exist_ok=True)
    args = SentenceTransformerTrainingArguments(
        output_dir=output_dir,
        num_train_epochs=config["epochs"],
        per_device_train_batch_size=config["batch_size"],
        per_device_eval_batch_size=config["batch_size"],
        learning_rate=config.get("learning_rate", 2e-5),
        # Avoid duplicate texts within a batch (matters for in-batch negatives).
        batch_sampler=BatchSamplers.NO_DUPLICATES,
        # Evaluate and checkpoint every 50 steps so early stopping can react.
        eval_strategy="steps",
        eval_steps=50,
        save_strategy="steps",  # must match eval_strategy for load_best_model_at_end
        save_steps=50,          # must align with eval_steps
        save_total_limit=1,     # keep only the most recent/best checkpoint
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,  # lower eval_loss is better
        logging_steps=10,
        # FIX: report_to=None means "all installed integrations" in transformers;
        # the string "none" is what actually disables external reporting.
        report_to="none",
    )

    # ---------------- Evaluators ----------------
    # TripletEvaluator accepts raw lists of strings — no wrapping needed.
    evaluator_valid = TripletEvaluator(
        anchors=eval_dataset["anchor"],
        positives=eval_dataset["positive"],
        negatives=eval_dataset["negative"],
        name="validation_eval",
    )
    evaluator_test = TripletEvaluator(
        anchors=test_dataset["anchor"],
        positives=test_dataset["positive"],
        negatives=test_dataset["negative"],
        name="test_eval",
    )
    val_before = evaluator_valid(model)
    test_before = evaluator_test(model)
    print("[Before Training] Validation:", {k: f"{v:.4f}" for k, v in val_before.items()})
    print("[Before Training] Test:", {k: f"{v:.4f}" for k, v in test_before.items()})

    # ---------------- Flexible loss ----------------
    # NOTE(review): with (anchor, positive, negative) columns only TripletLoss
    # matches the dataset format; CosineSimilarityLoss expects a float score
    # column and ContrastiveLoss a binary label column, so those loss types
    # require a differently-shaped dataset. If triplet training stalls, also
    # consider MultipleNegativesRankingLoss, which accepts triplet columns.
    loss_type = config["loss_type"]
    if loss_type == "triplet":
        train_loss = losses.TripletLoss(
            model=model, triplet_margin=config.get("triplet_margin", 0.5)
        )
    elif loss_type == "cosine":
        train_loss = losses.CosineSimilarityLoss(model=model)
    elif loss_type == "contrastive":
        train_loss = losses.ContrastiveLoss(model=model)
    else:
        raise ValueError(f"Unknown loss type: {loss_type}")

    # ---------------- Training ----------------
    trainer = SentenceTransformerTrainer(
        model=model,
        args=args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        loss=train_loss,
        evaluator=evaluator_valid if config.get("eval_during_training", True) else None,
        # Stop when eval_loss has not improved for 5 consecutive evaluations.
        callbacks=[EarlyStoppingCallback(early_stopping_patience=5)],
    )
    trainer.train()

    # ---------------- After-training evaluation ----------------
    val_after = evaluator_valid(model)
    test_after = evaluator_test(model)
    print("[After Training] Validation:", {k: f"{v:.4f}" for k, v in val_after.items()})
    print("[After Training] Test:", {k: f"{v:.4f}" for k, v in test_after.items()})

    # ---------------- Save best model ----------------
    best_model_path = os.path.join(output_dir, "best_model")
    os.makedirs(best_model_path, exist_ok=True)
    model.save(best_model_path)
    print(f"Saved best model to {best_model_path}")

    # ---------------- Logging ----------------
    # FIX: flatten the metric dicts so each metric gets its own CSV column
    # instead of a stringified dict crammed into one cell.
    log_data = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        **config,
        **{f"val_before_{k}": v for k, v in val_before.items()},
        **{f"test_before_{k}": v for k, v in test_before.items()},
        **{f"val_after_{k}": v for k, v in val_after.items()},
        **{f"test_after_{k}": v for k, v in test_after.items()},
    }
    # FIX: create the log directory if missing; to_csv does not create parents.
    log_dir = os.path.dirname(log_file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    if os.path.exists(log_file):
        df_log = pd.read_csv(log_file)
        df_log = pd.concat([df_log, pd.DataFrame([log_data])], ignore_index=True)
    else:
        df_log = pd.DataFrame([log_data])
    df_log.to_csv(log_file, index=False)
    print(f"Logged results to {log_file}")

    return model
```