Talk2DINO-ViTB / configuration_talk2dino.py
lorebianchi98's picture
updated loading strategy
f335052
raw
history blame
1.6 kB
from transformers import PretrainedConfig
class Talk2DINOConfig(PretrainedConfig):
    """Configuration container registered under the ``talk2dino`` model type.

    Each named constructor argument is stored, unchanged, as an instance
    attribute with the same name. Any further keyword arguments are passed
    straight through to ``PretrainedConfig.__init__``.

    NOTE(review): what each option means is decided by the code that reads
    this config, which is not part of this file -- confirm there before
    relying on an option's name alone.
    """

    model_type = "talk2dino"

    def __init__(
        self,
        avg_self_attn_token=False,
        clip_model_name="ViT-B/16",
        disentangled_self_attn_token=True,
        is_eval=True,
        keep_cls=False,
        keep_end_seq=False,
        loss=None,
        model_name="dinov2_vitb14_reg",
        pre_trained=True,
        proj_class="vitb_mlp_infonce",
        proj_model="ProjectionLayer",
        proj_name="vitb_mlp_infonce",
        resize_dim=518,
        type="DINOText",
        unfreeze_last_image_layer=False,
        unfreeze_last_text_layer=False,
        use_avg_text_token=False,
        with_bg_clean=False,
        **kwargs,
    ):
        # The base class is initialised first, before any of the
        # Talk2DINO-specific attributes below are written.
        super().__init__(**kwargs)

        # Attribute name -> constructor value. Dict insertion order is kept,
        # so attributes are set in exactly the order they are listed here.
        # NOTE: ``type`` is the constructor parameter, which shadows the
        # builtin inside this method; the name is part of the keyword
        # interface and is therefore left as-is.
        own_options = {
            "avg_self_attn_token": avg_self_attn_token,
            "clip_model_name": clip_model_name,
            "disentangled_self_attn_token": disentangled_self_attn_token,
            "is_eval": is_eval,
            "keep_cls": keep_cls,
            "keep_end_seq": keep_end_seq,
            "loss": loss,
            "model_name": model_name,
            "pre_trained": pre_trained,
            "proj_class": proj_class,
            "proj_model": proj_model,
            "proj_name": proj_name,
            "resize_dim": resize_dim,
            "type": type,
            "unfreeze_last_image_layer": unfreeze_last_image_layer,
            "unfreeze_last_text_layer": unfreeze_last_text_layer,
            "use_avg_text_token": use_avg_text_token,
            "with_bg_clean": with_bg_clean,
        }
        for attr_name, attr_value in own_options.items():
            setattr(self, attr_name, attr_value)