Upload folder using huggingface_hub

Browse files

Files changed (8) hide show

added_tokens.json +4 -0
config.json +107 -0
model.safetensors +3 -0
pl_config.yaml +162 -0
special_tokens_map.json +37 -0
tokenizer.json +0 -0
tokenizer_config.json +83 -0
vocab.txt +0 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "[DOC]": 30523,
+  "[QUE]": 30522
+}

config.json ADDED Viewed

	@@ -0,0 +1,107 @@

+{
+  "absolute_positional_embedding_type": null,
+  "add_marker_tokens": true,
+  "architectures": [
+    "LirDprTiteModel"
+  ],
+  "backbone_model_type": "tite",
+  "doc_length": 256,
+  "doc_pooling_strategy": "first",
+  "dropout_prob": 0.1,
+  "embedding_dim": null,
+  "hidden_act": "gelu_pytorch_tanh",
+  "hidden_sizes": [
+    768,
+    768,
+    768,
+    768,
+    768,
+    768,
+    768,
+    768,
+    768,
+    768,
+    768,
+    768
+  ],
+  "initializer_range": 0.02,
+  "intermediate_sizes": [
+    3072,
+    3072,
+    3072,
+    3072,
+    3072,
+    3072,
+    3072,
+    3072,
+    3072,
+    3072,
+    3072,
+    3072
+  ],
+  "kernel_sizes": [
+    null,
+    null,
+    null,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "lir-dpr",
+  "norm_location": "post",
+  "norm_type": "layer",
+  "normalize": false,
+  "num_attention_heads": [
+    12,
+    12,
+    12,
+    12,
+    12,
+    12,
+    12,
+    12,
+    12,
+    12,
+    12,
+    12
+  ],
+  "num_hidden_layers": 12,
+  "pad_token_id": null,
+  "pooling_implementation": "triton",
+  "pooling_location": "intra",
+  "positional_embedding_type": null,
+  "projection": null,
+  "query_length": 32,
+  "query_pooling_strategy": "first",
+  "relative_positional_embedding_type": "rotary",
+  "rope_implementation": "eager",
+  "rotary_interleaved": true,
+  "save_step": 10006,
+  "similarity_function": "dot",
+  "sparsification": null,
+  "strides": [
+    null,
+    null,
+    null,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.52.4",
+  "vocab_size": 30528
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7526fe95152a338edcbb99534a1174a6fa9d7a1c8e20563f1a962271f29a1ab
+size 434027280

pl_config.yaml ADDED Viewed

	@@ -0,0 +1,162 @@

+# lightning.pytorch==2.5.2
+seed_everything: 0  # fixed seed value recorded for this run
+trainer:  # Trainer section of the config; the header comment above records lightning.pytorch==2.5.2
+  accelerator: auto
+  strategy: auto
+  devices: auto
+  num_nodes: 1
+  precision: bf16-mixed  # NOTE(review): config.json in this commit records torch_dtype float32 for the saved weights
+  logger:
+    class_path: lightning_ir.LightningIRWandbLogger  # presumably a Weights & Biases logger, judging by the class name - confirm
+    init_args:
+      name: null
+      save_dir: .
+      version: null
+      offline: false
+      dir: null
+      id: null
+      anonymous: null
+      project: tite
+      log_model: false
+      experiment: null
+      prefix: ''
+      checkpoint_name: null
+      entity: tite
+      notes: null
+      tags: null
+      config: null
+      config_exclude_keys: null
+      config_include_keys: null
+      allow_val_change: null
+      group: null
+      job_type: null
+      mode: null
+      force: null
+      reinit: null
+      resume: null
+      resume_from: null
+      fork_from: null
+      save_code: null
+      tensorboard: null
+      sync_tensorboard: null
+      monitor_gym: null
+      settings: null
+  callbacks:
+  - class_path: lightning.pytorch.callbacks.ModelCheckpoint
+    init_args:
+      dirpath: null
+      filename: null
+      monitor: null  # no metric is monitored, so mode/save_top_k below are not tied to a metric here
+      verbose: false
+      save_last: null
+      save_top_k: 1
+      save_weights_only: false
+      mode: min
+      auto_insert_metric_name: true
+      every_n_train_steps: null
+      train_time_interval: null
+      every_n_epochs: null
+      save_on_train_epoch_end: null
+      enable_version_counter: true
+  - class_path: tite.utils.callbacks.DummyImportCallback  # presumably listed only so the tite package is imported - confirm
+  fast_dev_run: false
+  max_epochs: null
+  min_epochs: null
+  max_steps: 10100  # training length is bounded by steps (max_epochs is null); config.json records save_step 10006
+  min_steps: null
+  max_time: null
+  limit_train_batches: null
+  limit_val_batches: null
+  limit_test_batches: null
+  limit_predict_batches: null
+  overfit_batches: 0.0
+  val_check_interval: 20000  # NOTE(review): larger than max_steps (10100); presumably validation never triggers in this run - confirm intended
+  check_val_every_n_epoch: null
+  num_sanity_val_steps: null
+  log_every_n_steps: null
+  enable_checkpointing: null
+  enable_progress_bar: false
+  enable_model_summary: null
+  accumulate_grad_batches: 8  # with data.init_args.train_batch_size 64 this is presumably 512 samples per optimizer step per device - confirm
+  gradient_clip_val: 1
+  gradient_clip_algorithm: null
+  deterministic: null
+  benchmark: null
+  inference_mode: true
+  use_distributed_sampler: true
+  profiler: null
+  detect_anomaly: false
+  barebones: false
+  plugins: null
+  sync_batchnorm: false
+  reload_dataloaders_every_n_epochs: 0
+  default_root_dir: null
+  model_registry: null
+model:  # module definition: a lightning_ir BiEncoderModule configured with a DprConfig
+  class_path: lightning_ir.BiEncoderModule
+  init_args:
+    model_name_or_path: webis/tite-2-late  # backbone identifier; presumably a Hugging Face Hub repo id - confirm
+    config:
+      class_path: lightning_ir.models.DprConfig
+      init_args:
+        query_length: 32  # same value as query_length in config.json and tokenizer_config.json of this commit
+        doc_length: 256  # same value as doc_length in config.json and tokenizer_config.json of this commit
+        similarity_function: dot
+        normalize: false
+        sparsification: null
+        add_marker_tokens: true  # added_tokens.json in this commit defines the [QUE] and [DOC] tokens
+        query_pooling_strategy: first
+        doc_pooling_strategy: first
+        embedding_dim: 768  # NOTE(review): config.json in this commit stores embedding_dim as null - confirm which value applies
+        projection: null
+    model: null
+    loss_functions:  # three loss classes are listed; how they are weighted or combined is not visible here
+    - class_path: lightning_ir.SupervisedMarginMSE
+    - class_path: lightning_ir.KLDivergence
+    - class_path: lightning_ir.ScoreBasedInBatchCrossEntropy
+      init_args:
+        min_target_diff: 3.0
+        max_num_neg_samples: null
+    evaluation_metrics:
+    - nDCG@10
+    index_dir: null
+    search_config: null
+    model_kwargs: null
+data:  # data module definition: one training dataset plus loader settings
+  class_path: lightning_ir.LightningIRDataModule
+  init_args:
+    train_dataset:
+      class_path: lightning_ir.RunDataset
+      init_args:
+        run_path_or_id: msmarco-passage/train/rank-distillm-set-encoder
+        depth: 100
+        sample_size: 16
+        sampling_strategy: log_random
+        targets: score
+        normalize_targets: false
+        add_docs_not_in_ranking: false
+    train_batch_size: 64
+    shuffle_train: true
+    inference_batch_size: 1  # sibling of train_batch_size under init_args; must sit at the same indent for the mapping to parse
+    num_workers: 4
+lr_scheduler:  # learning-rate schedule; per the class name, linear warmup followed by a constant rate
+  class_path: tite.utils.lr_schedulers.ConstantLRSchedulerWithLinearWarmup
+  init_args:
+    num_warmup_steps: 3000  # out of trainer.max_steps 10100
+    num_delay_steps: 0
+optimizer:  # optimizer definition: torch.optim.AdamW with the arguments below
+  class_path: torch.optim.AdamW
+  init_args:
+    lr: 5.0e-05  # written with a decimal point and signed exponent; keep this form so YAML 1.1 loaders read a float
+    betas:
+    - 0.9
+    - 0.999
+    eps: 1.0e-08
+    weight_decay: 0.01
+    amsgrad: false
+    maximize: false
+    foreach: null
+    capturable: false
+    differentiable: false
+    fused: null
+ckpt_path: null  # no checkpoint path is recorded in this config

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,83 @@

+{
+  "add_marker_tokens": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "30522": {
+      "content": "[QUE]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "30523": {
+      "content": "[DOC]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "backbone_tokenizer_class": "TiteTokenizer",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "doc_length": 256,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "max_length": 512,
+  "model_max_length": 512,
+  "pad_to_multiple_of": 8,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "query_length": 32,
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BiEncoderTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff