fschlatt committed on
Commit
b1780ff
·
verified ·
1 Parent(s): fa2a265

Upload folder using huggingface_hub

Browse files
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "[DOC]": 30523,
3
+ "[QUE]": 30522
4
+ }
config.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "absolute_positional_embedding_type": null,
3
+ "add_marker_tokens": true,
4
+ "architectures": [
5
+ "LirDprTiteModel"
6
+ ],
7
+ "backbone_model_type": "tite",
8
+ "doc_length": 256,
9
+ "doc_pooling_strategy": "first",
10
+ "dropout_prob": 0.1,
11
+ "embedding_dim": null,
12
+ "hidden_act": "gelu_pytorch_tanh",
13
+ "hidden_sizes": [
14
+ 768,
15
+ 768,
16
+ 768,
17
+ 768,
18
+ 768,
19
+ 768,
20
+ 768,
21
+ 768,
22
+ 768,
23
+ 768,
24
+ 768,
25
+ 768
26
+ ],
27
+ "initializer_range": 0.02,
28
+ "intermediate_sizes": [
29
+ 3072,
30
+ 3072,
31
+ 3072,
32
+ 3072,
33
+ 3072,
34
+ 3072,
35
+ 3072,
36
+ 3072,
37
+ 3072,
38
+ 3072,
39
+ 3072,
40
+ 3072
41
+ ],
42
+ "kernel_sizes": [
43
+ null,
44
+ null,
45
+ null,
46
+ 2,
47
+ 2,
48
+ 2,
49
+ 2,
50
+ 2,
51
+ 2,
52
+ 2,
53
+ 2,
54
+ 2
55
+ ],
56
+ "layer_norm_eps": 1e-12,
57
+ "max_position_embeddings": 512,
58
+ "model_type": "lir-dpr",
59
+ "norm_location": "post",
60
+ "norm_type": "layer",
61
+ "normalize": false,
62
+ "num_attention_heads": [
63
+ 12,
64
+ 12,
65
+ 12,
66
+ 12,
67
+ 12,
68
+ 12,
69
+ 12,
70
+ 12,
71
+ 12,
72
+ 12,
73
+ 12,
74
+ 12
75
+ ],
76
+ "num_hidden_layers": 12,
77
+ "pad_token_id": null,
78
+ "pooling_implementation": "triton",
79
+ "pooling_location": "intra",
80
+ "positional_embedding_type": null,
81
+ "projection": null,
82
+ "query_length": 32,
83
+ "query_pooling_strategy": "first",
84
+ "relative_positional_embedding_type": "rotary",
85
+ "rope_implementation": "eager",
86
+ "rotary_interleaved": true,
87
+ "save_step": 10006,
88
+ "similarity_function": "dot",
89
+ "sparsification": null,
90
+ "strides": [
91
+ null,
92
+ null,
93
+ null,
94
+ 2,
95
+ 2,
96
+ 2,
97
+ 2,
98
+ 2,
99
+ 2,
100
+ 2,
101
+ 2,
102
+ 2
103
+ ],
104
+ "torch_dtype": "float32",
105
+ "transformers_version": "4.52.4",
106
+ "vocab_size": 30528
107
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7526fe95152a338edcbb99534a1174a6fa9d7a1c8e20563f1a962271f29a1ab
3
+ size 434027280
pl_config.yaml ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # lightning.pytorch==2.5.2
2
+ seed_everything: 0
3
+ trainer:
4
+ accelerator: auto
5
+ strategy: auto
6
+ devices: auto
7
+ num_nodes: 1
8
+ precision: bf16-mixed
9
+ logger:
10
+ class_path: lightning_ir.LightningIRWandbLogger
11
+ init_args:
12
+ name: null
13
+ save_dir: .
14
+ version: null
15
+ offline: false
16
+ dir: null
17
+ id: null
18
+ anonymous: null
19
+ project: tite
20
+ log_model: false
21
+ experiment: null
22
+ prefix: ''
23
+ checkpoint_name: null
24
+ entity: tite
25
+ notes: null
26
+ tags: null
27
+ config: null
28
+ config_exclude_keys: null
29
+ config_include_keys: null
30
+ allow_val_change: null
31
+ group: null
32
+ job_type: null
33
+ mode: null
34
+ force: null
35
+ reinit: null
36
+ resume: null
37
+ resume_from: null
38
+ fork_from: null
39
+ save_code: null
40
+ tensorboard: null
41
+ sync_tensorboard: null
42
+ monitor_gym: null
43
+ settings: null
44
+ callbacks:
45
+ - class_path: lightning.pytorch.callbacks.ModelCheckpoint
46
+ init_args:
47
+ dirpath: null
48
+ filename: null
49
+ monitor: null
50
+ verbose: false
51
+ save_last: null
52
+ save_top_k: 1
53
+ save_weights_only: false
54
+ mode: min
55
+ auto_insert_metric_name: true
56
+ every_n_train_steps: null
57
+ train_time_interval: null
58
+ every_n_epochs: null
59
+ save_on_train_epoch_end: null
60
+ enable_version_counter: true
61
+ - class_path: tite.utils.callbacks.DummyImportCallback
62
+ fast_dev_run: false
63
+ max_epochs: null
64
+ min_epochs: null
65
+ max_steps: 10100
66
+ min_steps: null
67
+ max_time: null
68
+ limit_train_batches: null
69
+ limit_val_batches: null
70
+ limit_test_batches: null
71
+ limit_predict_batches: null
72
+ overfit_batches: 0.0
73
+ val_check_interval: 20000
74
+ check_val_every_n_epoch: null
75
+ num_sanity_val_steps: null
76
+ log_every_n_steps: null
77
+ enable_checkpointing: null
78
+ enable_progress_bar: false
79
+ enable_model_summary: null
80
+ accumulate_grad_batches: 8
81
+ gradient_clip_val: 1
82
+ gradient_clip_algorithm: null
83
+ deterministic: null
84
+ benchmark: null
85
+ inference_mode: true
86
+ use_distributed_sampler: true
87
+ profiler: null
88
+ detect_anomaly: false
89
+ barebones: false
90
+ plugins: null
91
+ sync_batchnorm: false
92
+ reload_dataloaders_every_n_epochs: 0
93
+ default_root_dir: null
94
+ model_registry: null
95
+ model:
96
+ class_path: lightning_ir.BiEncoderModule
97
+ init_args:
98
+ model_name_or_path: webis/tite-2-late
99
+ config:
100
+ class_path: lightning_ir.models.DprConfig
101
+ init_args:
102
+ query_length: 32
103
+ doc_length: 256
104
+ similarity_function: dot
105
+ normalize: false
106
+ sparsification: null
107
+ add_marker_tokens: true
108
+ query_pooling_strategy: first
109
+ doc_pooling_strategy: first
110
+ embedding_dim: 768
111
+ projection: null
112
+ model: null
113
+ loss_functions:
114
+ - class_path: lightning_ir.SupervisedMarginMSE
115
+ - class_path: lightning_ir.KLDivergence
116
+ - class_path: lightning_ir.ScoreBasedInBatchCrossEntropy
117
+ init_args:
118
+ min_target_diff: 3.0
119
+ max_num_neg_samples: null
120
+ evaluation_metrics:
121
+ - nDCG@10
122
+ index_dir: null
123
+ search_config: null
124
+ model_kwargs: null
125
+ data:
126
+ class_path: lightning_ir.LightningIRDataModule
127
+ init_args:
128
+ train_dataset:
129
+ class_path: lightning_ir.RunDataset
130
+ init_args:
131
+ run_path_or_id: msmarco-passage/train/rank-distillm-set-encoder
132
+ depth: 100
133
+ sample_size: 16
134
+ sampling_strategy: log_random
135
+ targets: score
136
+ normalize_targets: false
137
+ add_docs_not_in_ranking: false
138
+ train_batch_size: 64
139
+ shuffle_train: true
140
+ inference_batch_size: 1
141
+ num_workers: 4
142
+ lr_scheduler:
143
+ class_path: tite.utils.lr_schedulers.ConstantLRSchedulerWithLinearWarmup
144
+ init_args:
145
+ num_warmup_steps: 3000
146
+ num_delay_steps: 0
147
+ optimizer:
148
+ class_path: torch.optim.AdamW
149
+ init_args:
150
+ lr: 5.0e-05
151
+ betas:
152
+ - 0.9
153
+ - 0.999
154
+ eps: 1.0e-08
155
+ weight_decay: 0.01
156
+ amsgrad: false
157
+ maximize: false
158
+ foreach: null
159
+ capturable: false
160
+ differentiable: false
161
+ fused: null
162
+ ckpt_path: null
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_marker_tokens": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "[PAD]",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "100": {
13
+ "content": "[UNK]",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "101": {
21
+ "content": "[CLS]",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "102": {
29
+ "content": "[SEP]",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "103": {
37
+ "content": "[MASK]",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "30522": {
45
+ "content": "[QUE]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "30523": {
53
+ "content": "[DOC]",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ }
60
+ },
61
+ "backbone_tokenizer_class": "TiteTokenizer",
62
+ "clean_up_tokenization_spaces": false,
63
+ "cls_token": "[CLS]",
64
+ "do_lower_case": true,
65
+ "doc_length": 256,
66
+ "extra_special_tokens": {},
67
+ "mask_token": "[MASK]",
68
+ "max_length": 512,
69
+ "model_max_length": 512,
70
+ "pad_to_multiple_of": 8,
71
+ "pad_token": "[PAD]",
72
+ "pad_token_type_id": 0,
73
+ "padding_side": "right",
74
+ "query_length": 32,
75
+ "sep_token": "[SEP]",
76
+ "stride": 0,
77
+ "strip_accents": null,
78
+ "tokenize_chinese_chars": true,
79
+ "tokenizer_class": "BiEncoderTokenizer",
80
+ "truncation_side": "right",
81
+ "truncation_strategy": "longest_first",
82
+ "unk_token": "[UNK]"
83
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff