{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 470,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0425531914893617,
      "grad_norm": 0.4270687699317932,
      "learning_rate": 0.00019999602855426865,
      "loss": 1.013,
      "step": 10
    },
    {
      "epoch": 0.0851063829787234,
      "grad_norm": 0.4152718782424927,
      "learning_rate": 0.00019998411453252217,
      "loss": 0.8289,
      "step": 20
    },
    {
      "epoch": 0.1276595744680851,
      "grad_norm": 0.7277560234069824,
      "learning_rate": 0.0001999642588810784,
      "loss": 0.5959,
      "step": 30
    },
    {
      "epoch": 0.1702127659574468,
      "grad_norm": 0.5505673885345459,
      "learning_rate": 0.00019993646317705016,
      "loss": 0.459,
      "step": 40
    },
    {
      "epoch": 0.2127659574468085,
      "grad_norm": 0.528052031993866,
      "learning_rate": 0.00019990072962822007,
      "loss": 0.3775,
      "step": 50
    },
    {
      "epoch": 0.2553191489361702,
      "grad_norm": 0.6307681202888489,
      "learning_rate": 0.00019985706107286514,
      "loss": 0.3285,
      "step": 60
    },
    {
      "epoch": 0.2978723404255319,
      "grad_norm": 0.6954013109207153,
      "learning_rate": 0.00019980546097953132,
      "loss": 0.2855,
      "step": 70
    },
    {
      "epoch": 0.3404255319148936,
      "grad_norm": 0.6790465116500854,
      "learning_rate": 0.000199745933446758,
      "loss": 0.2782,
      "step": 80
    },
    {
      "epoch": 0.3829787234042553,
      "grad_norm": 1.324937105178833,
      "learning_rate": 0.0001996784832027525,
      "loss": 0.2635,
      "step": 90
    },
    {
      "epoch": 0.425531914893617,
      "grad_norm": 0.8779314756393433,
      "learning_rate": 0.00019960311560501454,
      "loss": 0.1861,
      "step": 100
    },
    {
      "epoch": 0.46808510638297873,
      "grad_norm": 0.693745493888855,
      "learning_rate": 0.00019951983663991056,
      "loss": 0.2001,
      "step": 110
    },
    {
      "epoch": 0.5106382978723404,
      "grad_norm": 1.0649502277374268,
      "learning_rate": 0.00019942865292219838,
      "loss": 0.1378,
      "step": 120
    },
    {
      "epoch": 0.5531914893617021,
      "grad_norm": 0.6962260007858276,
      "learning_rate": 0.0001993295716945017,
      "loss": 0.1579,
      "step": 130
    },
    {
      "epoch": 0.5957446808510638,
      "grad_norm": 0.7934479713439941,
      "learning_rate": 0.00019922260082673497,
      "loss": 0.092,
      "step": 140
    },
    {
      "epoch": 0.6382978723404256,
      "grad_norm": 1.1331907510757446,
      "learning_rate": 0.000199107748815478,
      "loss": 0.1208,
      "step": 150
    },
    {
      "epoch": 0.6808510638297872,
      "grad_norm": 1.3689247369766235,
      "learning_rate": 0.00019898502478330152,
      "loss": 0.0874,
      "step": 160
    },
    {
      "epoch": 0.723404255319149,
      "grad_norm": 0.5304535031318665,
      "learning_rate": 0.00019885443847804211,
      "loss": 0.0881,
      "step": 170
    },
    {
      "epoch": 0.7659574468085106,
      "grad_norm": 0.6805845499038696,
      "learning_rate": 0.0001987160002720283,
      "loss": 0.0584,
      "step": 180
    },
    {
      "epoch": 0.8085106382978723,
      "grad_norm": 0.2527499198913574,
      "learning_rate": 0.00019856972116125653,
      "loss": 0.08,
      "step": 190
    },
    {
      "epoch": 0.851063829787234,
      "grad_norm": 0.799462616443634,
      "learning_rate": 0.0001984156127645178,
      "loss": 0.0556,
      "step": 200
    },
    {
      "epoch": 0.8936170212765957,
      "grad_norm": 0.936975359916687,
      "learning_rate": 0.0001982536873224748,
      "loss": 0.0945,
      "step": 210
    },
    {
      "epoch": 0.9361702127659575,
      "grad_norm": 0.8067993521690369,
      "learning_rate": 0.00019808395769668963,
      "loss": 0.0495,
      "step": 220
    },
    {
      "epoch": 0.9787234042553191,
      "grad_norm": 0.45767834782600403,
      "learning_rate": 0.00019790643736860227,
      "loss": 0.0617,
      "step": 230
    },
    {
      "epoch": 1.0212765957446808,
      "grad_norm": 0.9198794364929199,
      "learning_rate": 0.00019772114043845965,
      "loss": 0.0467,
      "step": 240
    },
    {
      "epoch": 1.0638297872340425,
      "grad_norm": 0.7327796816825867,
      "learning_rate": 0.0001975280816241959,
      "loss": 0.0391,
      "step": 250
    },
    {
      "epoch": 1.1063829787234043,
      "grad_norm": 0.8003076910972595,
      "learning_rate": 0.00019732727626026305,
      "loss": 0.0428,
      "step": 260
    },
    {
      "epoch": 1.148936170212766,
      "grad_norm": 0.10251367837190628,
      "learning_rate": 0.0001971187402964132,
      "loss": 0.032,
      "step": 270
    },
    {
      "epoch": 1.1914893617021276,
      "grad_norm": 0.45093855261802673,
      "learning_rate": 0.00019690249029643162,
      "loss": 0.0673,
      "step": 280
    },
    {
      "epoch": 1.2340425531914894,
      "grad_norm": 0.4845767915248871,
      "learning_rate": 0.0001966785434368211,
      "loss": 0.033,
      "step": 290
    },
    {
      "epoch": 1.2765957446808511,
      "grad_norm": 0.31195056438446045,
      "learning_rate": 0.00019644691750543767,
      "loss": 0.0261,
      "step": 300
    },
    {
      "epoch": 1.3191489361702127,
      "grad_norm": 0.14839951694011688,
      "learning_rate": 0.00019620763090007762,
      "loss": 0.0298,
      "step": 310
    },
    {
      "epoch": 1.3617021276595744,
      "grad_norm": 0.20573872327804565,
      "learning_rate": 0.00019596070262701626,
      "loss": 0.0155,
      "step": 320
    },
    {
      "epoch": 1.4042553191489362,
      "grad_norm": 0.47702595591545105,
      "learning_rate": 0.00019570615229949842,
      "loss": 0.0369,
      "step": 330
    },
    {
      "epoch": 1.4468085106382977,
      "grad_norm": 0.7073186039924622,
      "learning_rate": 0.00019544400013618023,
      "loss": 0.0302,
      "step": 340
    },
    {
      "epoch": 1.4893617021276595,
      "grad_norm": 0.1539478451013565,
      "learning_rate": 0.00019517426695952358,
      "loss": 0.0223,
      "step": 350
    },
    {
      "epoch": 1.5319148936170213,
      "grad_norm": 0.5202814340591431,
      "learning_rate": 0.00019489697419414182,
      "loss": 0.0263,
      "step": 360
    },
    {
      "epoch": 1.574468085106383,
      "grad_norm": 0.968192458152771,
      "learning_rate": 0.00019461214386509842,
      "loss": 0.044,
      "step": 370
    },
    {
      "epoch": 1.6170212765957448,
      "grad_norm": 0.5662522912025452,
      "learning_rate": 0.00019431979859615726,
      "loss": 0.0421,
      "step": 380
    },
    {
      "epoch": 1.6595744680851063,
      "grad_norm": 0.42925137281417847,
      "learning_rate": 0.00019401996160798573,
      "loss": 0.0606,
      "step": 390
    },
    {
      "epoch": 1.702127659574468,
      "grad_norm": 0.5803830027580261,
      "learning_rate": 0.00019371265671631037,
      "loss": 0.0392,
      "step": 400
    },
    {
      "epoch": 1.7446808510638299,
      "grad_norm": 0.4235450327396393,
      "learning_rate": 0.00019339790833002515,
      "loss": 0.0286,
      "step": 410
    },
    {
      "epoch": 1.7872340425531914,
      "grad_norm": 0.519207775592804,
      "learning_rate": 0.00019307574144925287,
      "loss": 0.0522,
      "step": 420
    },
    {
      "epoch": 1.8297872340425532,
      "grad_norm": 0.2344844490289688,
      "learning_rate": 0.00019274618166335912,
      "loss": 0.0281,
      "step": 430
    },
    {
      "epoch": 1.872340425531915,
      "grad_norm": 0.1990007758140564,
      "learning_rate": 0.00019240925514892,
      "loss": 0.0229,
      "step": 440
    },
    {
      "epoch": 1.9148936170212765,
      "grad_norm": 0.10929415374994278,
      "learning_rate": 0.00019206498866764288,
      "loss": 0.0258,
      "step": 450
    },
    {
      "epoch": 1.9574468085106385,
      "grad_norm": 0.4308103024959564,
      "learning_rate": 0.00019171340956424074,
      "loss": 0.0167,
      "step": 460
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.46525439620018005,
      "learning_rate": 0.0001913545457642601,
      "loss": 0.0283,
      "step": 470
    }
  ],
  "logging_steps": 10,
  "max_steps": 3525,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.889350100631552e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}