Spaces:
Build error
Build error
Gabriela Nicole Gonzalez Saez
committed on
Commit
·
32fdb6f
1
Parent(s):
e4bccbf
zh-decode and best probabilities
Browse files
app.py
CHANGED
|
@@ -78,19 +78,18 @@ def get_k_prob_tokens(transition_scores, result, model, k_values=5):
|
|
| 78 |
gen_sequences = result.sequences[:, 1:]
|
| 79 |
|
| 80 |
result_output = []
|
| 81 |
-
# bs_alt = []
|
| 82 |
-
# bs_alt_scores = []
|
| 83 |
|
| 84 |
# First beam only...
|
| 85 |
bs = 0
|
| 86 |
text = ' '
|
| 87 |
for tok, score, i_step in zip(gen_sequences[bs], transition_scores[bs],range(len(gen_sequences[bs]))):
|
| 88 |
-
# bs_alt.append([tokenizer_tr.decode(tok) for tok in result.scores[i_step][bs].topk(k_values).indices ] )
|
| 89 |
-
# bs_alt_scores.append(np.exp(result.scores[i_step][bs].topk(k_values).values))
|
| 90 |
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
return result_output
|
| 96 |
|
|
@@ -100,15 +99,19 @@ def split_token_from_sequences(sequences, model) -> dict :
|
|
| 100 |
|
| 101 |
gen_sequences_texts = []
|
| 102 |
for bs in range(n_sentences):
|
|
|
|
| 103 |
#### decoder per token.
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
| 107 |
|
|
|
|
| 108 |
#raw dict is bos
|
| 109 |
text = 'bos'
|
| 110 |
new_id = text +'--1'
|
| 111 |
-
dict_parent = [{'id': new_id, 'parentId': None , 'text': text, 'name': 'bos', 'prob':score }]
|
| 112 |
id_dict_pos = {}
|
| 113 |
step_i = 0
|
| 114 |
cont = True
|
|
@@ -151,8 +154,10 @@ def split_token_from_sequences(sequences, model) -> dict :
|
|
| 151 |
dict_parent.append({'id': new_id, 'parentId': parent_id , 'text': step_w, 'name': step_w, 'prob' : score })
|
| 152 |
id_dict_pos[new_id] = len(dict_parent) - 1
|
| 153 |
else:
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
| 156 |
|
| 157 |
step_i += 1
|
| 158 |
return dict_parent
|
|
|
|
| 78 |
gen_sequences = result.sequences[:, 1:]
|
| 79 |
|
| 80 |
result_output = []
|
|
|
|
|
|
|
| 81 |
|
| 82 |
# First beam only...
|
| 83 |
bs = 0
|
| 84 |
text = ' '
|
| 85 |
for tok, score, i_step in zip(gen_sequences[bs], transition_scores[bs],range(len(gen_sequences[bs]))):
|
|
|
|
|
|
|
| 86 |
|
| 87 |
+
beam_i = result.beam_indices[0][i_step]
|
| 88 |
+
if beam_i < 0:
|
| 89 |
+
beam_i = bs
|
| 90 |
+
bs_alt = [tokenizer_tr.decode(tok) for tok in result.scores[i_step][beam_i].topk(k_values).indices ]
|
| 91 |
+
bs_alt_scores = np.exp(result.scores[i_step][beam_i].topk(k_values).values)
|
| 92 |
+
result_output.append([np.array(result.scores[i_step][beam_i].topk(k_values).indices), np.array(bs_alt_scores),bs_alt])
|
| 93 |
|
| 94 |
return result_output
|
| 95 |
|
|
|
|
| 99 |
|
| 100 |
gen_sequences_texts = []
|
| 101 |
for bs in range(n_sentences):
|
| 102 |
+
# gen_sequences_texts.append(dict_tokenizer_tr[model].decode(sequences[:, 1:][bs], skip_special_tokens=True).split(' '))
|
| 103 |
#### decoder per token.
|
| 104 |
+
seq_bs = []
|
| 105 |
+
|
| 106 |
+
for token in sequences[:, 1:][bs]:
|
| 107 |
+
seq_bs.append(dict_tokenizer_tr[model].decode(token, skip_special_tokens=True))
|
| 108 |
+
gen_sequences_texts.append(seq_bs)
|
| 109 |
|
| 110 |
+
score = 0
|
| 111 |
#raw dict is bos
|
| 112 |
text = 'bos'
|
| 113 |
new_id = text +'--1'
|
| 114 |
+
dict_parent = [{'id': new_id, 'parentId': None , 'text': text, 'name': 'bos', 'prob': score }]
|
| 115 |
id_dict_pos = {}
|
| 116 |
step_i = 0
|
| 117 |
cont = True
|
|
|
|
| 154 |
dict_parent.append({'id': new_id, 'parentId': parent_id , 'text': step_w, 'name': step_w, 'prob' : score })
|
| 155 |
id_dict_pos[new_id] = len(dict_parent) - 1
|
| 156 |
else:
|
| 157 |
+
if not (new_id in id_dict_pos):
|
| 158 |
+
dict_parent.append({'id': new_id, 'parentId': parent_id , 'text': step_w, 'name': step_w, 'prob' : score })
|
| 159 |
+
id_dict_pos[new_id] = len(dict_parent) - 1
|
| 160 |
+
|
| 161 |
|
| 162 |
step_i += 1
|
| 163 |
return dict_parent
|