优化评分部分文件结构,增加对比式gpt评分
parent
148a9e1de0
commit
88a58600f6
@ -0,0 +1,35 @@
|
|||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Columns copied unchanged from the machine-scored CSV into the report.
PASSTHROUGH_COLUMNS = ['question', 'answer', 'predict_finetune', 'predict_origin']

# (machine column, human column, label printed in the per-row summary).
# The report stores each human value under 'human_' + machine column.
SCORE_COLUMN_PAIRS = [
    ('acc_finetune', '准确度(微调后', "准确度(微调后)差值:"),
    ('acc_origin', '准确度(微调前', "准确度(微调前)差值:"),
    ('fluency_finetune', '流畅度(微调后', "流畅度(微调后)差值:"),
    ('diff_score', '是否超过原模型', "是否超过原模型差值:"),
]

# Column order of the report written to disk.
RESULT_COLUMNS = ['question', 'answer', 'predict_finetune', 'predict_origin', 'acc_finetune', 'human_acc_finetune', 'acc_origin', 'human_acc_origin', 'fluency_finetune', 'human_fluency_finetune', 'diff_score', 'human_diff_score']


def collect_disagreements(machine_df, human_df, threshold=2):
    """Return the rows where machine and human accuracy scores disagree.

    For every row of ``machine_df`` the absolute gap to the row with the
    same index label in ``human_df`` is printed for all four score pairs.
    A row is kept in the result only when the gap on the fine-tuned
    accuracy score (``acc_finetune``) is at least ``threshold``; the other
    three gaps are printed but do not influence the selection.

    Args:
        machine_df: DataFrame holding the passthrough columns and the
            machine score columns named in ``SCORE_COLUMN_PAIRS``.
        human_df: DataFrame holding the human score columns named in
            ``SCORE_COLUMN_PAIRS``, sharing index labels with ``machine_df``.
        threshold: Minimum absolute ``acc_finetune`` gap for a row to be kept.

    Returns:
        A DataFrame with columns ``RESULT_COLUMNS`` and a fresh 0..k-1 index,
        empty (but with the same columns) when no row qualifies.

    Raises:
        KeyError: If ``human_df`` lacks an index label present in
            ``machine_df`` or a required column is missing.
    """
    records = []
    for row_index, row in machine_df.iterrows():
        summary = []
        gaps = {}
        for machine_col, human_col, label in SCORE_COLUMN_PAIRS:
            gap = abs(row[machine_col] - human_df.loc[row_index, human_col])
            gaps[machine_col] = gap
            summary.extend((label, gap))
        # One line per row: "label gap label gap label gap label gap".
        print(*summary)

        # Only the fine-tuned accuracy gap decides whether a row is reported.
        if gaps['acc_finetune'] >= threshold:
            record = {name: machine_df.loc[row_index, name] for name in PASSTHROUGH_COLUMNS}
            for machine_col, human_col, _ in SCORE_COLUMN_PAIRS:
                record[machine_col] = machine_df.loc[row_index, machine_col]
                record['human_' + machine_col] = human_df.loc[row_index, human_col]
            records.append(record)

    # Building the frame once avoids growing it cell by cell inside the loop;
    # passing columns explicitly keeps the header when nothing was selected.
    return pd.DataFrame(records, columns=RESULT_COLUMNS)


def main():
    """Read both score CSVs and write the disagreement report to disk."""
    # Read the two CSV files.
    human_score_df = pd.read_csv('logs/other/human.csv')
    machine_score_df = pd.read_csv('logs/other/result_diff_test_score_53.84256314043283.csv')

    result_df = collect_disagreements(machine_score_df, human_score_df)
    result_df.to_csv('logs/other/diff.csv', index=False)


if __name__ == "__main__":
    main()

# Accuracy of the information should be the first consideration; extra
# information whose truthfulness is unknown should not earn additional points.
|
@ -0,0 +1,10 @@
|
|||||||
|
import evaluate
|
||||||
|
|
||||||
|
# Load the perplexity metric from the local metrics directory.
perplexity = evaluate.load("../metrics/perplexity")

# Sample sentences to score: a greeting, a nonsensical string and an
# ordinary sentence.
input_texts = [
    "你好!",
    "打死哦对吉萨大你去我家而且我就",
    "这辆车非常优秀",
]

# Gather the keyword arguments first so the compute call stays short.
compute_kwargs = {
    "model_id": 'gpt2',
    "add_start_token": False,
    "predictions": input_texts,
}
results = perplexity.compute(**compute_kwargs)

# Show which fields the metric returned, then the "perplexities" entry.
result_fields = list(results.keys())
print(result_fields)
print(results["perplexities"])
|
||||||
|
|
Loading…
Reference in New Issue