|
|
import os

from scoring.assessment_engine import AssessmentEngine
|
|
|
|
|
|
# Script entry: build the assessment engine and run the diff evaluation on the
# DoRA output CSV.
#
# The secret passed as the second constructor argument is read from the
# environment instead of being hard-coded, so it is never committed to source
# control.
# NOTE(review): the key that used to be embedded here has been exposed in the
# file history and should be revoked/rotated.
# NOTE(review): the value looked like an OpenAI-style "sk-" key, hence the
# variable name below - confirm against AssessmentEngine's constructor.
_API_KEY_ENV_VAR = "OPENAI_API_KEY"

_api_key = os.environ.get(_API_KEY_ENV_VAR)
if not _api_key:
    # Fail early with an actionable message rather than letting the engine
    # fail later with an opaque authentication error.
    raise RuntimeError(
        f"Environment variable {_API_KEY_ENV_VAR} is not set; "
        "export it before running this script."
    )

assessment_engine = AssessmentEngine("other", _api_key)

assessment_engine.eval_result_diff("0408output-dora.csv")
|
|
|
|
|
|
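# ---------------------------------------------------------------------------
# Commented-out score summary (not executed).
#
# The statements below hard-code ROUGE, accuracy, fluency and diff scores for
# the fine-tuned model ("微调模型") and the original model ("原模型"), print
# them, and average four terms (each scaled by 100) into a composite score.
#
# Glossary for the Chinese labels used in the print calls:
#   微调模型ROUGE分数      = fine-tuned model ROUGE score
#   原模型ROUGE分数        = original model ROUGE score
#   微调模型准确性分数     = fine-tuned model accuracy score
#   原模型准确性分数       = original model accuracy score
#   微调模型流畅度分数     = fine-tuned model fluency score
#   原模型流畅度分数       = original model fluency score
#   微调模型优于原模型分数 = "fine-tuned better than original" (diff) score
#   综合评分               = composite score
#   原模型综合评分         = original model composite score
#
# NOTE(review): accuracy is scaled by 100/4 and fluency/diff by 100/3, which
# suggests 4-point and 3-point scales - confirm against the scoring code.
# NOTE(review): the constant 66 in the original-model formula presumably
# stands in for a neutral diff score (2 * 100/3, truncated) - confirm.
# ---------------------------------------------------------------------------

# synthesis_rouge_score = 0.30358589506467687
# print("微调模型ROUGE分数:", synthesis_rouge_score)
# original_rouge_score = 0.26004000118452175
# print("原模型ROUGE分数:", original_rouge_score)
# synthesis_acc_score = 2.768
# print("微调模型准确性分数:", synthesis_acc_score)
# original_acc_score = 2.724
# print("原模型准确性分数:", original_acc_score)
# synthesis_fluency_score = 2.098
# print("微调模型流畅度分数:", synthesis_fluency_score)
# original_fluency_score = 2.236
# print("原模型流畅度分数:", original_fluency_score)
# synthesis_diff_score = 2.278
# print("微调模型优于原模型分数:", synthesis_diff_score)
#
# synthesis_score = (synthesis_rouge_score * 100 + synthesis_acc_score * 100/4 + synthesis_fluency_score * 100/3
#                    + synthesis_diff_score * 100/3 ) / 4
# original_synthesis_score = (original_rouge_score * 100 + original_acc_score * 100/4 +
#                             original_fluency_score * 100/3 + 66 ) / 4
#
# print("综合评分:", synthesis_score)
# print("原模型综合评分:", original_synthesis_score)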
|