# Script: compare the fine-tuned model's outputs against the original model's
# by running AssessmentEngine.eval_result_diff on a results CSV.
import os

from scoring.assessment_engine import AssessmentEngine

# SECURITY: the API key used to be hard-coded here as a string literal.
# Secrets must never live in source control, so the key is now read from the
# environment. The previously committed key should be treated as leaked and
# revoked/rotated.
# NOTE(review): the old literal had an "sk-" prefix, so it is presumably an
# OpenAI-style key; rename the variable if the engine expects another provider.
_API_KEY_ENV_VAR = "OPENAI_API_KEY"
_api_key = os.environ.get(_API_KEY_ENV_VAR)
if not _api_key:
    # Fail early with an actionable message instead of passing None/"" to
    # the engine and getting an opaque authentication error later.
    raise RuntimeError(
        f"Environment variable {_API_KEY_ENV_VAR} must be set to the API key "
        "used by AssessmentEngine."
    )

# First argument ("other") is kept exactly as in the original call; its
# meaning is defined by AssessmentEngine, which is not visible from here.
assessment_engine = AssessmentEngine("other", _api_key)
assessment_engine.eval_result_diff("0408output-dora.csv")

# ---------------------------------------------------------------------------
# Recorded scores from an earlier run and the overall-score formula, kept
# (commented out) for reference.
# ---------------------------------------------------------------------------
# synthesis_rouge_score = 0.30358589506467687
# print("Fine-tuned model ROUGE score:", synthesis_rouge_score)
# original_rouge_score = 0.26004000118452175
# print("Original model ROUGE score:", original_rouge_score)
# synthesis_acc_score = 2.768
# print("Fine-tuned model accuracy score:", synthesis_acc_score)
# original_acc_score = 2.724
# print("Original model accuracy score:", original_acc_score)
# synthesis_fluency_score = 2.098
# print("Fine-tuned model fluency score:", synthesis_fluency_score)
# original_fluency_score = 2.236
# print("Original model fluency score:", original_fluency_score)
# synthesis_diff_score = 2.278
# print("Fine-tuned-better-than-original score:", synthesis_diff_score)
#
# Each component is rescaled to 0-100 before averaging: ROUGE * 100,
# accuracy * 100/4, fluency * 100/3, diff * 100/3.
# synthesis_score = (synthesis_rouge_score * 100 + synthesis_acc_score * 100/4
#                    + synthesis_fluency_score * 100/3
#                    + synthesis_diff_score * 100/3) / 4
# NOTE(review): the original model uses a fixed 66 in place of a diff term;
# presumably a neutral baseline (about 2 * 100/3) -- confirm.
# original_synthesis_score = (original_rouge_score * 100
#                             + original_acc_score * 100/4
#                             + original_fluency_score * 100/3 + 66) / 4
#
# print("Overall score:", synthesis_score)
# print("Original model overall score:", original_synthesis_score)