import argparse

from scoring.assessment_engine import AssessmentEngine


def main(args):
    # Compare fine-tuned vs. original model outputs recorded under logs/lxh.
    assessment_engine = AssessmentEngine("logs/lxh", args.api_key, args.llm_engine)
    assessment_engine.eval_result_diff("rslora.csv")
    # assessment_engine.eval_result("output-npt-sft-1-0.95-0.5-1.2.json", "json")

    # Previously recorded scores, kept commented out for reference:
    # synthesis_rouge_score = 0.30358589506467687
    # print("Fine-tuned model ROUGE score:", synthesis_rouge_score)
    # original_rouge_score = 0.26004000118452175
    # print("Original model ROUGE score:", original_rouge_score)
    # synthesis_acc_score = 2.768
    # print("Fine-tuned model accuracy score:", synthesis_acc_score)
    # original_acc_score = 2.724
    # print("Original model accuracy score:", original_acc_score)
    # synthesis_fluency_score = 2.098
    # print("Fine-tuned model fluency score:", synthesis_fluency_score)
    # original_fluency_score = 2.236
    # print("Original model fluency score:", original_fluency_score)
    # synthesis_diff_score = 2.278
    # print("Fine-tuned model better-than-original score:", synthesis_diff_score)
    #
    # Composite score: each metric rescaled to a 0-100 range (ROUGE is in [0, 1],
    # accuracy in [0, 4], fluency and the diff score in [0, 3]), then averaged.
    # The original model has no diff score, so a constant 66 stands in for that term.
    # synthesis_score = (synthesis_rouge_score * 100 + synthesis_acc_score * 100 / 4
    #                    + synthesis_fluency_score * 100 / 3
    #                    + synthesis_diff_score * 100 / 3) / 4
    # original_synthesis_score = (original_rouge_score * 100 + original_acc_score * 100 / 4
    #                             + original_fluency_score * 100 / 3 + 66) / 4
    #
    # print("Composite score:", synthesis_score)
    # print("Original model composite score:", original_synthesis_score)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--api_key", type=str, default="xxx")
    parser.add_argument("--llm_engine", type=str, default="gpt")
    user_args = parser.parse_args()
    main(user_args)
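

# --- Illustrative sketch, not part of the original pipeline ---
# The commented-out block in main() averages four metrics after rescaling each
# to a 0-100 range. The helper below reproduces that arithmetic as a reusable
# function; the name and signature are assumptions for illustration only and
# are not part of AssessmentEngine's API.
def composite_score(rouge: float, accuracy: float, fluency: float, diff: float) -> float:
    """Average of four metrics, each rescaled to 0-100.

    rouge is assumed to lie in [0, 1], accuracy in [0, 4], and
    fluency and diff in [0, 3], matching the scaling factors used
    in the commented-out computation above.
    """
    return (rouge * 100 + accuracy * 100 / 4 + fluency * 100 / 3 + diff * 100 / 3) / 4


# Plugging in the recorded numbers from main() gives:
#   composite_score(0.30358589506467687, 2.768, 2.098, 2.278)  ->  ~61.36  (fine-tuned)
#   (0.26004000118452175 * 100 + 2.724 * 25 + 2.236 * 100 / 3 + 66) / 4  ->  ~58.66  (original)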