import argparse
from scoring.assessment_engine import AssessmentEngine


def main(args):
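    # Score the DoRA fine-tuned predictions; eval_result_diff presumably compares the
    # fine-tuned output against the original model (see the recorded scores commented below).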
    assessment_engine = AssessmentEngine("logs/lxh", args.openai_key)
    assessment_engine.eval_result_diff("0416_DoRA_generated_predictions.csv")
    # assessment_engine.eval_result("output-npt-sft-1-0.95-0.5-1.2.json", "json")
    #
    # Previously recorded scores (kept for reference):
    # synthesis_rouge_score = 0.30358589506467687
    # print("Fine-tuned model ROUGE score:", synthesis_rouge_score)
    # original_rouge_score = 0.26004000118452175
    # print("Original model ROUGE score:", original_rouge_score)
    # synthesis_acc_score = 2.768
    # print("Fine-tuned model accuracy score:", synthesis_acc_score)
    # original_acc_score = 2.724
    # print("Original model accuracy score:", original_acc_score)
    # synthesis_fluency_score = 2.098
    # print("Fine-tuned model fluency score:", synthesis_fluency_score)
    # original_fluency_score = 2.236
    # print("Original model fluency score:", original_fluency_score)
    # synthesis_diff_score = 2.278
    # print("Fine-tuned vs. original preference score:", synthesis_diff_score)
    #
    # Composite score: each metric is rescaled to a 0-100 range (ROUGE * 100, accuracy
    # out of 4, fluency and preference out of 3), then the four terms are averaged.
    # synthesis_score = (synthesis_rouge_score * 100 + synthesis_acc_score * 100 / 4
    #                    + synthesis_fluency_score * 100 / 3 + synthesis_diff_score * 100 / 3) / 4
    # original_synthesis_score = (original_rouge_score * 100 + original_acc_score * 100 / 4
    #                             + original_fluency_score * 100 / 3 + 66) / 4
    #
    # print("Fine-tuned model composite score:", synthesis_score)
    # print("Original model composite score:", original_synthesis_score)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--openai_key", type=str, default="xxx")
    user_args = parser.parse_args()
    main(user_args)
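
# Usage sketch (script name below is hypothetical; pass a real OpenAI key instead of the "xxx" default):
#   python run_assessment.py --openai_key <your-openai-key>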