import argparse

from scoring.assessment_engine import AssessmentEngine


def main(args):
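    # Score the rslora fine-tuning run with an LLM judge. AssessmentEngine is
    # assumed to take a log directory, an API key, and a judge engine name, in
    # that order; the exact semantics live in scoring.assessment_engine.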
    assessment_engine = AssessmentEngine("logs/lxh", args.api_key, args.llm_engine)
    assessment_engine.eval_result_diff("rslora.csv")
    # assessment_engine.eval_result("output-npt-sft-1-0.95-0.5-1.2.json", "json")

    # synthesis_rouge_score = 0.30358589506467687
    # print("Fine-tuned model ROUGE score:", synthesis_rouge_score)
    # original_rouge_score = 0.26004000118452175
    # print("Original model ROUGE score:", original_rouge_score)
    # synthesis_acc_score = 2.768
    # print("Fine-tuned model accuracy score:", synthesis_acc_score)
    # original_acc_score = 2.724
    # print("Original model accuracy score:", original_acc_score)
    # synthesis_fluency_score = 2.098
    # print("Fine-tuned model fluency score:", synthesis_fluency_score)
    # original_fluency_score = 2.236
    # print("Original model fluency score:", original_fluency_score)
    # synthesis_diff_score = 2.278
    # print("Fine-tuned model better-than-original score:", synthesis_diff_score)
    #
    # synthesis_score = (synthesis_rouge_score * 100 + synthesis_acc_score * 100 / 4
    #                    + synthesis_fluency_score * 100 / 3 + synthesis_diff_score * 100 / 3) / 4
    # original_synthesis_score = (original_rouge_score * 100 + original_acc_score * 100 / 4
    #                             + original_fluency_score * 100 / 3 + 66) / 4
    #
    # print("Composite score:", synthesis_score)
    # print("Original model composite score:", original_synthesis_score)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--api_key", type=str, default="xxx")
    parser.add_argument("--llm_engine", type=str, default="gpt")
    user_args = parser.parse_args()
    main(user_args)
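
# Example invocation (the script filename is hypothetical; the default api_key
# value "xxx" is a placeholder, so pass a real key):
#   python run_assessment.py --api_key YOUR_KEY --llm_engine gpt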