import os
import argparse
import time

import pandas as pd
import torch

from evaluators.chatgpt import ChatGPT_Evaluator
from evaluators.chatglm import ChatGLM_Evaluator
from evaluators.chatglm2 import ChatGLM_Evaluator as ChatGLM2_Evaluator
from evaluators.chatglm3 import ChatGLM_Evaluator as ChatGLM3_Evaluator

choices = ["A", "B", "C", "D"]
device = torch.device("cpu")


def main(args):
    global device
    if args.cuda_device:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda_device
        device = torch.device("cuda")

    # Select the evaluator by model name. The ChatGLM checks go from most
    # specific to least specific, because "chatglm" is a substring of
    # "chatglm2" and "chatglm3".
    if "turbo" in args.model_name or "gpt-4" in args.model_name:
        evaluator = ChatGPT_Evaluator(
            choices=choices,
            k=args.ntrain,
            api_key=args.openai_key,
            model_name=args.model_name,
        )
    elif "chatglm3" in args.model_name:
        evaluator = ChatGLM3_Evaluator(
            choices=choices,
            k=args.ntrain,
            model_name=args.model_name,
            device=device,
            finetune=args.finetune,  # already None when --finetune is not given
            finetune_method=args.finetune_method,
        )
    elif "chatglm2" in args.model_name:
        evaluator = ChatGLM2_Evaluator(
            choices=choices,
            k=args.ntrain,
            model_name=args.model_name,
            device=device,
            finetune=args.finetune,
            finetune_method=args.finetune_method,
        )
    elif "chatglm" in args.model_name:
        evaluator = ChatGLM_Evaluator(
            choices=choices,
            k=args.ntrain,
            model_name=args.model_name,
            device=device,
            finetune=args.finetune,
            finetune_method=args.finetune_method,
        )
    else:
        print("Unknown model name")
        return -1

    # Results go to logs/<model>_<finetune>/<timestamp>/. Use makedirs (not
    # mkdir) so the intermediate model-level directory is created as well on
    # the first run; mkdir would raise FileNotFoundError here.
    run_date = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime(time.time()))
    fine_tune_model_name = args.finetune if args.finetune else "original"
    save_result_dir = os.path.join("logs", f"{args.model_name}_{fine_tune_model_name}", run_date)
    os.makedirs(save_result_dir, exist_ok=True)

    # subject_list = ['computer_architecture', 'car_knowledge', 'car_use', 'car_market']
    subject_list = ['car_knowledge_in_train', 'car_use_in_train', 'car_market_in_train']
    qa_subject_list = ['car_knowledge', 'car_use', 'car_market']
    # qa_subject_list = ['car_use', 'car_market']

    for subject_name in subject_list:
        print("Now testing: " + subject_name)
        # subject_name = args.subject
        val_file_path = os.path.join('data/val', f'{subject_name}_val.csv')
        val_df = pd.read_csv(val_file_path)
        if args.few_shot:
            # Few-shot mode prepends k dev examples to each prompt;
            # chain-of-thought (--cot) is only forwarded in this branch.
            dev_file_path = os.path.join('data/dev', f'{subject_name}_dev.csv')
            dev_df = pd.read_csv(dev_file_path)
            correct_ratio = evaluator.eval_subject(
                subject_name,
                val_df,
                dev_df,
                few_shot=args.few_shot,
                save_result_dir=save_result_dir,
                cot=args.cot,
            )
        else:
            correct_ratio = evaluator.eval_subject(
                subject_name,
                val_df,
                few_shot=args.few_shot,
                save_result_dir=save_result_dir,
            )
        print("Acc:", correct_ratio)

    # for subject_name in qa_subject_list:
    #     print("Now testing: " + subject_name)
    #     qa_file_path = os.path.join('data/qa', f'{subject_name}_qa.csv')
    #     qa_df = pd.read_csv(qa_file_path)
    #     evaluator.eval_qa(subject_name, qa_df, save_result_dir=save_result_dir)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--ntrain", "-k", type=int, default=5)
    parser.add_argument("--openai_key", type=str, default="xxx")
    parser.add_argument("--minimax_group_id", type=str, default="xxx")
    parser.add_argument("--minimax_key", type=str, default="xxx")
    parser.add_argument("--few_shot", action="store_true")
    parser.add_argument("--model_name", type=str)
    parser.add_argument("--cot", action="store_true")
    # parser.add_argument("--subject", "-s", type=str, default="operating_system")
    parser.add_argument("--cuda_device", type=str)
    parser.add_argument("--finetune", type=str)
    parser.add_argument("--finetune_method", type=str)
    user_args = parser.parse_args()
    main(user_args)