diff --git a/README.md b/README.md
index 7cc99b5..a47deb6 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,7 @@ A simple program to evaluate large language models.
 - Please download the [GLM model](https://hf-mirror.com/THUDM/chatglm-6b) and place it in the `./THUDM/chatglm-6b` folder
 - Please download the [GLM2 model](https://hf-mirror.com/THUDM/chatglm2-6b) and place it in the `./THUDM/chatglm2-6b` folder
 - Please download the [GLM3 model](https://hf-mirror.com/THUDM/chatglm3-6b) and place it in the `./THUDM/chatglm3-6b` folder
+- Models may also be placed under the code folder via symbolic links.
 - Fine-tuned LoRA models can be placed in the `./lora` folder and applied to ChatGLM2; e.g. to apply one to glm2, place it in the `./lora/glm2` folder
 - Fine-tuned P-Tuning models can be placed in the `./ptuning` folder and applied to ChatGLM/ChatGLM2; e.g. to apply one to glm, place it in the `./ptuning/glm1` folder
 - Fine-tuned QLoRA/DoRA models can be placed in the `./qlora` folder and applied to ChatGLM3; e.g. to apply one to glm3, place it in the `./qlora/glm3` folder
@@ -29,6 +30,15 @@ A simple program to evaluate large language models.
 - Training data follows the C-Eval format and is placed in the `./data` folder; file names correspond to `subject_name` in `eval.py`
 - In addition to the C-Eval datasets, the code adds a 'qa' dataset in the `./data/qa` folder, a question-answering dataset without multiple-choice options.
 
+## Create Environment
+
+```bash
+conda create -n llm python=3.10
+conda activate llm
+conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.7 -c pytorch -c nvidia
+pip install transformers==4.33.2 accelerate==0.26.1 tqdm==4.66.1 openai==0.28 peft==0.10.0 google-generativeai pandas sentencepiece==0.2.0 rouge_chinese==1.0.3 jieba==0.42.1
+```
+
 ## Run
 
 Run the model evaluation program:
diff --git a/evaluators/chatglm_mixin.py b/evaluators/chatglm_mixin.py
index 631f8cd..c1858b8 100644
--- a/evaluators/chatglm_mixin.py
+++ b/evaluators/chatglm_mixin.py
@@ -7,6 +7,7 @@ from transformers.generation.logits_process import LogitsProcessor
 from transformers.generation.utils import LogitsProcessorList
 from evaluators.evaluator import Evaluator
 
+
 class ChatGLMMixin:
     def __init__(self):
         self.tokenizer = None
@@ -28,14 +29,15 @@ class ChatGLMMixin:
         answers = list(test_df['answer'])
         for row_index, row in tqdm(test_df.iterrows(), total=len(test_df)):
             question = self.format_example(row, include_answer=False, cot=cot)
+            history_tmp = history.copy()
             if few_shot:
                 response, _ = self.model.chat(self.tokenizer, question, max_length=2000,
-                                              do_sample=False, history=history)
+                                              do_sample=False, history=history_tmp)
                 response = response.strip()
                 ans, direct_extract = self.extract_cot_answer(row, response)
             else:   # zero-shot by extracting answer from distribution
                 response, _ = self.model.chat(self.tokenizer, question, max_length=2000,
-                                              do_sample=False, history=history)
+                                              do_sample=False, history=history_tmp)
                 response = response.strip()
                 ans, direct_extract = self.extract_cot_answer(row, response)
             if ans == answers[row_index]:
@@ -64,8 +66,9 @@ class ChatGLMMixin:
             history = self.generate_zero_shot_prompt(is_choice_question=False)
         for row_index, row in tqdm(qa_df.iterrows(), total=len(qa_df)):
             question = row['question']
+            history_tmp = history.copy()
             response, _ = self.model.chat(self.tokenizer, question, max_length=2000,
-                                          do_sample=False, history=history)
+                                          do_sample=False, history=history_tmp)
             response = response.strip()
             qa_df.loc[row_index, 'model_output'] = response
             # current_length = 0
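
For context on the `history.copy()` change in `chatglm_mixin.py`: ChatGLM-style `model.chat()` calls may record the current turn in the `history` list they are given, so reusing one shared few-shot prompt list across the evaluation loop would let earlier questions leak into later prompts. Copying the list per question keeps every example's context identical. The sketch below illustrates the pattern; `fake_chat` and its in-place mutation are stand-ins assumed for illustration, not the library's actual API.

```python
# Minimal sketch of why the diff takes a per-question copy of `history`.
# `fake_chat` stands in for a ChatGLM-style chat method and is *assumed*
# to append the current turn to the history list it receives.

def fake_chat(query, history):
    history.append({"role": "user", "content": query})         # in-place mutation
    response = f"answer to {query!r}"
    history.append({"role": "assistant", "content": response})
    return response, history

few_shot_prompt = [
    {"role": "user", "content": "example question"},
    {"role": "assistant", "content": "example answer"},
]

# Without a copy, the shared prompt grows with every evaluated question,
# so later questions see earlier ones in their context:
shared = list(few_shot_prompt)
for q in ["Q1", "Q2", "Q3"]:
    fake_chat(q, shared)
print(len(shared))            # 8

# With a per-question copy (history_tmp = history.copy() in the diff),
# every question is asked against the same two-message prompt:
for q in ["Q1", "Q2", "Q3"]:
    history_tmp = list(few_shot_prompt)
    fake_chat(q, history_tmp)
print(len(few_shot_prompt))   # still 2
```

A shallow `copy()` is sufficient here because the loop only appends new turns; the original prompt messages themselves are never modified.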