@@ -7,6 +7,7 @@ from transformers.generation.logits_process import LogitsProcessor
+from transformers.generation.utils import LogitsProcessorList
 
 from evaluators.evaluator import Evaluator
 
 class ChatGLMMixin:
     def __init__(self):
         self.tokenizer = None
@@ -28,14 +29,15 @@ class ChatGLMMixin:
         answers = list(test_df['answer'])
         for row_index, row in tqdm(test_df.iterrows(), total=len(test_df)):
             question = self.format_example(row, include_answer=False, cot=cot)
+            history_tmp = history.copy()
             if few_shot:
                 response, _ = self.model.chat(self.tokenizer, question, max_length=2000,
-                                              do_sample=False, history=history)
+                                              do_sample=False, history=history_tmp)
                 response = response.strip()
                 ans, direct_extract = self.extract_cot_answer(row, response)
             else:  # zero-shot by extracting answer from distribution
                 response, _ = self.model.chat(self.tokenizer, question, max_length=2000,
-                                              do_sample=False, history=history)
+                                              do_sample=False, history=history_tmp)
                 response = response.strip()
                 ans, direct_extract = self.extract_cot_answer(row, response)
             if ans == answers[row_index]:
@@ -64,8 +66,9 @@ class ChatGLMMixin:
         history = self.generate_zero_shot_prompt(is_choice_question=False)
         for row_index, row in tqdm(qa_df.iterrows(), total=len(qa_df)):
             question = row['question']
+            history_tmp = history.copy()
             response, _ = self.model.chat(self.tokenizer, question, max_length=2000,
-                                          do_sample=False, history=history)
+                                          do_sample=False, history=history_tmp)
             response = response.strip()
             qa_df.loc[row_index, 'model_output'] = response
             # current_length = 0
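
Why the per-row `history.copy()` matters: if the underlying `chat` call mutates the history list it receives (some ChatGLM checkpoints append the new turn in place), reusing one shared few-shot `history` across rows makes every earlier test question leak into the next prompt. A minimal, self-contained sketch of that failure mode and the fix follows; the `chat` stub here is a hypothetical stand-in, not the real model API.

# Hypothetical stand-in for a chat method that appends turns to the
# caller's history list in place.
def chat(tokenizer, query, history=None, **kwargs):
    history = history if history is not None else []
    history.append((query, "<response>"))  # in-place mutation
    return "<response>", history

# Without a per-row copy, the shared few-shot prompt grows each iteration.
few_shot_history = [("example question", "example answer")]
for q in ["q1", "q2"]:
    chat(None, q, history=few_shot_history)
print(len(few_shot_history))  # 3 -- two test questions leaked in

# With the copy, each row sees only the original few-shot examples.
few_shot_history = [("example question", "example answer")]
for q in ["q1", "q2"]:
    history_tmp = few_shot_history.copy()
    chat(None, q, history=history_tmp)
print(len(few_shot_history))  # 1 -- shared prompt left untouched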