diff --git a/README.md b/README.md
index 7cc99b5..a47deb6 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,7 @@ A simple program to evaluate large language models.
 - Please download the [GLM model](https://hf-mirror.com/THUDM/chatglm-6b) and place it in the `./THUDM/chatglm-6b` folder
 - Please download the [GLM2 model](https://hf-mirror.com/THUDM/chatglm2-6b) and place it in the `./THUDM/chatglm2-6b` folder
 - Please download the [GLM3 model](https://hf-mirror.com/THUDM/chatglm3-6b) and place it in the `./THUDM/chatglm3-6b` folder
+- Models may also be placed under the code folder via symbolic links.
 - Fine-tuned LoRA models can be placed in the `./lora` folder and applied to ChatGLM2; e.g. to apply one to glm2, place it in the `./lora/glm2` folder
 - Fine-tuned P-Tuning models can be placed in the `./ptuning` folder and applied to ChatGLM/ChatGLM2; e.g. to apply one to glm, place it in the `./ptuning/glm1` folder
 - Fine-tuned QLoRA/DoRA models can be placed in the `./qlora` folder and applied to ChatGLM3; e.g. to apply one to glm3, place it in the `./qlora/glm3` folder
@@ -29,6 +30,15 @@ A simple program to evaluate large language models.
 - Training data follows the C-Eval format and is placed in the `./data` folder; file names correspond to `subject_name` in `eval.py`
 - In addition to the C-Eval datasets, the code adds a 'qa' dataset in the `./data/qa` folder, a question-answering dataset without multiple-choice options.
 
+## Create Environment
+
+```bash
+conda create -n llm python=3.10
+conda activate llm
+conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.7 -c pytorch -c nvidia
+pip install transformers==4.33.2 accelerate==0.26.1 tqdm==4.66.1 openai==0.28 peft==0.10.0 google-generativeai pandas sentencepiece==0.2.0 rouge_chinese==1.0.3 jieba==0.42.1
+```
+
 ## Run
 
 Run the model evaluation program:
diff --git a/evaluators/chatglm_mixin.py b/evaluators/chatglm_mixin.py
index 631f8cd..c1858b8 100644
--- a/evaluators/chatglm_mixin.py
+++ b/evaluators/chatglm_mixin.py
@@ -7,6 +7,7 @@ from transformers.generation.logits_process import LogitsProcessor
 from transformers.generation.utils import LogitsProcessorList
 from evaluators.evaluator import Evaluator
 
+
 class ChatGLMMixin:
     def __init__(self):
         self.tokenizer = None
@@ -28,14 +29,15 @@ class ChatGLMMixin:
         answers = list(test_df['answer'])
         for row_index, row in tqdm(test_df.iterrows(), total=len(test_df)):
             question = self.format_example(row, include_answer=False, cot=cot)
+            history_tmp = history.copy()
             if few_shot:
                 response, _ = self.model.chat(self.tokenizer, question, max_length=2000,
-                                              do_sample=False, history=history)
+                                              do_sample=False, history=history_tmp)
                 response = response.strip()
                 ans, direct_extract = self.extract_cot_answer(row, response)
             else:   # zero-shot by extracting answer from distribution
                 response, _ = self.model.chat(self.tokenizer, question, max_length=2000,
-                                              do_sample=False, history=history)
+                                              do_sample=False, history=history_tmp)
                 response = response.strip()
                 ans, direct_extract = self.extract_cot_answer(row, response)
             if ans == answers[row_index]:
@@ -64,8 +66,9 @@ class ChatGLMMixin:
             history = self.generate_zero_shot_prompt(is_choice_question=False)
         for row_index, row in tqdm(qa_df.iterrows(), total=len(qa_df)):
             question = row['question']
+            history_tmp = history.copy()
             response, _ = self.model.chat(self.tokenizer, question, max_length=2000,
-                                          do_sample=False, history=history)
+                                          do_sample=False, history=history_tmp)
             response = response.strip()
             qa_df.loc[row_index, 'model_output'] = response
             # current_length = 0
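
For context on the `history.copy()` change in `chatglm_mixin.py`: ChatGLM-style `model.chat()` calls may record the current turn in the `history` list they are given, so reusing one shared few-shot prompt list across the evaluation loop would let earlier questions leak into later prompts. Copying the list per question keeps every example's context identical. The sketch below illustrates the pattern; `fake_chat` and its in-place mutation are stand-ins assumed for illustration, not the library's actual API.

```python
# Minimal sketch of why the diff takes a per-question copy of `history`.
# `fake_chat` stands in for a ChatGLM-style chat method and is *assumed*
# to append the current turn to the history list it receives.

def fake_chat(query, history):
    history.append({"role": "user", "content": query})         # in-place mutation
    response = f"answer to {query!r}"
    history.append({"role": "assistant", "content": response})
    return response, history

few_shot_prompt = [
    {"role": "user", "content": "example question"},
    {"role": "assistant", "content": "example answer"},
]

# Without a copy, the shared prompt grows with every evaluated question,
# so later questions see earlier ones in their context:
shared = list(few_shot_prompt)
for q in ["Q1", "Q2", "Q3"]:
    fake_chat(q, shared)
print(len(shared))            # 8

# With a per-question copy (history_tmp = history.copy() in the diff),
# every question is asked against the same two-message prompt:
for q in ["Q1", "Q2", "Q3"]:
    history_tmp = list(few_shot_prompt)
    fake_chat(q, history_tmp)
print(len(few_shot_prompt))   # still 2
```

A shallow `copy()` is sufficient here because the loop only appends new turns; the original prompt messages themselves are never modified.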