You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
51 lines
1.6 KiB
Python
51 lines
1.6 KiB
Python
import re
|
|
import string
|
|
class Evaluator:
    """Base helper for building multiple-choice prompts and comparing answers.

    The class stores the answer-choice labels, a model name and the number
    of few-shot examples to use. ``eval_subject`` and ``eval_qa`` are empty
    placeholders here (presumably meant to be overridden by subclasses --
    confirm against the callers).
    """

    def __init__(self, choices, model_name, k=-1):
        """Store the evaluation configuration.

        Args:
            choices: Iterable of choice labels; each label is also used as a
                key into a question record (see ``format_example``).
            model_name: Name of the model being evaluated (only stored here).
            k: Number of few-shot examples to include in a prompt. ``-1``
                means "use every row of the dev set".
        """
        self.choices = choices
        self.model_name = model_name
        self.k = k
        # ASCII punctuation characters stripped by ``normalize_answer``.
        self.puncs = list(string.punctuation)

    def format_example(self, line, include_answer=True):
        """Render one question record as prompt text.

        Args:
            line: Mapping-like record indexed by ``'question'``, by each
                choice label, and (when ``include_answer`` is true) by
                ``'answer'``.
            include_answer: When true, append the answer followed by a blank
                line; when false, the text ends right after the answer cue.

        Returns:
            The question, one ``"<label>. <text>"`` line per choice, and the
            answer cue ``答案:`` ("Answer:"), optionally followed by the
            answer itself.
        """
        parts = [line['question']]
        for choice in self.choices:
            key = f"{choice}"
            parts.append(f'\n{choice}. {line[key]}')
        parts.append('\n答案:')
        if include_answer:
            parts.append(f'{line["answer"]}\n\n')
        return ''.join(parts)

    def generate_few_shot_prompt(self, subject, dev_df):
        """Build the few-shot prompt prefix for ``subject``.

        The header (in Chinese) reads roughly: "The following are
        single-choice questions from a Chinese exam on <subject>; please
        select the correct answer."

        Args:
            subject: Subject name interpolated into the header.
            dev_df: Table of example questions; it is accessed through
                ``.shape[0]`` and ``.iloc[i, :]`` (assumes a pandas
                DataFrame -- confirm against the callers).

        Returns:
            The header followed by the formatted examples, answers included.
        """
        header = f"以下是中国关于{subject}考试的单项选择题,请选出其中的正确答案。\n\n"
        available = dev_df.shape[0]
        # -1 selects every dev row; otherwise never ask for more rows than
        # exist, which would make ``iloc`` raise IndexError.
        shots = available if self.k == -1 else min(self.k, available)
        examples = (
            self.format_example(dev_df.iloc[i, :]) for i in range(shots)
        )
        return header + ''.join(examples)

    def eval_subject(self, subject_name, test_df, dev_df=None, few_shot=False, save_result_dir=None):
        """Placeholder: does nothing and returns ``None``."""
        pass

    def eval_qa(self, subject_name, qa_df, save_result_dir=None):
        """Placeholder: does nothing and returns ``None``."""
        pass

    def normalize_answer(self, s):
        """Return ``s`` lower-cased, without punctuation, whitespace collapsed.

        The steps run in this order: lower-case, drop every character listed
        in ``self.puncs``, then collapse each whitespace run to one space
        and strip leading/trailing whitespace.
        """
        lowered = s.lower()
        # Build the set at call time so changes to ``self.puncs`` are honored.
        excluded = set(self.puncs)
        without_punc = ''.join(ch for ch in lowered if ch not in excluded)
        return ' '.join(without_punc.split())

    def exact_match(self, pred, target):
        """Return True when ``pred`` and ``target`` are equal after normalization."""
        return self.normalize_answer(pred) == self.normalize_answer(target)