import os

import dspy
import openai

# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# written into this file; fall back to the original placeholder when it is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_API_KEY")

# Language model client used for generation.
turbo = dspy.OpenAI(model='gpt-3.5-turbo')

# Retrieval model client pointing at a hosted ColBERTv2 endpoint.
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')

# Register both clients with dspy.settings as `lm` and `rm`.
dspy.settings.configure(lm=turbo, rm=colbertv2_wiki17_abstracts)
from dspy.datasets import HotPotQA
# Build the HotPotQA split, requesting 20 training examples, 50 dev examples
# and no test examples, with fixed seeds for the train and eval sampling.
dataset = HotPotQA(
    train_seed=1,
    train_size=20,
    eval_seed=2023,
    dev_size=50,
    test_size=0,
)

# Mark 'question' as the only input field on every example; all remaining
# fields are then treated as labels and/or metadata.
trainset_q = [example.with_inputs('question') for example in dataset.train]
devset_q = [example.with_inputs('question') for example in dataset.dev]

# Report how many examples ended up in each split.
print(len(trainset_q), len(devset_q))
class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""

    # NOTE(review): the docstring above is left byte-identical on purpose;
    # DSPy signatures presumably use it as the task instruction -- confirm
    # before rewording it.

    # Input: the question to be answered.
    question = dspy.InputField()
    # Output: the answer; `desc` is a hint about the expected length.
    answer = dspy.OutputField(desc="often between 1 and 5 words")
# Define the predictor: a plain dspy.Predict built from the BasicQA signature.
generate_answer = dspy.Predict(BasicQA)

# Pick one dev example to query; any index below len(devset_q) works.
dev_example = devset_q[18]

# Call the predictor on a particular input.
pred = generate_answer(question=dev_example.question)

# Print the input and the prediction.
print(f"Question: {dev_example.question}")
print(f"Predicted Answer: {pred.answer}")
print(f"label Answer: {dev_example.answer}")

# Inspect the language model's most recent history entry (n=1).
turbo.inspect_history(n=1)
使用 CoT 思维链
加入 CoT(Chain of Thought,思维链)后,让模型重新回答同一个问题,并输出其思考过程。
# Define the predictor. Notice we're just changing the class. The signature BasicQA is unchanged.
generate_answer_with_chain_of_thought = dspy.ChainOfThought(BasicQA)

# Call the predictor on the same input (`dev_example` must already be bound).
pred = generate_answer_with_chain_of_thought(question=dev_example.question)

# Print the input, the chain of thought, and the prediction.
print(f"Question: {dev_example.question}")
print(f"Thought: {pred.rationale}")
print(f"Predicted Answer: {pred.answer}")
使用检索模型
检索模型根据向量相似度,从预先构建好的数据库中检索背景知识,随后与 prompt 一起送入大模型。使用检索器非常简单:模块 dspy.Retrieve(k) 会检索出与给定查询最匹配的前 k 个段落。
from dspy.teleprompt import BootstrapFewShot
def validate_context_and_answer(example, pred, trace=None):
    """Validate a prediction against its labelled example.

    Checks that the predicted answer is correct and that the retrieved
    context actually contains that answer.

    Args:
        example: The labelled example to compare against.
        pred: The prediction produced for that example.
        trace: Not used by this function; kept in the signature so callers
            may pass it.

    Returns:
        The result of ``answer_EM and answer_PM``: truthy only when both the
        exact-match check and the passage-match check succeed.
    """
    answer_EM = dspy.evaluate.answer_exact_match(example, pred)
    answer_PM = dspy.evaluate.answer_passage_match(example, pred)
    return answer_EM and answer_PM
# Create the teleprompter that will compile the RAG program, scoring
# candidates with `validate_context_and_answer`.
teleprompter = BootstrapFewShot(
    metric=validate_context_and_answer,
)

# Compile a fresh RAG instance against the question-only training set.
# NOTE(review): `RAG` is not defined in the visible part of this file; it must
# be defined before this statement runs.
compiled_rag = teleprompter.compile(
    RAG(),
    trainset=trainset_q,
)
现在可以通过一些提问来进行验证:
# Ask any question you like to this simple RAG program.
my_question = "What castle did David Gregory inherit?"

# Get the prediction. This contains `pred.context` and `pred.answer`.
pred = compiled_rag(my_question)

# Print the contexts and the answer.
print(f"Question: {my_question}")
print(f"Predicted Answer: {pred.answer}")
# Each retrieved passage is cut to its first 200 characters for display.
print(f"Retrieved Contexts (truncated): {[c[:200] + '...' for c in pred.context]}")
也可以通过 turbo.inspect_history(n=1) 来查看模型内部到底发生了什么。
评估
首先,让我们评估预测答案的准确性(精确匹配)。该指标将作为后续进一步优化结果的依据。
from dspy.evaluate.evaluate import Evaluate
# Set up the `evaluate_on_hotpotqa` function. We'll use this many times below.
# The dev split in this file is bound to `devset_q`; no name `devset` is defined.
evaluate_on_hotpotqa = Evaluate(devset=devset_q, num_threads=1, display_progress=True, display_table=5)

# Evaluate the `compiled_rag` program with the `answer_exact_match` metric.
metric = dspy.evaluate.answer_exact_match
evaluate_on_hotpotqa(compiled_rag, metric=metric)