Fix Dockerfile build issue

kyy
2025-03-18 16:41:12 +09:00
parent 6814230bfb
commit 9323aa254a
228 changed files with 467 additions and 3488 deletions


@@ -0,0 +1,6 @@
from .base import make_single_content_qa, make_qa_with_existing_qa
from .llama_index import (
generate_qa_llama_index,
generate_answers,
generate_qa_llama_index_by_ratio,
)


@@ -0,0 +1,239 @@
import logging
import uuid
from typing import Callable, Optional, List
import chromadb
import numpy as np
import pandas as pd
from tqdm import tqdm
import autorag
from autorag.nodes.retrieval.vectordb import vectordb_ingest, vectordb_pure
from autorag.utils.util import (
save_parquet_safe,
fetch_contents,
get_event_loop,
process_batch,
)
logger = logging.getLogger("AutoRAG")
def make_single_content_qa(
corpus_df: pd.DataFrame,
content_size: int,
qa_creation_func: Callable,
output_filepath: Optional[str] = None,
upsert: bool = False,
random_state: int = 42,
cache_batch: int = 32,
**kwargs,
) -> pd.DataFrame:
"""
Make a single-content (single-hop, single-document) QA dataset using the given qa_creation_func.
It generates a single-content QA dataset, which means each question has exactly one retrieval ground truth passage.
It is the most basic form of QA dataset.
:param corpus_df: The corpus dataframe to make QA dataset from.
:param content_size: This function will generate QA dataset for the given number of contents.
:param qa_creation_func: The function to create QA pairs.
You can use functions like `generate_qa_llama_index` or `generate_qa_llama_index_by_ratio`.
The function must take a `contents` parameter that receives the list of content strings.
:param output_filepath: Optional filepath to save the parquet file.
If None, the function returns the processed data as a pd.DataFrame but does not save it as a parquet file.
File directory must exist. File extension must be .parquet
:param upsert: If true, the function will overwrite the existing file if it exists.
Default is False.
:param random_state: The random state for sampling corpus from the given corpus_df.
:param cache_batch: The batch size to use when generating and caching the QA dataset.
Each time cache_batch rows are generated, the dataset is saved to the designated output_filepath.
If cache_batch is too small, the process will take longer.
:param kwargs: The keyword arguments for qa_creation_func.
:return: QA dataset dataframe.
You can save this as a parquet file to use with AutoRAG.
"""
assert content_size > 0, "content_size must be greater than 0."
if content_size > len(corpus_df):
logger.warning(
f"content_size {content_size} is larger than the corpus size {len(corpus_df)}. "
"Setting content_size to the corpus size."
)
content_size = len(corpus_df)
sampled_corpus = corpus_df.sample(n=content_size, random_state=random_state)
sampled_corpus = sampled_corpus.reset_index(drop=True)
def make_query_generation_gt(row):
return row["qa"]["query"], row["qa"]["generation_gt"]
qa_data = pd.DataFrame()
for idx, i in tqdm(enumerate(range(0, len(sampled_corpus), cache_batch))):
qa = qa_creation_func(
contents=sampled_corpus["contents"].tolist()[i : i + cache_batch], **kwargs
)
temp_qa_data = pd.DataFrame(
{
"qa": qa,
"retrieval_gt": sampled_corpus["doc_id"].tolist()[i : i + cache_batch],
}
)
temp_qa_data = temp_qa_data.explode("qa", ignore_index=True)
temp_qa_data["qid"] = [str(uuid.uuid4()) for _ in range(len(temp_qa_data))]
temp_qa_data[["query", "generation_gt"]] = temp_qa_data.apply(
make_query_generation_gt, axis=1, result_type="expand"
)
temp_qa_data = temp_qa_data.drop(columns=["qa"])
temp_qa_data["retrieval_gt"] = temp_qa_data["retrieval_gt"].apply(
lambda x: [[x]]
)
temp_qa_data["generation_gt"] = temp_qa_data["generation_gt"].apply(
lambda x: [x]
)
if idx == 0:
qa_data = temp_qa_data
else:
qa_data = pd.concat([qa_data, temp_qa_data], ignore_index=True)
if output_filepath is not None:
save_parquet_safe(qa_data, output_filepath, upsert=upsert)
return qa_data
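# A minimal usage sketch of make_single_content_qa (assumes the package is importable as
# autorag.data.qacreation, an OpenAI LLM from llama_index, and hypothetical file paths;
# extra keyword arguments are forwarded to qa_creation_func):
import pandas as pd
from llama_index.llms.openai import OpenAI  # assumed llama_index OpenAI LLM
from autorag.data.qacreation import make_single_content_qa, generate_qa_llama_index  # assumed package path

corpus_df = pd.read_parquet("corpus.parquet")  # hypothetical corpus path
llm = OpenAI(model="gpt-4o-mini", temperature=1.0)  # hypothetical model choice
qa_df = make_single_content_qa(
    corpus_df,
    content_size=50,
    qa_creation_func=generate_qa_llama_index,
    output_filepath="qa.parquet",  # hypothetical output path
    llm=llm,  # forwarded to generate_qa_llama_index through **kwargs
    question_num_per_content=1,
)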
def make_qa_with_existing_qa(
corpus_df: pd.DataFrame,
existing_query_df: pd.DataFrame,
content_size: int,
answer_creation_func: Optional[Callable] = None,
exist_gen_gt: Optional[bool] = False,
output_filepath: Optional[str] = None,
embedding_model: str = "openai_embed_3_large",
collection: Optional[chromadb.Collection] = None,
upsert: bool = False,
random_state: int = 42,
cache_batch: int = 32,
top_k: int = 3,
**kwargs,
) -> pd.DataFrame:
"""
Make a single-hop QA dataset from existing queries using the given answer_creation_func.
:param corpus_df: The corpus dataframe to make QA dataset from.
:param existing_query_df: Dataframe containing existing queries to use for QA pair creation.
:param content_size: This function will generate QA dataset for the given number of contents.
:param answer_creation_func: Optional function to create answer with input query.
If exist_gen_gt is False, this function must be given.
:param exist_gen_gt: Optional boolean to use existing generation_gt.
If True, the existing_query_df must have 'generation_gt' column.
If False, the answer_creation_func must be given.
:param output_filepath: Optional filepath to save the parquet file.
:param embedding_model: The embedding model to use for vectorization.
You can add your own embedding model in the autorag.embedding_models.
Please refer to how to add an embedding model in this doc: https://docs.auto-rag.com/local_model.html
The default is 'openai_embed_3_large'.
:param collection: The chromadb collection to use for vector DB.
You can make any chromadb collection and use it here.
If you already ingested the corpus_df to the collection, the embedding process will not be repeated.
The default is None. If None, it makes a temporary collection.
:param upsert: If true, the function will overwrite the existing file if it exists.
:param random_state: The random state for sampling corpus from the given corpus_df.
:param cache_batch: The batch size to use when generating and caching the QA dataset.
:param top_k: The number of passages to retrieve for each query; the retrieved passages become
the retrieval ground truth and the evidence for answer generation. Default is 3.
:param kwargs: The keyword arguments for answer_creation_func.
:return: QA dataset dataframe.
"""
raise DeprecationWarning("This function is deprecated.")
assert (
"query" in existing_query_df.columns
), "existing_query_df must have 'query' column."
if exist_gen_gt:
assert (
"generation_gt" in existing_query_df.columns
), "existing_query_df must have 'generation_gt' column."
else:
assert (
answer_creation_func is not None
), "answer_creation_func must be given when exist_gen_gt is False."
assert content_size > 0, "content_size must be greater than 0."
if content_size > len(corpus_df):
logger.warning(
f"content_size {content_size} is larger than the corpus size {len(corpus_df)}. "
"Setting content_size to the corpus size."
)
content_size = len(corpus_df)
logger.info("Loading local embedding model...")
embeddings = autorag.embedding_models[embedding_model]()
# Vector DB creation
if collection is None:
chroma_client = chromadb.Client()
collection_name = "auto-rag"
collection = chroma_client.get_or_create_collection(collection_name)
# embed corpus_df
vectordb_ingest(collection, corpus_df, embeddings)
query_embeddings = embeddings.get_text_embedding_batch(
existing_query_df["query"].tolist()
)
loop = get_event_loop()
tasks = [
vectordb_pure([query_embedding], top_k, collection)
for query_embedding in query_embeddings
]
results = loop.run_until_complete(process_batch(tasks, batch_size=cache_batch))
retrieved_ids = list(map(lambda x: x[0], results))
retrieved_contents: List[List[str]] = fetch_contents(corpus_df, retrieved_ids)
input_passage_strs: List[str] = list(
map(
lambda x: "\n".join(
[f"Document {i + 1}\n{content}" for i, content in enumerate(x)]
),
retrieved_contents,
)
)
retrieved_qa_df = pd.DataFrame(
{
"qid": [str(uuid.uuid4()) for _ in range(len(existing_query_df))],
"query": existing_query_df["query"].tolist(),
"retrieval_gt": list(map(lambda x: [x], retrieved_ids)),
"input_passage_str": input_passage_strs,
}
)
if exist_gen_gt:
generation_gt = existing_query_df["generation_gt"].tolist()
if isinstance(generation_gt[0], np.ndarray):
retrieved_qa_df["generation_gt"] = generation_gt
else:
raise ValueError(
"In existing_query_df, generation_gt (per query) must be in the form of List[str]."
)
sample_qa_df = retrieved_qa_df.sample(
n=min(content_size, len(retrieved_qa_df)), random_state=random_state
)
qa_df = sample_qa_df.copy(deep=True)
qa_df.drop(columns=["input_passage_str"], inplace=True)
if not exist_gen_gt:
generation_gt = answer_creation_func(
contents=sample_qa_df["input_passage_str"].tolist(),
queries=sample_qa_df["query"].tolist(),
batch=cache_batch,
**kwargs,
)
qa_df["generation_gt"] = generation_gt
if output_filepath is not None:
save_parquet_safe(qa_df, output_filepath, upsert=upsert)
return qa_df


@@ -0,0 +1,253 @@
import os.path
import random
from typing import Optional, List, Dict, Any
import pandas as pd
from llama_index.core.base.llms.types import ChatMessage, MessageRole
from llama_index.core.llms import LLM
from autorag.utils.util import process_batch, get_event_loop
package_dir = os.path.dirname(os.path.realpath(__file__))
def generate_qa_llama_index(
llm: LLM,
contents: List[str],
prompt: Optional[str] = None,
question_num_per_content: int = 1,
max_retries: int = 3,
batch: int = 4,
) -> List[List[Dict]]:
"""
Generate a qa set from the list of contents.
It uses a single prompt for all contents.
If you want to use more than one prompt for generating qa,
you can consider using generate_qa_llama_index_by_ratio.
:param llm: Llama index model
:param contents: List of content strings.
:param prompt: The prompt to use for the qa generation.
The prompt must include the following placeholders:
- {{text}}: The content string
- {{num_questions}}: The number of questions to generate
By default, the built-in default prompt is used.
:param question_num_per_content: Number of questions to generate for each content.
Default is 1.
:param max_retries: The maximum number of retries when the number of generated questions does not match the target number.
Default is 3.
:param batch: The batch size to process asynchronously.
Default is 4.
:return: 2-d list of dictionaries containing the query and generation_gt.
"""
# load default prompt
if prompt is None:
prompt = open(
os.path.join(package_dir, "llama_index_default_prompt.txt"), "r"
).read()
tasks = [
async_qa_gen_llama_index(
content, llm, prompt, question_num_per_content, max_retries
)
for content in contents
]
loops = get_event_loop()
results = loops.run_until_complete(process_batch(tasks, batch))
return results
def generate_answers(
llm: LLM,
contents: List[str],
queries: List[str],
batch: int = 4,
) -> List[str]:
"""
Generate qa sets from the list of contents using existing queries.
:param llm: Llama index model
:param contents: List of content strings.
:param queries: List of existing queries.
:param batch: The batch size to process asynchronously.
:return: List of generated answer strings, one for each query.
"""
tasks = [
generate_basic_answer(llm, content, query)
for content, query in zip(contents, queries)
]
loops = get_event_loop()
results = loops.run_until_complete(process_batch(tasks, batch))
return results
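# A short sketch of calling generate_answers directly (assumes an OpenAI LLM from llama_index;
# passages and queries are paired one-to-one and answers come back in the same order):
from llama_index.llms.openai import OpenAI  # assumed llama_index OpenAI LLM

llm = OpenAI(model="gpt-4o-mini")  # hypothetical model choice
passages = ["Seoul is the capital of South Korea."]
queries = ["What is the capital of South Korea?"]
answers = generate_answers(llm, contents=passages, queries=queries, batch=4)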
def generate_qa_llama_index_by_ratio(
llm: LLM,
contents: List[str],
prompts_ratio: Dict,
question_num_per_content: int = 1,
max_retries: int = 3,
random_state: int = 42,
batch: int = 4,
) -> List[List[Dict]]:
"""
Generate a qa set from the list of contents.
You can set the ratio of prompts that you want to use for generating qa.
It randomly distributes the contents across the prompts according to the given ratio.
:param llm: Llama index model
:param contents: List of content strings.
:param prompts_ratio: Dictionary of prompt paths and their ratios.
Example: {"prompt/prompt1.txt": 0.5, "prompt/prompt2.txt": 0.5}
The values do not have to sum to 1.
Each path must be an absolute path to an existing text file containing a proper prompt.
Each prompt must contain the following placeholders:
- {{text}}: The content string
- {{num_questions}}: The number of questions to generate
:param question_num_per_content: Number of questions to generate for each content.
Default is 1.
:param max_retries: The maximum number of retries when the number of generated questions does not match the target number.
Default is 3.
:param random_state: Random seed
Default is 42.
:param batch: The batch size to process asynchronously.
Default is 4.
:return: 2-d list of dictionaries containing the query and generation_gt.
"""
prompts = list(map(lambda path: open(path, "r").read(), prompts_ratio.keys()))
assert all([validate_llama_index_prompt(prompt) for prompt in prompts])
content_indices = list(range(len(contents)))
random.seed(random_state)
random.shuffle(content_indices)
slice_content_indices: List[List[int]] = distribute_list_by_ratio(
content_indices, list(prompts_ratio.values())
)
temp_df = pd.DataFrame({"idx": slice_content_indices, "prompt": prompts})
temp_df = temp_df.explode("idx", ignore_index=True)
temp_df = temp_df.sort_values(by="idx", ascending=True)
final_df = pd.DataFrame({"content": contents, "prompt": temp_df["prompt"].tolist()})
tasks = [
async_qa_gen_llama_index(
content, llm, prompt, question_num_per_content, max_retries
)
for content, prompt in zip(
final_df["content"].tolist(), final_df["prompt"].tolist()
)
]
loops = get_event_loop()
results = loops.run_until_complete(process_batch(tasks, batch))
return results
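# A usage sketch of generate_qa_llama_index_by_ratio (the prompt file paths are hypothetical;
# each must be an absolute path to a file containing the {{text}} and {{num_questions}} placeholders):
from llama_index.llms.openai import OpenAI  # assumed llama_index OpenAI LLM

llm = OpenAI(model="gpt-4o-mini")  # hypothetical model choice
contents = ["First passage text ...", "Second passage text ..."]
prompts_ratio = {
    "/abs/path/factoid_prompt.txt": 0.7,    # hypothetical prompt file
    "/abs/path/reasoning_prompt.txt": 0.3,  # hypothetical prompt file
}
qa = generate_qa_llama_index_by_ratio(
    llm, contents, prompts_ratio, question_num_per_content=2, random_state=42, batch=4
)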
async def async_qa_gen_llama_index(
content: str,
llm: LLM,
prompt: str,
question_num: int = 1,
max_retries: int = 3,
):
"""
Generate a QA set from the given content using the llama index model.
:param content: Content string
:param llm: Llama index model
:param prompt: The prompt to use for the qa generation.
The prompt must include the following placeholders:
- {{text}}: The content string
- {{num_questions}}: The number of questions to generate
:param question_num: The number of questions to generate
:param max_retries: Maximum number of retries when the number of generated questions does not match the target number
:return: List of dictionaries containing the query and generation_gt
"""
validate_llama_index_prompt(prompt)
async def generate(content: str, llm: LLM):
for _ in range(max_retries):
output = await llm.acomplete(
prompt.replace("{{text}}", content).replace(
"{{num_questions}}", str(question_num)
)
)
result = parse_output(output.text)
if len(result) == question_num:
return result
raise InterruptedError(
f"Failed to generate output of length {question_num} after {max_retries} retries."
)
return await generate(content, llm)
async def generate_basic_answer(llm: LLM, passage_str: str, query: str) -> str:
basic_answer_system_prompt = """You are an AI assistant that answers the given question using the provided evidence text.
You have to find the evidence about the question in the given text and write a proper answer to the question.
You have to preserve the question's language in the answer.
For example, if the input question is Korean, the output answer must be in Korean.
"""
user_prompt = f"Text:\n<|text_start|>\n{passage_str}\n<|text_end|>\n\nQuestion:\n{query}\n\nAnswer:"
response = await llm.achat(
messages=[
ChatMessage(role=MessageRole.SYSTEM, content=basic_answer_system_prompt),
ChatMessage(role=MessageRole.USER, content=user_prompt),
],
temperature=1.0,
)
return response.message.content
def validate_llama_index_prompt(prompt: str) -> bool:
"""
Validate the prompt for the llama index model.
The prompt must include the following placeholders:
- {{text}}: The content string
- {{num_questions}}: The number of questions to generate
"""
if "{{text}}" not in prompt:
raise ValueError("The prompt must include the placeholder {{text}}.")
if "{{num_questions}}" not in prompt:
raise ValueError("The prompt must include the placeholder {{num_questions}}.")
return True
def parse_output(result: str) -> List[Dict]:
result = result.strip()
result = result.split("[Q]:")
final_result = list()
for res in result:
res = res.strip()
if res and "\n[A]:" in res:
qa = res.split("\n[A]:")
final_result.append(
{"query": qa[0].strip(), "generation_gt": qa[1].strip()}
)
return final_result
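# A small worked example of the [Q]:/[A]: format that parse_output expects:
raw = """[Q]: What is the capital of France?
[A]: The capital of France is Paris.
[Q]: Which river runs through Paris?
[A]: The Seine runs through Paris."""
parsed = parse_output(raw)
# parsed == [
#     {"query": "What is the capital of France?", "generation_gt": "The capital of France is Paris."},
#     {"query": "Which river runs through Paris?", "generation_gt": "The Seine runs through Paris."},
# ]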
def distribute_list_by_ratio(input_list, ratio) -> List[List[Any]]:
total_ratio = sum(ratio)
total_length = len(input_list)
# Calculate the length of each slice
slice_lengths = [int((r / total_ratio) * total_length) for r in ratio]
# Adjust the last slice in case of rounding issues
slice_lengths[-1] = total_length - sum(slice_lengths[:-1])
slices = []
start = 0
for length in slice_lengths:
end = start + length
slices.append(input_list[start:end])
start = end
return slices
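# A quick check of how distribute_list_by_ratio splits a shuffled index list:
indices = list(range(10))
distribute_list_by_ratio(indices, [0.5, 0.5])
# -> [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
distribute_list_by_ratio(indices, [3, 1])
# int((3 / 4) * 10) == 7 items go to the first slice; the last slice absorbs the remaining 3:
# -> [[0, 1, 2, 3, 4, 5, 6], [7, 8, 9]]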


@@ -0,0 +1,54 @@
You're an AI tasked to convert Text into a question and answer set.
Cover as many details from Text as possible in the QnA set.
Instructions:
1. Both Questions and Answers MUST BE extracted from the given Text
2. Answers must be full sentences
3. Questions should be as detailed as possible from Text
4. Output must always have the provided number of QnAs
5. Create questions that ask about information from the Text
6. Questions MUST include specific keywords from the Text.
7. Do not mention any of these in the questions: "in the given text", "in the provided information", etc.
Question examples:
1. How do owen and riggs know each other?
2. What does the word "fore" mean in golf?
3. What makes charging bull in nyc popular to tourists?
4. What kind of pistol does the army use?
5. Who was the greatest violin virtuoso in the romantic period?
<|separator|>
Text:
<|text_start|>
Mark Hamill as Luke Skywalker : One of the last living Jedi , trained by Obi - Wan and Yoda , who is also a skilled X-wing fighter pilot allied with the Rebellion .
Harrison Ford as Han Solo : A rogue smuggler , who aids the Rebellion against the Empire . Han is Luke and Leia 's friend , as well as Leia 's love interest .
Carrie Fisher as Leia Organa : The former Princess of the destroyed planet Alderaan , who joins the Rebellion ; Luke 's twin sister , and Han 's love interest .
Billy Dee Williams as Lando Calrissian : The former Baron Administrator of Cloud City and one of Han 's friends who aids the Rebellion .
Anthony Daniels as C - 3PO : A humanoid protocol droid , who sides with the Rebellion .
Peter Mayhew as Chewbacca : A Wookiee who is Han 's longtime friend , who takes part in the Rebellion .
Kenny Baker as R2 - D2 : An astromech droid , bought by Luke ; and long - time friend to C - 3PO . He also portrays a GONK power droid in the background .
Ian McDiarmid as the Emperor : The evil founding supreme ruler of the Galactic Empire , and Vader 's Sith Master .
Frank Oz as Yoda : The wise , centuries - old Grand Master of the Jedi , who is Luke 's self - exiled Jedi Master living on Dagobah . After dying , he reappears to Luke as a Force - ghost . Yoda 's Puppetry was assisted by Mike Quinn .
David Prowse as Darth Vader / Anakin Skywalker : A powerful Sith lord and the second in command of the Galactic Empire ; Luke and Leia 's father .
<|text_end|>
Output with 4 QnAs:
<|separator|>
[Q]: who played luke father in return of the jedi
[A]: David Prowse played Darth Vader, a.k.a. Anakin Skywalker, who is Luke and Leia's father.
[Q]: Who is Han Solo's best friend? And what species is he?
[A]: Han Solo's best friend is Chewbacca, who is a Wookiee.
[Q]: Who played luke's teacher in the return of the jedi
[A]: Yoda, the wise, centuries-old Grand Master of the Jedi, who is Luke's self-exiled Jedi Master living on Dagobah, was played by Frank Oz.
Also, there is a mention of Obi-Wan Kenobi, who trained Luke Skywalker.
But I can't find who played Obi-Wan Kenobi in the given text.
[Q]: Where Yoda lives in the return of the jedi?
[A]: Yoda, the Jedi Master, lives on Dagobah.
<|separator|>
Text:
<|text_start|>
{{text}}
<|text_end|>
Output with {{num_questions}} QnAs:
<|separator|>


@@ -0,0 +1,75 @@
import uuid
from typing import Optional
import pandas as pd
from langchain_core.embeddings import Embeddings
from langchain_core.language_models import BaseChatModel
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from autorag.data.utils.util import corpus_df_to_langchain_documents
from autorag.utils import cast_qa_dataset
def generate_qa_ragas(
corpus_df: pd.DataFrame,
test_size: int,
distributions: Optional[dict] = None,
generator_llm: Optional[BaseChatModel] = None,
critic_llm: Optional[BaseChatModel] = None,
embedding_model: Optional[Embeddings] = None,
**kwargs,
) -> pd.DataFrame:
"""
QA dataset generation using RAGAS.
Returns qa dataset dataframe.
:param corpus_df: Corpus dataframe.
:param test_size: Number of queries to generate.
:param distributions: Distributions of different types of questions.
Default is "simple is 0.5, multi_context is 0.4, and reasoning is 0.1."
Each type of questions refers to Ragas evolution types.
:param generator_llm: Generator language model from Langchain.
:param critic_llm: Critic language model from Langchain.
:param embedding_model: Embedding model from Langchain.
:param kwargs: Additional options to pass to the 'generate_with_langchain_docs' method.
You can input 'with_debugging_logs', 'is_async', 'raise_exceptions', and 'run_config'.
:return: QA dataset dataframe.
"""
from ragas.testset import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
if generator_llm is None:
generator_llm = ChatOpenAI(model="gpt-3.5-turbo-16k")
if critic_llm is None:
critic_llm = ChatOpenAI(model="gpt-4-turbo")
if embedding_model is None:
embedding_model = OpenAIEmbeddings()
if distributions is None:
distributions = {simple: 0.5, multi_context: 0.4, reasoning: 0.1}
assert sum(list(distributions.values())) == 1.0, "Sum of distributions must be 1.0"
generator = TestsetGenerator.from_langchain(
generator_llm, critic_llm, embedding_model
)
langchain_docs = corpus_df_to_langchain_documents(corpus_df)
test_df = generator.generate_with_langchain_docs(
langchain_docs, test_size, distributions=distributions, **kwargs
).to_pandas()
result_df = pd.DataFrame(
{
"qid": [str(uuid.uuid4()) for _ in range(len(test_df))],
"query": test_df["question"].tolist(),
"generation_gt": list(map(lambda x: x, test_df["ground_truth"].tolist())),
}
)
result_df["retrieval_gt"] = test_df["metadata"].apply(
lambda x: list(map(lambda y: y["filename"], x))
)
result_df = cast_qa_dataset(result_df)
return result_df
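# A minimal usage sketch of generate_qa_ragas (assumes the module is importable as shown,
# OPENAI_API_KEY is set for the default LLMs and embeddings, and the corpus path is hypothetical):
import pandas as pd
from autorag.data.qacreation.ragas import generate_qa_ragas  # assumed module path

corpus_df = pd.read_parquet("corpus.parquet")  # hypothetical corpus path
qa_df = generate_qa_ragas(corpus_df, test_size=30)  # default LLMs, embeddings, and distributions
qa_df.to_parquet("qa_ragas.parquet", index=False)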


@@ -0,0 +1,99 @@
import os
import pathlib
import uuid
from typing import Callable
import pandas as pd
def generate_qa_row(llm, corpus_data_row):
"""
Sample code that generates one RAG dataset row using a guidance chat model.
:param llm: A guidance model.
:param corpus_data_row: A corpus row; it must have a "contents" column.
:return: A dict that contains at least the "query" and "generation_gt" keys.
"""
from guidance import gen
import guidance
temp_llm = llm
with guidance.user():
temp_llm += f"""
You have to find a passage to solve "the problem".
You need to build a clean and clear set of (problem, passage, answer) in json format
so that you don't have to ask about "the problem" again.
The problem needs to end with a question mark ("?").
The process of approaching the answer based on the information of the given passage
must be clearly and neatly displayed in the answer.\n
\n
Here is set of (problem, passage, answer) in JSON format:\n
{{\n
"passage": {corpus_data_row["contents"]}\n
"problem":
"""
with guidance.assistant():
temp_llm += gen("query", stop="?")
with guidance.user():
temp_llm += """
"answer":
"""
with guidance.assistant():
temp_llm += gen("generation_gt")
corpus_data_row["metadata"]["qa_generation"] = "simple"
response = {"query": temp_llm["query"], "generation_gt": temp_llm["generation_gt"]}
return response
def generate_simple_qa_dataset(
llm,
corpus_data: pd.DataFrame,
output_filepath: str,
generate_row_function: Callable,
**kwargs,
):
"""
Convert corpus_data into a QA dataset.
The QA dataset will be saved to the given filepath (file_dir/filename).
:param llm: guidance.models.Model
:param corpus_data: pd.DataFrame following the basic corpus structure.
:param output_filepath: The file_dir must exist and the filepath must not exist yet. The file extension must be .parquet.
:param generate_row_function: A function that takes (llm, corpus_data_row, **kwargs) and returns a dict containing at least "query" and "generation_gt".
:param kwargs: Extra keyword arguments passed to generate_row_function if it requires them.
:return: The QA dataset as a pd.DataFrame.
"""
output_file_dir = pathlib.PurePath(output_filepath).parent
if not os.path.isdir(output_file_dir):
raise NotADirectoryError(f"directory {output_file_dir} not found.")
if not output_filepath.endswith("parquet"):
raise NameError(
f'file path: {output_filepath} filename extension needs to be ".parquet"'
)
if os.path.exists(output_filepath):
raise FileExistsError(
f"{output_filepath.split('/')[-1]} already exists in {output_file_dir}."
)
qa_data_lst = []
for _, corpus_data_row in corpus_data.iterrows():
response = generate_row_function(
llm=llm, corpus_data_row=corpus_data_row, **kwargs
)
qa_data_lst.append(
{
"qid": str(uuid.uuid4()),
"query": response["query"],
"retrieval_gt": [[corpus_data_row["doc_id"]]],
"generation_gt": [response["generation_gt"]],
"metadata": corpus_data_row["metadata"],
}
)
qa_dataset = pd.DataFrame(qa_data_lst)
qa_dataset.to_parquet(output_filepath, index=False)
return qa_dataset
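# A sketch of wiring the two functions above together with a guidance chat model;
# the model name and file paths are assumptions:
import guidance
import pandas as pd

corpus_data = pd.read_parquet("corpus.parquet")  # hypothetical corpus path
llm = guidance.models.OpenAI("gpt-3.5-turbo")  # assumed guidance chat model constructor
qa_dataset = generate_simple_qa_dataset(
    llm=llm,
    corpus_data=corpus_data,
    output_filepath="simple_qa.parquet",  # must not exist yet
    generate_row_function=generate_qa_row,
)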