Fix Dockerfile build issue
autorag/data/qa/generation_gt/__init__.py (new file, 0 lines)
autorag/data/qa/generation_gt/base.py (new file, 16 lines)
@@ -0,0 +1,16 @@
from typing import Dict


def add_gen_gt(row: Dict, new_gen_gt: str) -> Dict:
    if "generation_gt" in list(row.keys()):
        if isinstance(row["generation_gt"], list):
            row["generation_gt"].append(new_gen_gt)
        elif isinstance(row["generation_gt"], str):
            row["generation_gt"] = [row["generation_gt"], new_gen_gt]
        else:
            raise ValueError(
                "generation_gt should be either a string or a list of strings."
            )
        return row
    row["generation_gt"] = [new_gen_gt]
    return row
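For context (not part of this commit), a minimal usage sketch of add_gen_gt; the row contents here are hypothetical:

import asyncio  # not needed here, shown in later sketches

from autorag.data.qa.generation_gt.base import add_gen_gt

row = {"query": "What is AutoRAG?"}  # hypothetical row from a QA dataframe
row = add_gen_gt(row, "An AutoML tool for RAG.")        # first call creates the list
row = add_gen_gt(row, "A RAG optimization framework.")  # later calls append to it
print(row["generation_gt"])
# ['An AutoML tool for RAG.', 'A RAG optimization framework.']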
autorag/data/qa/generation_gt/llama_index_gen_gt.py (new file, 41 lines)
@@ -0,0 +1,41 @@
import itertools
from typing import Dict

from llama_index.core.base.llms.base import BaseLLM
from llama_index.core.base.llms.types import MessageRole, ChatMessage

from autorag.data.qa.generation_gt.base import add_gen_gt
from autorag.data.qa.generation_gt.prompt import GEN_GT_SYSTEM_PROMPT


async def make_gen_gt_llama_index(row: Dict, llm: BaseLLM, system_prompt: str) -> Dict:
    retrieval_gt_contents = list(
        itertools.chain.from_iterable(row["retrieval_gt_contents"])
    )
    query = row["query"]
    passage_str = "\n".join(retrieval_gt_contents)
    user_prompt = f"Text:\n<|text_start|>\n{passage_str}\n<|text_end|>\n\nQuestion:\n{query}\n\nAnswer:"

    response = await llm.achat(
        messages=[
            ChatMessage(role=MessageRole.SYSTEM, content=system_prompt),
            ChatMessage(role=MessageRole.USER, content=user_prompt),
        ],
        temperature=0.0,
    )
    return add_gen_gt(row, response.message.content)


async def make_concise_gen_gt(row: Dict, llm: BaseLLM, lang: str = "en") -> Dict:
    return await make_gen_gt_llama_index(
        row, llm, GEN_GT_SYSTEM_PROMPT["concise"][lang]
    )


async def make_basic_gen_gt(row: Dict, llm: BaseLLM, lang: str = "en") -> Dict:
    return await make_gen_gt_llama_index(row, llm, GEN_GT_SYSTEM_PROMPT["basic"][lang])


async def make_custom_gen_gt(row: Dict, llm: BaseLLM, system_prompt: str) -> Dict:
    return await make_gen_gt_llama_index(row, llm, system_prompt)
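A hedged end-to-end sketch for the llama_index path (not in this commit). It assumes the llama-index-llms-openai integration is installed, and that the row already carries retrieval_gt_contents as a list of lists of passage strings; the row values and model name are illustrative only:

import asyncio

from llama_index.llms.openai import OpenAI

from autorag.data.qa.generation_gt.llama_index_gen_gt import make_basic_gen_gt

row = {  # hypothetical row
    "query": "Who wrote Hamlet?",
    "retrieval_gt_contents": [["Hamlet is a tragedy written by William Shakespeare."]],
}
llm = OpenAI(model="gpt-4o-mini")  # any llama_index BaseLLM should work here
row = asyncio.run(make_basic_gen_gt(row, llm))
print(row["generation_gt"])  # e.g. ['William Shakespeare']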
autorag/data/qa/generation_gt/openai_gen_gt.py (new file, 84 lines)
@@ -0,0 +1,84 @@
import itertools
from typing import Dict

from openai import AsyncClient
from pydantic import BaseModel

from autorag.data.qa.generation_gt.base import add_gen_gt
from autorag.data.qa.generation_gt.prompt import GEN_GT_SYSTEM_PROMPT


class Response(BaseModel):
    answer: str


async def make_gen_gt_openai(
    row: Dict,
    client: AsyncClient,
    system_prompt: str,
    model_name: str = "gpt-4o-2024-08-06",
):
    retrieval_gt_contents = list(
        itertools.chain.from_iterable(row["retrieval_gt_contents"])
    )
    query = row["query"]
    passage_str = "\n".join(retrieval_gt_contents)
    user_prompt = f"Text:\n<|text_start|>\n{passage_str}\n<|text_end|>\n\nQuestion:\n{query}\n\nAnswer:"

    completion = await client.beta.chat.completions.parse(
        model=model_name,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.0,
        response_format=Response,
    )
    response: Response = completion.choices[0].message.parsed
    return add_gen_gt(row, response.answer)


async def make_concise_gen_gt(
    row: Dict,
    client: AsyncClient,
    model_name: str = "gpt-4o-2024-08-06",
    lang: str = "en",
):
    """
    Generate a concise generation_gt using OpenAI structured outputs to prevent parsing errors.
    The answer is concise, so it is generally a single word or a short phrase.

    :param row: The input row of the QA dataframe.
    :param client: The OpenAI async client.
    :param model_name: A model name that supports structured outputs.
        It has to be "gpt-4o-2024-08-06" or "gpt-4o-mini-2024-07-18".
    :param lang: The language code of the prompt. Default is "en".
    :return: The output row of the QA dataframe with "generation_gt" added.
    """
    return await make_gen_gt_openai(
        row, client, GEN_GT_SYSTEM_PROMPT["concise"][lang], model_name
    )


async def make_basic_gen_gt(
    row: Dict,
    client: AsyncClient,
    model_name: str = "gpt-4o-2024-08-06",
    lang: str = "en",
):
    """
    Generate a basic generation_gt using OpenAI structured outputs to prevent parsing errors.
    It generates a "basic" answer from a simple prompt.

    :param row: The input row of the QA dataframe.
    :param client: The OpenAI async client.
    :param model_name: A model name that supports structured outputs.
        It has to be "gpt-4o-2024-08-06" or "gpt-4o-mini-2024-07-18".
    :param lang: The language code of the prompt. Default is "en".
    :return: The output row of the QA dataframe with "generation_gt" added.
    """
    return await make_gen_gt_openai(
        row, client, GEN_GT_SYSTEM_PROMPT["basic"][lang], model_name
    )
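And a matching sketch for the OpenAI path (not in this commit), assuming OPENAI_API_KEY is set in the environment; the row values are again hypothetical:

import asyncio

from openai import AsyncClient

from autorag.data.qa.generation_gt.openai_gen_gt import make_concise_gen_gt

row = {  # hypothetical row
    "query": "Who wrote Hamlet?",
    "retrieval_gt_contents": [["Hamlet is a tragedy written by William Shakespeare."]],
}
client = AsyncClient()  # reads OPENAI_API_KEY from the environment
row = asyncio.run(make_concise_gen_gt(row, client, lang="en"))
print(row["generation_gt"])  # e.g. ['William Shakespeare']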
autorag/data/qa/generation_gt/prompt.py (new file, 27 lines)
@@ -0,0 +1,27 @@
GEN_GT_SYSTEM_PROMPT = {
    "concise": {
        "en": """You are an AI assistant that answers the given question using the provided evidence text.
Find the evidence for the question in the given text, and write a proper answer to the question.
Your answer has to be concise and relevant to the question.
Do not write a verbose answer; make it as clear as possible.
It does not have to be a full sentence; the answer can be a single word or a phrase.""",
        "ko": """당신은 주어진 질문에 대해 제공된 Text 내에서 답을 찾는 AI 비서입니다.
질문에 대한 답을 Text에서 찾아 적절한 답변을 작성하세요.
답변은 간결하고 질문에 관련된 내용만 포함해야 합니다.
불필요하게 길게 답변하지 말고, 명확하게 작성하세요.
완전한 문장이 아니어도 되며, 답은 단어나 요약일 수 있습니다.""",
        "ja": """あなたは与えられた質問に対して提供されたText内で答えを探すAI秘書です。
質問に対する答えをTextで探して適切な答えを作成しましょう。
回答は簡潔で、質問に関連する内容のみを含める必要があります。
不必要に長く答えず、明確に作成しましょう。
完全な文章でなくてもいいし、答えは単語や要約かもしれません。""",
    },
    "basic": {
        "en": """You are an AI assistant that answers the given question using the provided evidence text.
Find the evidence for the question in the given text, and write a proper answer to the question.""",
        "ko": "당신은 주어진 질문에 대한 답을 제공된 Text 내에서 찾는 AI 비서입니다. 질문과 관련된 증거를 Text에서 찾아 적절한 답변을 작성하세요.",
        "ja": "あなたは与えられた質問に対する答えを提供されたText内で探すAI秘書です。 質問に関する証拠をTextで探して適切な回答を作成しましょう。",
    },
}
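For illustration (not in the commit): prompts are keyed first by style and then by language code, so only "en", "ko", and "ja" are valid lang values, and any other code surfaces as a plain KeyError:

from autorag.data.qa.generation_gt.prompt import GEN_GT_SYSTEM_PROMPT

system_prompt = GEN_GT_SYSTEM_PROMPT["concise"]["en"]
# GEN_GT_SYSTEM_PROMPT["concise"]["fr"] would raise KeyError: 'fr'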