Fix Dockerfile build issue

2025-03-18 16:41:12 +09:00
parent 6814230bfb
commit 9323aa254a
228 changed files with 467 additions and 3488 deletions

View File

@@ -0,0 +1,3 @@
from .long_context_reorder import LongContextReorder
from .window_replacement import WindowReplacement
from .fstring import Fstring

View File

@@ -0,0 +1,34 @@
import logging
from abc import ABCMeta
from pathlib import Path
from typing import Union

import pandas as pd

from autorag.schema.base import BaseModule

logger = logging.getLogger("AutoRAG")


class BasePromptMaker(BaseModule, metaclass=ABCMeta):
    def __init__(self, project_dir: Union[str, Path], *args, **kwargs):
        logger.info(
            f"Initialize prompt maker node - {self.__class__.__name__} module..."
        )

    def __del__(self):
        logger.info(f"Prompt maker node - {self.__class__.__name__} module is deleted.")

    def cast_to_run(self, previous_result: pd.DataFrame, *args, **kwargs):
        logger.info(f"Running prompt maker node - {self.__class__.__name__} module...")
        # get query and retrieved contents from previous_result
        assert (
            "query" in previous_result.columns
        ), "previous_result must have query column."
        assert (
            "retrieved_contents" in previous_result.columns
        ), "previous_result must have retrieved_contents column."
        query = previous_result["query"].tolist()
        retrieved_contents = previous_result["retrieved_contents"].tolist()
        prompt = kwargs.pop("prompt")
        return query, retrieved_contents, prompt

View File

@@ -0,0 +1,49 @@
from typing import List

import pandas as pd

from autorag.nodes.promptmaker.base import BasePromptMaker
from autorag.utils import result_to_dataframe


class Fstring(BasePromptMaker):
    @result_to_dataframe(["prompts"])
    def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
        query, retrieved_contents, prompt = self.cast_to_run(
            previous_result, *args, **kwargs
        )
        return self._pure(prompt, query, retrieved_contents)

    def _pure(
        self, prompt: str, queries: List[str], retrieved_contents: List[List[str]]
    ) -> List[str]:
        """
        Make a prompt using f-string from a query and retrieved_contents.
        You must specify a prompt or a list of prompts in the config YAML file like this:

        .. Code:: yaml

            nodes:
            - node_type: prompt_maker
              modules:
              - module_type: fstring
                prompt: [Answer this question: {query} \n\n {retrieved_contents},
                  Read the passages carefully and answer this question: {query} \n\n Passages: {retrieved_contents}]

        :param prompt: A prompt string.
        :param queries: List of query strings.
        :param retrieved_contents: List of retrieved contents.
        :return: Prompts that are made by f-string.
        """

        def fstring_row(
            _prompt: str, _query: str, _retrieved_contents: List[str]
        ) -> str:
            contents_str = "\n\n".join(_retrieved_contents)
            return _prompt.format(query=_query, retrieved_contents=contents_str)

        return list(
            map(
                lambda x: fstring_row(prompt, x[0], x[1]),
                zip(queries, retrieved_contents),
            )
        )
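For illustration, a minimal usage sketch of the f-string formatting above. The sample data and project directory are hypothetical; the base __init__ shown earlier only logs, so any path string is accepted here.

from autorag.nodes.promptmaker import Fstring

fstring = Fstring(project_dir="./project")  # hypothetical project directory
prompts = fstring._pure(
    prompt="Answer this question: {query}\n\n{retrieved_contents}",
    queries=["What is AutoRAG?"],
    retrieved_contents=[["AutoRAG automates RAG evaluation.", "It compares modules."]],
)
# prompts[0] == ("Answer this question: What is AutoRAG?\n\n"
#                "AutoRAG automates RAG evaluation.\n\nIt compares modules.")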

View File

@@ -0,0 +1,83 @@
import logging
from typing import List

import numpy as np
import pandas as pd

from autorag.nodes.promptmaker.base import BasePromptMaker
from autorag.utils import result_to_dataframe

logger = logging.getLogger("AutoRAG")


class LongContextReorder(BasePromptMaker):
    @result_to_dataframe(["prompts"])
    def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
        query, retrieved_contents, prompt = self.cast_to_run(
            previous_result, *args, **kwargs
        )
        assert (
            "retrieve_scores" in previous_result.columns
        ), "previous_result must have retrieve_scores column."
        retrieve_scores = previous_result["retrieve_scores"].tolist()
        return self._pure(prompt, query, retrieved_contents, retrieve_scores)

    def _pure(
        self,
        prompt: str,
        queries: List[str],
        retrieved_contents: List[List[str]],
        retrieve_scores: List[List[float]],
    ) -> List[str]:
        """
        Models struggle to access significant details found
        in the center of extended contexts. A study
        (https://arxiv.org/abs/2307.03172) observed that the best
        performance typically arises when crucial data is positioned
        at the start or end of the input context. Additionally,
        as the input context lengthens, performance drops notably, even
        in models designed for long contexts.

        .. Code:: yaml

            nodes:
            - node_type: prompt_maker
              modules:
              - module_type: long_context_reorder
                prompt: [Answer this question: {query} \n\n {retrieved_contents},
                  Read the passages carefully and answer this question: {query} \n\n Passages: {retrieved_contents}]

        :param prompt: A prompt string.
        :param queries: List of query strings.
        :param retrieved_contents: List of retrieved contents.
        :param retrieve_scores: List of retrieve scores.
        :return: Prompts that are made by long context reorder.
        """

        def long_context_reorder_row(
            _prompt: str,
            _query: str,
            _retrieved_contents: List[str],
            _retrieve_scores: List[float],
        ) -> str:
            if isinstance(_retrieved_contents, np.ndarray):
                _retrieved_contents = _retrieved_contents.tolist()
            if not len(_retrieved_contents) == len(_retrieve_scores):
                logger.info("If you use a summarizer, the reorder will not proceed.")
                return _prompt.format(
                    query=_query, retrieved_contents="\n\n".join(_retrieved_contents)
                )
            content_scores = list(zip(_retrieved_contents, _retrieve_scores))
            sorted_content_scores = sorted(
                content_scores, key=lambda x: x[1], reverse=True
            )
            content_result, score_result = zip(*sorted_content_scores)
            # append the top-scored passage to the end as well, so the most
            # relevant content also sits at the tail of the context
            _retrieved_contents.append(content_result[0])
            contents_str = "\n\n".join(_retrieved_contents)
            return _prompt.format(query=_query, retrieved_contents=contents_str)

        return list(
            map(
                lambda x: long_context_reorder_row(prompt, x[0], x[1], x[2]),
                zip(queries, retrieved_contents, retrieve_scores),
            )
        )
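A minimal sketch of the reordering behaviour implemented above; the passages, scores, and project directory are made up:

from autorag.nodes.promptmaker import LongContextReorder

reorder = LongContextReorder(project_dir="./project")  # hypothetical project directory
prompts = reorder._pure(
    prompt="{query}\n\n{retrieved_contents}",
    queries=["What is AutoRAG?"],
    retrieved_contents=[["passage A", "passage B", "passage C"]],
    retrieve_scores=[[0.9, 0.4, 0.7]],
)
# The top-scored passage ("passage A", score 0.9) is appended again at the end,
# so prompts[0] contains: "passage A\n\npassage B\n\npassage C\n\npassage A"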

View File

@@ -0,0 +1,280 @@
import os
import pathlib
from copy import deepcopy
from typing import List, Dict, Optional, Union

import pandas as pd
import tokenlog

from autorag.evaluation import evaluate_generation
from autorag.evaluation.util import cast_metrics
from autorag.schema.metricinput import MetricInput
from autorag.strategy import measure_speed, filter_by_threshold, select_best
from autorag.support import get_support_modules
from autorag.utils import validate_qa_dataset
from autorag.utils.util import make_combinations, explode, split_dataframe


def run_prompt_maker_node(
    modules: List,
    module_params: List[Dict],
    previous_result: pd.DataFrame,
    node_line_dir: str,
    strategies: Dict,
) -> pd.DataFrame:
    """
    Run prompt maker node.
    With this function, you can select the best prompt maker module.
    By default, when only one module is given, the evaluation is skipped.
    If you want to select the best prompt among multiple modules, use strategies.
    In that case, you must pass 'generator_modules' and their parameters in strategies,
    because generator modules and generation metrics are used to evaluate this node.
    It is recommended to use a single module and parameter set for evaluation,
    but multiple modules and parameters are supported.
    When you don't set a generator module in strategies, the default generator module is used.
    The default generator module is llama_index_llm with the openai gpt-3.5-turbo model.

    :param modules: Prompt maker module classes to run.
    :param module_params: Prompt maker module parameters.
    :param previous_result: Previous result dataframe.
        Could be query expansion's best result or qa data.
    :param node_line_dir: This node line's directory.
    :param strategies: Strategies for prompt maker node.
    :return: The best result dataframe.
        It contains the previous result columns and the prompt maker's result column, which is 'prompts'.
    """
    if not os.path.exists(node_line_dir):
        os.makedirs(node_line_dir)
    node_dir = os.path.join(node_line_dir, "prompt_maker")
    if not os.path.exists(node_dir):
        os.makedirs(node_dir)
    project_dir = pathlib.PurePath(node_line_dir).parent.parent

    # run modules
    results, execution_times = zip(
        *map(
            lambda task: measure_speed(
                task[0].run_evaluator,
                project_dir=project_dir,
                previous_result=previous_result,
                **task[1],
            ),
            zip(modules, module_params),
        )
    )
    average_times = list(map(lambda x: x / len(results[0]), execution_times))

    # get average token usage
    token_usages = []
    for i, result in enumerate(results):
        token_logger = tokenlog.getLogger(
            f"prompt_maker_{i}", strategies.get("tokenizer", "gpt2")
        )
        token_logger.query_batch(result["prompts"].tolist())
        token_usages.append(token_logger.get_token_usage() / len(result))

    # save results to folder
    filepaths = list(
        map(lambda x: os.path.join(node_dir, f"{x}.parquet"), range(len(modules)))
    )
    list(
        map(lambda x: x[0].to_parquet(x[1], index=False), zip(results, filepaths))
    )  # execute save to parquet
    filenames = list(map(lambda x: os.path.basename(x), filepaths))

    # make summary file
    summary_df = pd.DataFrame(
        {
            "filename": filenames,
            "module_name": list(map(lambda module: module.__name__, modules)),
            "module_params": module_params,
            "execution_time": average_times,
            "average_prompt_token": token_usages,
        }
    )

    metric_names, metric_params = cast_metrics(strategies.get("metrics"))

    # Run evaluation when there is more than one module.
    if len(modules) > 1:
        # pop general keys from strategies (e.g. metrics, speed_threshold)
        general_key = ["metrics", "speed_threshold", "token_threshold", "tokenizer"]
        general_strategy = dict(
            filter(lambda x: x[0] in general_key, strategies.items())
        )
        extra_strategy = dict(
            filter(lambda x: x[0] not in general_key, strategies.items())
        )

        # first, filter by speed threshold if it is enabled.
        if general_strategy.get("speed_threshold") is not None:
            results, filenames = filter_by_threshold(
                results, average_times, general_strategy["speed_threshold"], filenames
            )

        # filter by token threshold if it is enabled.
        if general_strategy.get("token_threshold") is not None:
            results, filenames = filter_by_threshold(
                results, token_usages, general_strategy["token_threshold"], filenames
            )

        # at least one metric is required for evaluation
        if metric_names is None or len(metric_names) <= 0:
            raise ValueError(
                "You must set at least one metric for prompt maker evaluation."
            )

        # get generator modules from strategy
        generator_callables, generator_params = make_generator_callable_params(
            extra_strategy
        )

        # get generation_gt
        qa_data = pd.read_parquet(
            os.path.join(project_dir, "data", "qa.parquet"), engine="pyarrow"
        )
        validate_qa_dataset(qa_data)
        generation_gt = qa_data["generation_gt"].tolist()
        generation_gt = list(map(lambda x: x.tolist(), generation_gt))
        metric_inputs = [MetricInput(generation_gt=gen_gt) for gen_gt in generation_gt]

        all_prompts = []
        for result in results:
            all_prompts.extend(result["prompts"].tolist())

        evaluation_result_all = evaluate_one_prompt_maker_node(
            all_prompts,
            generator_callables,
            generator_params,
            metric_inputs * len(results),
            general_strategy["metrics"],
            project_dir,
            strategy_name=strategies.get("strategy", "mean"),
        )
        evaluation_results = split_dataframe(
            evaluation_result_all, chunk_size=len(results[0])
        )

        evaluation_df = pd.DataFrame(
            {
                "filename": filenames,
                **{
                    f"prompt_maker_{metric_name}": list(
                        map(lambda x: x[metric_name].mean(), evaluation_results)
                    )
                    for metric_name in metric_names
                },
            }
        )
        summary_df = pd.merge(
            on="filename", left=summary_df, right=evaluation_df, how="left"
        )

        best_result, best_filename = select_best(
            evaluation_results,
            metric_names,
            filenames,
            strategies.get("strategy", "mean"),
        )
        # change metric name columns to prompt_maker_metric_name
        best_result = best_result.rename(
            columns={
                metric_name: f"prompt_maker_{metric_name}"
                for metric_name in metric_names
            }
        )
        best_result = best_result.drop(columns=["generated_texts"])
    else:
        best_result, best_filename = results[0], filenames[0]

    # add 'is_best' column at summary file
    summary_df["is_best"] = summary_df["filename"] == best_filename

    best_result = pd.concat([previous_result, best_result], axis=1)

    # save files
    summary_df.to_csv(os.path.join(node_dir, "summary.csv"), index=False)
    best_result.to_parquet(
        os.path.join(node_dir, f"best_{os.path.splitext(best_filename)[0]}.parquet"),
        index=False,
    )
    return best_result


def make_generator_callable_params(strategy_dict: Dict):
    node_dict = deepcopy(strategy_dict)
    generator_module_list: Optional[List[Dict]] = node_dict.pop(
        "generator_modules", None
    )
    if generator_module_list is None:
        generator_module_list = [
            {
                "module_type": "llama_index_llm",
                "llm": "openai",
                "model": "gpt-3.5-turbo",
            }
        ]
    node_params = node_dict
    modules = list(
        map(
            lambda module_dict: get_support_modules(module_dict.pop("module_type")),
            generator_module_list,
        )
    )
    param_combinations = list(
        map(
            lambda module_dict: make_combinations({**module_dict, **node_params}),
            generator_module_list,
        )
    )
    return explode(modules, param_combinations)


def evaluate_one_prompt_maker_node(
    prompts: List[str],
    generator_classes: List,
    generator_params: List[Dict],
    metric_inputs: List[MetricInput],
    metrics: Union[List[str], List[Dict]],
    project_dir,
    strategy_name: str,
) -> pd.DataFrame:
    input_df = pd.DataFrame({"prompts": prompts})
    generator_results = list(
        map(
            lambda x: x[0].run_evaluator(
                project_dir=project_dir, previous_result=input_df, **x[1]
            ),
            zip(generator_classes, generator_params),
        )
    )
    evaluation_results = list(
        map(
            lambda x: evaluate_generator_result(x[0], metric_inputs, metrics),
            zip(generator_results, generator_classes),
        )
    )
    metric_names = (
        list(map(lambda x: x["metric_name"], metrics))
        if isinstance(metrics[0], dict)
        else metrics
    )
    best_result, _ = select_best(
        evaluation_results, metric_names, strategy_name=strategy_name
    )
    best_result = pd.concat([input_df, best_result], axis=1)
    return best_result  # it has 'generated_texts' column


def evaluate_generator_result(
    result_df: pd.DataFrame,
    metric_inputs: List[MetricInput],
    metrics: Union[List[str], List[Dict]],
) -> pd.DataFrame:
    @evaluate_generation(metric_inputs=metric_inputs, metrics=metrics)
    def evaluate(df):
        return df["generated_texts"].tolist()

    return evaluate(result_df)
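For orientation, a minimal sketch of a strategies dict this runner reads, based on the keys used above ("metrics", "speed_threshold", "token_threshold", "tokenizer", "strategy", and the "generator_modules" list popped in make_generator_callable_params). The concrete values, including the metric names, are illustrative only; any keys other than the general ones are forwarded to the generator modules as parameters.

strategies = {
    "metrics": ["bleu", "meteor"],    # generation metrics used for evaluation (example values)
    "speed_threshold": 10,            # optional: drop modules slower than this per-row average
    "token_threshold": 4000,          # optional: drop modules whose average prompt token count exceeds this
    "tokenizer": "gpt2",              # tokenizer for token counting (default above is "gpt2")
    "strategy": "mean",               # how metric scores are aggregated (default above is "mean")
    "generator_modules": [            # optional: defaults to llama_index_llm with gpt-3.5-turbo
        {"module_type": "llama_index_llm", "llm": "openai", "model": "gpt-3.5-turbo"},
    ],
}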

View File

@@ -0,0 +1,85 @@
import logging
import os
from typing import List, Dict

import pandas as pd

from autorag.nodes.promptmaker.base import BasePromptMaker
from autorag.utils import result_to_dataframe, fetch_contents

logger = logging.getLogger("AutoRAG")


class WindowReplacement(BasePromptMaker):
    def __init__(self, project_dir: str, *args, **kwargs):
        super().__init__(project_dir, *args, **kwargs)
        # load corpus
        data_dir = os.path.join(project_dir, "data")
        self.corpus_data = pd.read_parquet(
            os.path.join(data_dir, "corpus.parquet"), engine="pyarrow"
        )

    @result_to_dataframe(["prompts"])
    def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
        query, retrieved_contents, prompt = self.cast_to_run(
            previous_result, *args, **kwargs
        )
        retrieved_ids = previous_result["retrieved_ids"].tolist()
        # get metadata from corpus
        retrieved_metadata = fetch_contents(
            self.corpus_data, retrieved_ids, column_name="metadata"
        )
        return self._pure(prompt, query, retrieved_contents, retrieved_metadata)

    def _pure(
        self,
        prompt: str,
        queries: List[str],
        retrieved_contents: List[List[str]],
        retrieved_metadata: List[List[Dict]],
    ) -> List[str]:
        """
        Replace retrieved_contents with a window to create a prompt
        (only available for a corpus chunked with the sentence window method).
        You must specify a prompt or a list of prompts in the config YAML file like this:

        .. Code:: yaml

            nodes:
            - node_type: prompt_maker
              modules:
              - module_type: window_replacement
                prompt: [Answer this question: {query} \n\n {retrieved_contents},
                  Read the passages carefully and answer this question: {query} \n\n Passages: {retrieved_contents}]

        :param prompt: A prompt string.
        :param queries: List of query strings.
        :param retrieved_contents: List of retrieved contents.
        :param retrieved_metadata: List of retrieved metadata.
        :return: Prompts that are made by window_replacement.
        """

        def window_replacement_row(
            _prompt: str,
            _query: str,
            _retrieved_contents,
            _retrieved_metadata: List[Dict],
        ) -> str:
            window_list = []
            for content, metadata in zip(_retrieved_contents, _retrieved_metadata):
                if "window" in metadata:
                    window_list.append(metadata["window"])
                else:
                    window_list.append(content)
                    logger.info(
                        "Only available for corpus chunked with Sentence window method. "
                        "window_replacement will not proceed."
                    )
            contents_str = "\n\n".join(window_list)
            return _prompt.format(query=_query, retrieved_contents=contents_str)

        return list(
            map(
                lambda x: window_replacement_row(prompt, x[0], x[1], x[2]),
                zip(queries, retrieved_contents, retrieved_metadata),
            )
        )
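As a rough illustration of the replacement logic above: the metadata shape below is an assumption about what sentence-window chunking stores in the corpus, and all values are made up.

# Hypothetical per-passage metadata from a sentence-window chunked corpus.
retrieved_metadata = [
    [
        {"window": "Sentence before. Retrieved sentence. Sentence after."},
        {},  # no "window" key: the raw content is kept and a notice is logged
    ]
]
retrieved_contents = [["Retrieved sentence.", "Chunk without window metadata."]]

# window_replacement_row joins the windows (or the raw contents as a fallback)
# with blank lines before formatting them into the prompt template, producing:
# "Sentence before. Retrieved sentence. Sentence after.\n\nChunk without window metadata."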