Fix Dockerfile build issue
autorag/nodes/promptmaker/__init__.py (new file, +3)
@@ -0,0 +1,3 @@
from .long_context_reorder import LongContextReorder
from .window_replacement import WindowReplacement
from .fstring import Fstring
autorag/nodes/promptmaker/base.py (new file, +34)
@@ -0,0 +1,34 @@
import logging
from abc import ABCMeta
from pathlib import Path
from typing import Union

import pandas as pd

from autorag.schema.base import BaseModule

logger = logging.getLogger("AutoRAG")


class BasePromptMaker(BaseModule, metaclass=ABCMeta):
    def __init__(self, project_dir: Union[str, Path], *args, **kwargs):
        logger.info(
            f"Initialize prompt maker node - {self.__class__.__name__} module..."
        )

    def __del__(self):
        logger.info(f"Prompt maker node - {self.__class__.__name__} module is deleted.")

    def cast_to_run(self, previous_result: pd.DataFrame, *args, **kwargs):
        logger.info(f"Running prompt maker node - {self.__class__.__name__} module...")
        # get query and retrieved contents from previous_result
        assert (
            "query" in previous_result.columns
        ), "previous_result must have query column."
        assert (
            "retrieved_contents" in previous_result.columns
        ), "previous_result must have retrieved_contents column."
        query = previous_result["query"].tolist()
        retrieved_contents = previous_result["retrieved_contents"].tolist()
        prompt = kwargs.pop("prompt")
        return query, retrieved_contents, prompt
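For orientation (not part of this commit), the sketch below shows how a concrete module is expected to build on BasePromptMaker: cast_to_run validates the previous_result columns and returns the parallel query and retrieved_contents lists plus the prompt template. The class name and its formatting logic are hypothetical.

# Hypothetical sketch, not part of the commit: a minimal custom prompt maker
# built on the BasePromptMaker added above. Assumes AutoRAG is installed and
# that result_to_dataframe / cast_to_run behave as shown in this diff.
import pandas as pd

from autorag.nodes.promptmaker.base import BasePromptMaker
from autorag.utils import result_to_dataframe


class UpperCasePromptMaker(BasePromptMaker):  # hypothetical example class
    @result_to_dataframe(["prompts"])
    def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
        query, retrieved_contents, prompt = self.cast_to_run(
            previous_result, *args, **kwargs
        )
        # build one prompt per row, upper-casing the joined contents for illustration
        return [
            prompt.format(query=q, retrieved_contents="\n\n".join(c).upper())
            for q, c in zip(query, retrieved_contents)
        ]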
autorag/nodes/promptmaker/fstring.py (new file, +49)
@@ -0,0 +1,49 @@
from typing import List

import pandas as pd

from autorag.nodes.promptmaker.base import BasePromptMaker
from autorag.utils import result_to_dataframe


class Fstring(BasePromptMaker):
    @result_to_dataframe(["prompts"])
    def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
        query, retrieved_contents, prompt = self.cast_to_run(
            previous_result, *args, **kwargs
        )
        return self._pure(prompt, query, retrieved_contents)

    def _pure(
        self, prompt: str, queries: List[str], retrieved_contents: List[List[str]]
    ) -> List[str]:
        """
        Make a prompt using f-string from a query and retrieved_contents.
        You must set a prompt or a list of prompts in the config YAML file like this:

        .. Code:: yaml
            nodes:
            - node_type: prompt_maker
              modules:
              - module_type: fstring
                prompt: [Answer this question: {query} \n\n {retrieved_contents},
                Read the passages carefully and answer this question: {query} \n\n Passages: {retrieved_contents}]

        :param prompt: A prompt string.
        :param queries: List of query strings.
        :param retrieved_contents: List of retrieved contents.
        :return: Prompts that are made by f-string.
        """

        def fstring_row(
            _prompt: str, _query: str, _retrieved_contents: List[str]
        ) -> str:
            contents_str = "\n\n".join(_retrieved_contents)
            return _prompt.format(query=_query, retrieved_contents=contents_str)

        return list(
            map(
                lambda x: fstring_row(prompt, x[0], x[1]),
                zip(queries, retrieved_contents),
            )
        )
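A standalone illustration (made-up sample data, not from the commit) of what fstring_row produces for a single row: the retrieved contents are joined with blank lines and substituted into the template.

# Illustrative only: mirrors fstring_row above for one query.
prompt = "Answer this question: {query} \n\n {retrieved_contents}"
query = "Who wrote the paper?"
retrieved_contents = ["Passage one.", "Passage two."]

result = prompt.format(
    query=query, retrieved_contents="\n\n".join(retrieved_contents)
)
print(result)  # prints the question followed by the two passages separated by blank lines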
autorag/nodes/promptmaker/long_context_reorder.py (new file, +83)
@@ -0,0 +1,83 @@
import logging
from typing import List

import numpy as np
import pandas as pd

from autorag.nodes.promptmaker.base import BasePromptMaker
from autorag.utils import result_to_dataframe

logger = logging.getLogger("AutoRAG")


class LongContextReorder(BasePromptMaker):
    @result_to_dataframe(["prompts"])
    def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
        query, retrieved_contents, prompt = self.cast_to_run(
            previous_result, *args, **kwargs
        )
        assert (
            "retrieve_scores" in previous_result.columns
        ), "previous_result must have retrieve_scores column."
        retrieve_scores = previous_result["retrieve_scores"].tolist()
        return self._pure(prompt, query, retrieved_contents, retrieve_scores)

    def _pure(
        self,
        prompt: str,
        queries: List[str],
        retrieved_contents: List[List[str]],
        retrieve_scores: List[List[float]],
    ) -> List[str]:
        """
        Models struggle to access significant details found
        in the center of extended contexts. A study
        (https://arxiv.org/abs/2307.03172) observed that the best
        performance typically arises when crucial data is positioned
        at the start or end of the input context. Additionally,
        as the input context lengthens, performance drops notably, even
        in models designed for long contexts.

        .. Code:: yaml
            nodes:
            - node_type: prompt_maker
              modules:
              - module_type: long_context_reorder
                prompt: [Answer this question: {query} \n\n {retrieved_contents},
                Read the passages carefully and answer this question: {query} \n\n Passages: {retrieved_contents}]

        :param prompt: A prompt string.
        :param queries: List of query strings.
        :param retrieved_contents: List of retrieved contents.
        :param retrieve_scores: List of retrieve scores.
        :return: Prompts that are made by long context reorder.
        """

        def long_context_reorder_row(
            _prompt: str,
            _query: str,
            _retrieved_contents: List[str],
            _retrieve_scores: List[float],
        ) -> str:
            if isinstance(_retrieved_contents, np.ndarray):
                _retrieved_contents = _retrieved_contents.tolist()
            if not len(_retrieved_contents) == len(_retrieve_scores):
                logger.info("If you use a summarizer, the reorder will not proceed.")
                return _prompt.format(
                    query=_query, retrieved_contents="\n\n".join(_retrieved_contents)
                )
            content_scores = list(zip(_retrieved_contents, _retrieve_scores))
            sorted_content_scores = sorted(
                content_scores, key=lambda x: x[1], reverse=True
            )
            content_result, score_result = zip(*sorted_content_scores)
            _retrieved_contents.append(content_result[0])
            contents_str = "\n\n".join(_retrieved_contents)
            return _prompt.format(query=_query, retrieved_contents=contents_str)

        return list(
            map(
                lambda x: long_context_reorder_row(prompt, x[0], x[1], x[2]),
                zip(queries, retrieved_contents, retrieve_scores),
            )
        )
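A standalone illustration (made-up passages and scores, not from the commit) of what long_context_reorder_row does when contents and scores align: the passages stay in their original order and the highest-scoring passage is appended once more at the end of the context.

# Illustrative only: mirrors the reordering step in long_context_reorder_row above.
contents = ["low passage", "high passage", "mid passage"]
scores = [0.1, 0.9, 0.5]

content_scores = sorted(zip(contents, scores), key=lambda x: x[1], reverse=True)
best_content = content_scores[0][0]    # "high passage"
reordered = contents + [best_content]  # original order, plus the best passage repeated at the end
print("\n\n".join(reordered))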
autorag/nodes/promptmaker/run.py (new file, +280)
@@ -0,0 +1,280 @@
import os
import pathlib
from copy import deepcopy
from typing import List, Dict, Optional, Union

import pandas as pd
import tokenlog

from autorag.evaluation import evaluate_generation
from autorag.evaluation.util import cast_metrics
from autorag.schema.metricinput import MetricInput
from autorag.strategy import measure_speed, filter_by_threshold, select_best
from autorag.support import get_support_modules
from autorag.utils import validate_qa_dataset
from autorag.utils.util import make_combinations, explode, split_dataframe


def run_prompt_maker_node(
    modules: List,
    module_params: List[Dict],
    previous_result: pd.DataFrame,
    node_line_dir: str,
    strategies: Dict,
) -> pd.DataFrame:
    """
    Run prompt maker node.
    With this function, you can select the best prompt maker module.
    By default, when only one module is given, the evaluation is skipped.
    If you want to select the best prompt among modules, you can use strategies.
    When you use them, you must pass 'generator_modules' and their parameters in strategies,
    because generator modules and generation metrics are used to evaluate this module.
    It is recommended to use a single module and parameter set for evaluation,
    but you can use multiple modules and parameter sets.
    When you don't set a generator module in strategies, the default generator module is used.
    The default generator module is llama_index_llm with the openai gpt-3.5-turbo model.

    :param modules: Prompt maker module classes to run.
    :param module_params: Prompt maker module parameters.
    :param previous_result: Previous result dataframe.
        Could be query expansion's best result or qa data.
    :param node_line_dir: This node line's directory.
    :param strategies: Strategies for prompt maker node.
    :return: The best result dataframe.
        It contains previous result columns and the prompt maker's result column, which is 'prompts'.
    """
    if not os.path.exists(node_line_dir):
        os.makedirs(node_line_dir)
    node_dir = os.path.join(node_line_dir, "prompt_maker")
    if not os.path.exists(node_dir):
        os.makedirs(node_dir)
    project_dir = pathlib.PurePath(node_line_dir).parent.parent

    # run modules
    results, execution_times = zip(
        *map(
            lambda task: measure_speed(
                task[0].run_evaluator,
                project_dir=project_dir,
                previous_result=previous_result,
                **task[1],
            ),
            zip(modules, module_params),
        )
    )
    average_times = list(map(lambda x: x / len(results[0]), execution_times))

    # get average token usage
    token_usages = []
    for i, result in enumerate(results):
        token_logger = tokenlog.getLogger(
            f"prompt_maker_{i}", strategies.get("tokenizer", "gpt2")
        )
        token_logger.query_batch(result["prompts"].tolist())
        token_usages.append(token_logger.get_token_usage() / len(result))

    # save results to folder
    filepaths = list(
        map(lambda x: os.path.join(node_dir, f"{x}.parquet"), range(len(modules)))
    )
    list(
        map(lambda x: x[0].to_parquet(x[1], index=False), zip(results, filepaths))
    )  # execute save to parquet
    filenames = list(map(lambda x: os.path.basename(x), filepaths))

    # make summary file
    summary_df = pd.DataFrame(
        {
            "filename": filenames,
            "module_name": list(map(lambda module: module.__name__, modules)),
            "module_params": module_params,
            "execution_time": average_times,
            "average_prompt_token": token_usages,
        }
    )

    metric_names, metric_params = cast_metrics(strategies.get("metrics"))

    # Run evaluation when there is more than one module.
    if len(modules) > 1:
        # pop general keys from strategies (e.g. metrics, speed_threshold)
        general_key = ["metrics", "speed_threshold", "token_threshold", "tokenizer"]
        general_strategy = dict(
            filter(lambda x: x[0] in general_key, strategies.items())
        )
        extra_strategy = dict(
            filter(lambda x: x[0] not in general_key, strategies.items())
        )

        # first, filter by threshold if it is enabled.
        if general_strategy.get("speed_threshold") is not None:
            results, filenames = filter_by_threshold(
                results, average_times, general_strategy["speed_threshold"], filenames
            )

        # Calculate tokens and save to summary
        if general_strategy.get("token_threshold") is not None:
            results, filenames = filter_by_threshold(
                results, token_usages, general_strategy["token_threshold"], filenames
            )

        # run metrics before filtering
        if metric_names is None or len(metric_names) <= 0:
            raise ValueError(
                "You must set at least one metric for prompt maker evaluation."
            )

        # get generator modules from strategy
        generator_callables, generator_params = make_generator_callable_params(
            extra_strategy
        )

        # get generation_gt
        qa_data = pd.read_parquet(
            os.path.join(project_dir, "data", "qa.parquet"), engine="pyarrow"
        )
        validate_qa_dataset(qa_data)
        generation_gt = qa_data["generation_gt"].tolist()
        generation_gt = list(map(lambda x: x.tolist(), generation_gt))

        metric_inputs = [MetricInput(generation_gt=gen_gt) for gen_gt in generation_gt]

        all_prompts = []
        for result in results:
            all_prompts.extend(result["prompts"].tolist())

        evaluation_result_all = evaluate_one_prompt_maker_node(
            all_prompts,
            generator_callables,
            generator_params,
            metric_inputs * len(results),
            general_strategy["metrics"],
            project_dir,
            strategy_name=strategies.get("strategy", "mean"),
        )
        evaluation_results = split_dataframe(
            evaluation_result_all, chunk_size=len(results[0])
        )

        evaluation_df = pd.DataFrame(
            {
                "filename": filenames,
                **{
                    f"prompt_maker_{metric_name}": list(
                        map(lambda x: x[metric_name].mean(), evaluation_results)
                    )
                    for metric_name in metric_names
                },
            }
        )
        summary_df = pd.merge(
            on="filename", left=summary_df, right=evaluation_df, how="left"
        )

        best_result, best_filename = select_best(
            evaluation_results,
            metric_names,
            filenames,
            strategies.get("strategy", "mean"),
        )
        # change metric name columns to prompt_maker_metric_name
        best_result = best_result.rename(
            columns={
                metric_name: f"prompt_maker_{metric_name}"
                for metric_name in metric_names
            }
        )
        best_result = best_result.drop(columns=["generated_texts"])
    else:
        best_result, best_filename = results[0], filenames[0]

    # add 'is_best' column at summary file
    summary_df["is_best"] = summary_df["filename"] == best_filename

    best_result = pd.concat([previous_result, best_result], axis=1)

    # save files
    summary_df.to_csv(os.path.join(node_dir, "summary.csv"), index=False)
    best_result.to_parquet(
        os.path.join(node_dir, f"best_{os.path.splitext(best_filename)[0]}.parquet"),
        index=False,
    )

    return best_result


def make_generator_callable_params(strategy_dict: Dict):
    node_dict = deepcopy(strategy_dict)
    generator_module_list: Optional[List[Dict]] = node_dict.pop(
        "generator_modules", None
    )
    if generator_module_list is None:
        generator_module_list = [
            {
                "module_type": "llama_index_llm",
                "llm": "openai",
                "model": "gpt-3.5-turbo",
            }
        ]
    node_params = node_dict
    modules = list(
        map(
            lambda module_dict: get_support_modules(module_dict.pop("module_type")),
            generator_module_list,
        )
    )
    param_combinations = list(
        map(
            lambda module_dict: make_combinations({**module_dict, **node_params}),
            generator_module_list,
        )
    )
    return explode(modules, param_combinations)


def evaluate_one_prompt_maker_node(
    prompts: List[str],
    generator_classes: List,
    generator_params: List[Dict],
    metric_inputs: List[MetricInput],
    metrics: Union[List[str], List[Dict]],
    project_dir,
    strategy_name: str,
) -> pd.DataFrame:
    input_df = pd.DataFrame({"prompts": prompts})
    generator_results = list(
        map(
            lambda x: x[0].run_evaluator(
                project_dir=project_dir, previous_result=input_df, **x[1]
            ),
            zip(generator_classes, generator_params),
        )
    )
    evaluation_results = list(
        map(
            lambda x: evaluate_generator_result(x[0], metric_inputs, metrics),
            zip(generator_results, generator_classes),
        )
    )
    metric_names = (
        list(map(lambda x: x["metric_name"], metrics))
        if isinstance(metrics[0], dict)
        else metrics
    )
    best_result, _ = select_best(
        evaluation_results, metric_names, strategy_name=strategy_name
    )
    best_result = pd.concat([input_df, best_result], axis=1)
    return best_result  # it has 'generated_texts' column


def evaluate_generator_result(
    result_df: pd.DataFrame,
    metric_inputs: List[MetricInput],
    metrics: Union[List[str], List[Dict]],
) -> pd.DataFrame:
    @evaluate_generation(metric_inputs=metric_inputs, metrics=metrics)
    def evaluate(df):
        return df["generated_texts"].tolist()

    return evaluate(result_df)
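As a reference (illustrative values, not from the commit), a strategies dict passed to run_prompt_maker_node could look like the sketch below. It only uses keys the function above reads; the metric names and threshold values are assumptions to adapt to your setup.

# Hypothetical strategies dict for run_prompt_maker_node.
strategies = {
    "metrics": ["bleu", "meteor", "rouge"],  # assumed metric names; check your AutoRAG version
    "speed_threshold": 10,                   # filter out modules slower than this per-row average
    "token_threshold": 4000,                 # filter out modules whose average prompt token count exceeds this
    "tokenizer": "gpt2",                     # default used by the token logger above
    "strategy": "mean",                      # how select_best aggregates metric scores
    "generator_modules": [
        {
            "module_type": "llama_index_llm",
            "llm": "openai",
            "model": "gpt-3.5-turbo",
        }
    ],
}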
autorag/nodes/promptmaker/window_replacement.py (new file, +85)
@@ -0,0 +1,85 @@
import logging
import os
from typing import List, Dict

import pandas as pd

from autorag.nodes.promptmaker.base import BasePromptMaker
from autorag.utils import result_to_dataframe, fetch_contents

logger = logging.getLogger("AutoRAG")


class WindowReplacement(BasePromptMaker):
    def __init__(self, project_dir: str, *args, **kwargs):
        super().__init__(project_dir, *args, **kwargs)
        # load corpus
        data_dir = os.path.join(project_dir, "data")
        self.corpus_data = pd.read_parquet(
            os.path.join(data_dir, "corpus.parquet"), engine="pyarrow"
        )

    @result_to_dataframe(["prompts"])
    def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
        query, retrieved_contents, prompt = self.cast_to_run(
            previous_result, *args, **kwargs
        )
        retrieved_ids = previous_result["retrieved_ids"].tolist()
        # get metadata from corpus
        retrieved_metadata = fetch_contents(
            self.corpus_data, retrieved_ids, column_name="metadata"
        )
        return self._pure(prompt, query, retrieved_contents, retrieved_metadata)

    def _pure(
        self,
        prompt: str,
        queries: List[str],
        retrieved_contents: List[List[str]],
        retrieved_metadata: List[List[Dict]],
    ) -> List[str]:
        """
        Replace retrieved_contents with a window to create a prompt
        (only available for a corpus chunked with the sentence window method).
        You must set a prompt or a list of prompts in the config YAML file like this:

        .. Code:: yaml
            nodes:
            - node_type: prompt_maker
              modules:
              - module_type: window_replacement
                prompt: [Answer this question: {query} \n\n {retrieved_contents},
                Read the passages carefully and answer this question: {query} \n\n Passages: {retrieved_contents}]

        :param prompt: A prompt string.
        :param queries: List of query strings.
        :param retrieved_contents: List of retrieved contents.
        :param retrieved_metadata: List of retrieved metadata.
        :return: Prompts that are made by window_replacement.
        """

        def window_replacement_row(
            _prompt: str,
            _query: str,
            _retrieved_contents,
            _retrieved_metadata: List[Dict],
        ) -> str:
            window_list = []
            for content, metadata in zip(_retrieved_contents, _retrieved_metadata):
                if "window" in metadata:
                    window_list.append(metadata["window"])
                else:
                    window_list.append(content)
                    logger.info(
                        "Only available for corpus chunked with Sentence window method. "
                        "window_replacement will not proceed."
                    )
            contents_str = "\n\n".join(window_list)
            return _prompt.format(query=_query, retrieved_contents=contents_str)

        return list(
            map(
                lambda x: window_replacement_row(prompt, x[0], x[1], x[2]),
                zip(queries, retrieved_contents, retrieved_metadata),
            )
        )
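A standalone illustration (made-up chunks and metadata, not from the commit) of the replacement step above: a chunk whose metadata carries a 'window' entry, as produced by sentence-window chunking, is swapped for that window text, while chunks without one fall back to their original content.

# Illustrative only: mirrors window_replacement_row above for one row.
contents = ["short sentence A", "short sentence B"]
metadata = [
    {"window": "sentence before. short sentence A. sentence after."},
    {},  # no window -> fall back to the original content
]

window_list = [
    meta["window"] if "window" in meta else content
    for content, meta in zip(contents, metadata)
]
print("\n\n".join(window_list))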