Fix Dockerfile build issue

kyy
2025-03-18 16:41:12 +09:00
parent 6814230bfb
commit 9323aa254a
228 changed files with 467 additions and 3488 deletions

@@ -0,0 +1,4 @@
from .llama_index_llm import LlamaIndexLLM
from .openai_llm import OpenAILLM
from .vllm import Vllm
from .vllm_api import VllmAPI

@@ -0,0 +1,103 @@
import abc
import functools
import logging
from pathlib import Path
from typing import Union, Tuple, List
import pandas as pd
from llama_index.core.output_parsers import PydanticOutputParser
from autorag import generator_models
from autorag.schema import BaseModule
from autorag.utils import result_to_dataframe
logger = logging.getLogger("AutoRAG")
class BaseGenerator(BaseModule, metaclass=abc.ABCMeta):
def __init__(self, project_dir: str, llm: str, *args, **kwargs):
logger.info(f"Initialize generator node - {self.__class__.__name__}")
self.llm = llm
def __del__(self):
logger.info(f"Deleting generator module - {self.__class__.__name__}")
def cast_to_run(self, previous_result: pd.DataFrame, *args, **kwargs):
logger.info(f"Running generator node - {self.__class__.__name__} module...")
assert (
"prompts" in previous_result.columns
), "previous_result must contain prompts column."
prompts = previous_result["prompts"].tolist()
return prompts
def structured_output(self, prompts: List[str], output_cls):
response, _, _ = self._pure(prompts)
parser = PydanticOutputParser(output_cls)
result = []
for res in response:
try:
result.append(parser.parse(res))
except Exception as e:
logger.warning(
f"Error parsing response: {e} \nSo returning None instead in this case."
)
result.append(None)
return result
@abc.abstractmethod
async def astream(self, prompt: str, **kwargs):
pass
@abc.abstractmethod
def stream(self, prompt: str, **kwargs):
pass
def generator_node(func):
@functools.wraps(func)
@result_to_dataframe(["generated_texts", "generated_tokens", "generated_log_probs"])
def wrapper(
project_dir: Union[str, Path], previous_result: pd.DataFrame, llm: str, **kwargs
) -> Tuple[List[str], List[List[int]], List[List[float]]]:
"""
This decorator makes a generator module to be a node.
It automatically extracts prompts from previous_result and runs the generator function.
Plus, it retrieves the llm instance from autorag.generator_models.
:param project_dir: The project directory.
:param previous_result: The previous result that contains prompts,
:param llm: The llm name that you want to use.
:param kwargs: The extra parameters for initializing the llm instance.
:return: Pandas dataframe that contains generated texts, generated tokens, and generated log probs.
Each column is "generated_texts", "generated_tokens", and "generated_log_probs".
"""
logger.info(f"Running generator node - {func.__name__} module...")
assert (
"prompts" in previous_result.columns
), "previous_result must contain prompts column."
prompts = previous_result["prompts"].tolist()
if func.__name__ == "llama_index_llm":
if llm not in generator_models:
raise ValueError(
f"{llm} is not a valid llm name. Please check the llm name. "
"You can check valid llm names in autorag.generator_models."
)
batch = kwargs.pop("batch", 16)
if llm == "huggingfacellm":
model_name = kwargs.pop("model", None)
if model_name is not None:
kwargs["model_name"] = model_name
else:
if "model_name" not in kwargs.keys():
raise ValueError(
"`model` or `model_name` parameter must be provided for using huggingfacellm."
)
kwargs["tokenizer_name"] = kwargs["model_name"]
llm_instance = generator_models[llm](**kwargs)
result = func(prompts=prompts, llm=llm_instance, batch=batch)
del llm_instance
return result
else:
return func(prompts=prompts, llm=llm, **kwargs)
return wrapper
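
For illustration, a minimal sketch of how the generator_node decorator above could wrap a custom module function. The echo_generator function, project path, and prompt are hypothetical and only show the calling convention; a real module would call the llm instead of echoing.

import pandas as pd
from autorag.nodes.generator.base import generator_node

@generator_node
def echo_generator(prompts, llm, **kwargs):
    # Toy body: echo the prompts back with dummy token ids and log probs.
    return prompts, [[0] for _ in prompts], [[0.0] for _ in prompts]

previous_result = pd.DataFrame({"prompts": ["Answer briefly: what is RAG?"]})
result_df = echo_generator(
    project_dir="./project",  # hypothetical project directory
    previous_result=previous_result,
    llm=None,  # passed through unchanged for non-llama_index_llm functions
)
print(result_df[["generated_texts", "generated_tokens", "generated_log_probs"]])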

@@ -0,0 +1,97 @@
from typing import List, Tuple
import pandas as pd
from llama_index.core.base.llms.base import BaseLLM
from transformers import AutoTokenizer
from autorag import generator_models
from autorag.nodes.generator.base import BaseGenerator
from autorag.utils.util import (
get_event_loop,
process_batch,
result_to_dataframe,
pop_params,
)
class LlamaIndexLLM(BaseGenerator):
def __init__(self, project_dir: str, llm: str, batch: int = 16, *args, **kwargs):
"""
Initialize the Llama Index LLM module.
:param project_dir: The project directory.
:param llm: The llama index LLM name, as registered in autorag.generator_models.
:param batch: The batch size for the llm.
Lower it if you run into errors.
Default is 16.
:param kwargs: The extra parameters for initializing the llm instance.
"""
super().__init__(project_dir=project_dir, llm=llm)
if self.llm not in generator_models.keys():
raise ValueError(
f"{self.llm} is not a valid llm name. Please check the llm name. "
"You can check valid llm names in autorag.generator_models."
)
self.batch = batch
llm_class = generator_models[self.llm]
if llm_class.class_name() in [
"HuggingFace_LLM",
"HuggingFaceInferenceAPI",
"TextGenerationInference",
]:
model_name = kwargs.pop("model", None)
if model_name is not None:
kwargs["model_name"] = model_name
else:
if "model_name" not in kwargs.keys():
raise ValueError(
"`model` or `model_name` parameter must be provided for using huggingfacellm."
)
kwargs["tokenizer_name"] = kwargs["model_name"]
self.llm_instance: BaseLLM = llm_class(**pop_params(llm_class.__init__, kwargs))
def __del__(self):
super().__del__()
del self.llm_instance
@result_to_dataframe(["generated_texts", "generated_tokens", "generated_log_probs"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
prompts = self.cast_to_run(previous_result=previous_result)
return self._pure(prompts)
def _pure(
self,
prompts: List[str],
) -> Tuple[List[str], List[List[int]], List[List[float]]]:
"""
Llama Index LLM module.
It gets the LLM instance from llama index and returns the text generated for each input prompt.
It does not produce real log probs; it returns pseudo log probs,
which are not meant to be used by other modules.
:param prompts: A list of prompts.
:return: A tuple of three elements.
The first element is a list of generated texts.
The second element is a list of the generated texts' token ids, tokenized with the GPT-2 tokenizer.
The third element is a list of generated text's pseudo log probs.
"""
tasks = [self.llm_instance.acomplete(prompt) for prompt in prompts]
loop = get_event_loop()
results = loop.run_until_complete(process_batch(tasks, batch_size=self.batch))
generated_texts = list(map(lambda x: x.text, results))
tokenizer = AutoTokenizer.from_pretrained("gpt2", use_fast=False)
tokenized_ids = tokenizer(generated_texts).data["input_ids"]
pseudo_log_probs = list(map(lambda x: [0.5] * len(x), tokenized_ids))
return generated_texts, tokenized_ids, pseudo_log_probs
async def astream(self, prompt: str, **kwargs):
async for completion_response in await self.llm_instance.astream_complete(
prompt
):
yield completion_response.text
def stream(self, prompt: str, **kwargs):
for completion_response in self.llm_instance.stream_complete(prompt):
yield completion_response.text
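
A minimal usage sketch of LlamaIndexLLM, assuming "openai" is registered in autorag.generator_models (as the check above requires) and an OpenAI key is available; the project path, model name, and prompt are placeholders.

import pandas as pd
from autorag.nodes.generator import LlamaIndexLLM

previous_result = pd.DataFrame({"prompts": ["Summarize RAG in one sentence."]})
generator = LlamaIndexLLM(
    project_dir="./project",  # hypothetical project directory
    llm="openai",             # must be a key of autorag.generator_models
    model="gpt-4o-mini",      # extra kwargs are forwarded to the llama index LLM class
    batch=4,
)
result_df = generator.pure(previous_result)
print(result_df["generated_texts"].tolist())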

@@ -0,0 +1,296 @@
import logging
from typing import List, Tuple
import pandas as pd
import tiktoken
from openai import AsyncOpenAI
from tiktoken import Encoding
from autorag.nodes.generator.base import BaseGenerator
from autorag.utils.util import (
get_event_loop,
process_batch,
pop_params,
result_to_dataframe,
)
logger = logging.getLogger("AutoRAG")
MAX_TOKEN_DICT = { # model name : token limit
"gpt-4.5-preview": 128_000,
"gpt-4.5-preview-2025-02-27": 128_000,
"o1": 200_000,
"o1-preview": 128_000,
"o1-preview-2024-09-12": 128_000,
"o1-mini": 128_000,
"o1-mini-2024-09-12": 128_000,
"o3-mini": 200_000,
"gpt-4o-mini": 128_000,
"gpt-4o-mini-2024-07-18": 128_000,
"gpt-4o": 128_000,
"gpt-4o-2024-08-06": 128_000,
"gpt-4o-2024-05-13": 128_000,
"chatgpt-4o-latest": 128_000,
"gpt-4-turbo": 128_000,
"gpt-4-turbo-2024-04-09": 128_000,
"gpt-4-turbo-preview": 128_000,
"gpt-4-0125-preview": 128_000,
"gpt-4-1106-preview": 128_000,
"gpt-4-vision-preview": 128_000,
"gpt-4-1106-vision-preview": 128_000,
"gpt-4": 8_192,
"gpt-4-0613": 8_192,
"gpt-4-32k": 32_768,
"gpt-4-32k-0613": 32_768,
"gpt-3.5-turbo-0125": 16_385,
"gpt-3.5-turbo": 16_385,
"gpt-3.5-turbo-1106": 16_385,
"gpt-3.5-turbo-instruct": 4_096,
"gpt-3.5-turbo-16k": 16_385,
"gpt-3.5-turbo-0613": 4_096,
"gpt-3.5-turbo-16k-0613": 16_385,
}
class OpenAILLM(BaseGenerator):
def __init__(self, project_dir, llm: str, batch: int = 16, *args, **kwargs):
super().__init__(project_dir, llm, *args, **kwargs)
assert batch > 0, "batch size must be greater than 0."
self.batch = batch
client_init_params = pop_params(AsyncOpenAI.__init__, kwargs)
self.client = AsyncOpenAI(**client_init_params)
if self.llm.startswith("gpt-4.5"):
self.tokenizer = tiktoken.get_encoding("o200k_base")
else:
self.tokenizer = tiktoken.encoding_for_model(self.llm)
max_token_size = MAX_TOKEN_DICT.get(self.llm)
if max_token_size is None:
raise ValueError(
f"Model {self.llm} is not supported. "
f"Please select a model from {list(MAX_TOKEN_DICT.keys())}"
)
self.max_token_size = max_token_size - 7  # reserve room for the chat message wrapper tokens
@result_to_dataframe(["generated_texts", "generated_tokens", "generated_log_probs"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
prompts = self.cast_to_run(previous_result)
return self._pure(prompts, **kwargs)
def _pure(
self,
prompts: List[str],
truncate: bool = True,
**kwargs,
) -> Tuple[List[str], List[List[int]], List[List[float]]]:
"""
OpenAI generator module.
Uses the official openai library to generate answers from the given prompts.
It returns real token ids and log probs, so use this module when you need them.
The model name, batch size, and API key (env variable `OPENAI_API_KEY`) are set in the constructor.
:param prompts: A list of prompts.
:param truncate: Whether to truncate the input prompts to the model's context size.
Default is True.
:param kwargs: Optional parameters for the openai api call `openai.chat.completions.create`.
See https://platform.openai.com/docs/api-reference/chat/create for more details.
:return: A tuple of three elements.
The first element is a list of generated text.
The second element is a list of generated text's token ids.
The third element is a list of generated text's log probs.
"""
if kwargs.get("logprobs") is not None:
kwargs.pop("logprobs")
logger.warning(
"The logprobs parameter has no effect here; it is always set to True."
)
if kwargs.get("n") is not None:
kwargs.pop("n")
logger.warning("The n parameter has no effect here; it is always set to 1.")
# TODO: fix this after updating tiktoken for the gpt-4.5 model. It is not supported yet.
if truncate:
prompts = list(
map(
lambda prompt: truncate_by_token(
prompt, self.tokenizer, self.max_token_size
),
prompts,
)
)
openai_chat_params = pop_params(self.client.chat.completions.create, kwargs)
loop = get_event_loop()
if self.llm.startswith("o1") or self.llm.startswith("o3"):
tasks = [
self.get_result_o1(prompt, **openai_chat_params) for prompt in prompts
]
else:
tasks = [
self.get_result(prompt, **openai_chat_params) for prompt in prompts
]
result = loop.run_until_complete(process_batch(tasks, self.batch))
answer_result = list(map(lambda x: x[0], result))
token_result = list(map(lambda x: x[1], result))
logprob_result = list(map(lambda x: x[2], result))
return answer_result, token_result, logprob_result
def structured_output(self, prompts: List[str], output_cls, **kwargs):
supported_models = [
"gpt-4o-mini-2024-07-18",
"gpt-4o-2024-08-06",
]
if self.llm not in supported_models:
raise ValueError(
f"{self.llm} is not a valid model name for structured output. "
f"Please select the model between {supported_models}"
)
if kwargs.get("logprobs") is not None:
kwargs.pop("logprobs")
logger.warning(
"The logprobs parameter has no effect here; it is always set to False."
)
if kwargs.get("n") is not None:
kwargs.pop("n")
logger.warning("The n parameter has no effect here; it is always set to 1.")
# TODO: fix this after updating tiktoken for the gpt-4.5 model. It is not supported yet.
prompts = list(
map(
lambda prompt: truncate_by_token(
prompt, self.tokenizer, self.max_token_size
),
prompts,
)
)
openai_chat_params = pop_params(self.client.beta.chat.completions.parse, kwargs)
loop = get_event_loop()
tasks = [
self.get_structured_result(prompt, output_cls, **openai_chat_params)
for prompt in prompts
]
result = loop.run_until_complete(process_batch(tasks, self.batch))
return result
async def astream(self, prompt: str, **kwargs):
# TODO: gpt-4.5-preview does not support logprobs. It should be fixed after the openai update.
if kwargs.get("logprobs") is not None:
kwargs.pop("logprobs")
logger.warning(
"The logprobs parameter has no effect here; it is always set to False."
)
if kwargs.get("n") is not None:
kwargs.pop("n")
logger.warning("The n parameter has no effect here; it is always set to 1.")
prompt = truncate_by_token(prompt, self.tokenizer, self.max_token_size)
openai_chat_params = pop_params(self.client.chat.completions.create, kwargs)
stream = await self.client.chat.completions.create(
model=self.llm,
messages=[
{"role": "user", "content": prompt},
],
logprobs=False,
n=1,
stream=True,
**openai_chat_params,
)
result = ""
async for chunk in stream:
if chunk.choices[0].delta.content is not None:
result += chunk.choices[0].delta.content
yield result
def stream(self, prompt: str, **kwargs):
raise NotImplementedError("stream method is not implemented yet.")
async def get_structured_result(self, prompt: str, output_cls, **kwargs):
logprobs = True
if self.llm.startswith("gpt-4.5"):
logprobs = False
response = await self.client.beta.chat.completions.parse(
model=self.llm,
messages=[
{"role": "user", "content": prompt},
],
response_format=output_cls,
logprobs=logprobs,
n=1,
**kwargs,
)
return response.choices[0].message.parsed
async def get_result(self, prompt: str, **kwargs):
# TODO: gpt-4.5-preview does not support logprobs. It should be fixed after the openai update.
logprobs = True
if self.llm.startswith("gpt-4.5"):
logprobs = False
response = await self.client.chat.completions.create(
model=self.llm,
messages=[
{"role": "user", "content": prompt},
],
logprobs=logprobs,
n=1,
**kwargs,
)
choice = response.choices[0]
answer = choice.message.content
# TODO: gpt-4.5-preview does not support logprobs. It should be fixed after the openai update.
if self.llm.startswith("gpt-4.5"):
tokens = self.tokenizer.encode(answer, allowed_special="all")
logprobs = [0.5] * len(tokens)
logger.warning("gpt-4.5-preview does not support logprobs yet.")
else:
logprobs = list(map(lambda x: x.logprob, choice.logprobs.content))
tokens = list(
map(
lambda x: self.tokenizer.encode(x.token, allowed_special="all")[0],
choice.logprobs.content,
)
)
assert len(tokens) == len(
logprobs
), "tokens and logprobs have different lengths."
return answer, tokens, logprobs
async def get_result_o1(self, prompt: str, **kwargs):
assert self.llm.startswith("o1") or self.llm.startswith(
"o3"
), "This function only supports o1 and o3 models."
# o1 and o3 models only support temperature 1.
# See https://platform.openai.com/docs/guides/reasoning about the beta limitations of o1 models.
kwargs["temperature"] = 1
kwargs["top_p"] = 1
kwargs["presence_penalty"] = 0
kwargs["frequency_penalty"] = 0
response = await self.client.chat.completions.create(
model=self.llm,
messages=[
{"role": "user", "content": prompt},
],
logprobs=False,
n=1,
**kwargs,
)
answer = response.choices[0].message.content
tokens = self.tokenizer.encode(answer, allowed_special="all")
pseudo_log_probs = [0.5] * len(tokens)
return answer, tokens, pseudo_log_probs
def truncate_by_token(prompt: str, tokenizer: Encoding, max_token_size: int):
tokens = tokenizer.encode(prompt, allowed_special="all")
return tokenizer.decode(tokens[:max_token_size])
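
A minimal usage sketch of OpenAILLM, assuming OPENAI_API_KEY is set in the environment; the project path and prompt are placeholders.

import pandas as pd
from autorag.nodes.generator import OpenAILLM

previous_result = pd.DataFrame({"prompts": ["What does RAG stand for?"]})
generator = OpenAILLM(project_dir="./project", llm="gpt-4o-mini", batch=8)
result_df = generator.pure(previous_result, temperature=0.2)
# Unlike LlamaIndexLLM, real token ids and log probs are returned here.
print(result_df[["generated_texts", "generated_log_probs"]])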

@@ -0,0 +1,144 @@
import os
import pathlib
from typing import List, Dict, Union
import pandas as pd
from autorag.evaluation import evaluate_generation
from autorag.evaluation.util import cast_metrics
from autorag.schema.metricinput import MetricInput
from autorag.strategy import measure_speed, filter_by_threshold, select_best
from autorag.utils.util import to_list
def run_generator_node(
modules: List,
module_params: List[Dict],
previous_result: pd.DataFrame,
node_line_dir: str,
strategies: Dict,
) -> pd.DataFrame:
"""
Run evaluation and select the best module among generator node results,
then save the results and summary to the generator node directory.
:param modules: Generator modules to run.
:param module_params: Generator module parameters.
Including node parameters, which are used for every module in this node.
:param previous_result: Previous result dataframe.
Could be prompt maker node's result.
:param node_line_dir: This node line's directory.
:param strategies: Strategies for generator node.
:return: The best result dataframe.
It contains previous result columns and generator node's result columns.
"""
if not os.path.exists(node_line_dir):
os.makedirs(node_line_dir)
project_dir = pathlib.PurePath(node_line_dir).parent.parent
node_dir = os.path.join(node_line_dir, "generator") # node name
if not os.path.exists(node_dir):
os.makedirs(node_dir)
qa_data = pd.read_parquet(
os.path.join(project_dir, "data", "qa.parquet"), engine="pyarrow"
)
if "generation_gt" not in qa_data.columns:
raise ValueError("You must have 'generation_gt' column in qa.parquet.")
results, execution_times = zip(
*map(
lambda x: measure_speed(
x[0].run_evaluator,
project_dir=project_dir,
previous_result=previous_result,
**x[1],
),
zip(modules, module_params),
)
)
average_times = list(map(lambda x: x / len(results[0]), execution_times))
# get average token usage
token_usages = list(map(lambda x: x["generated_tokens"].apply(len).mean(), results))
# make rows to metric_inputs
generation_gt = to_list(qa_data["generation_gt"].tolist())
metric_inputs = [MetricInput(generation_gt=gen_gt) for gen_gt in generation_gt]
metric_names, metric_params = cast_metrics(strategies.get("metrics"))
if metric_names is None or len(metric_names) <= 0:
raise ValueError("You must provide at least one metric for generator evaluation.")
results = list(
map(
lambda result: evaluate_generator_node(
result, metric_inputs, strategies.get("metrics")
),
results,
)
)
# save results to folder
filepaths = list(
map(lambda x: os.path.join(node_dir, f"{x}.parquet"), range(len(modules)))
)
list(
map(lambda x: x[0].to_parquet(x[1], index=False), zip(results, filepaths))
) # execute save to parquet
filenames = list(map(lambda x: os.path.basename(x), filepaths))
summary_df = pd.DataFrame(
{
"filename": filenames,
"module_name": list(map(lambda module: module.__name__, modules)),
"module_params": module_params,
"execution_time": average_times,
"average_output_token": token_usages,
**{
metric: list(map(lambda x: x[metric].mean(), results))
for metric in metric_names
},
}
)
# filter by strategies
if strategies.get("speed_threshold") is not None:
results, filenames = filter_by_threshold(
results, average_times, strategies["speed_threshold"], filenames
)
if strategies.get("token_threshold") is not None:
results, filenames = filter_by_threshold(
results, token_usages, strategies["token_threshold"], filenames
)
selected_result, selected_filename = select_best(
results, metric_names, filenames, strategies.get("strategy", "mean")
)
best_result = pd.concat([previous_result, selected_result], axis=1)
# add 'is_best' column at summary file
summary_df["is_best"] = summary_df["filename"] == selected_filename
# save files
summary_df.to_csv(os.path.join(node_dir, "summary.csv"), index=False)
best_result.to_parquet(
os.path.join(
node_dir, f"best_{os.path.splitext(selected_filename)[0]}.parquet"
),
index=False,
)
return best_result
def evaluate_generator_node(
result_df: pd.DataFrame,
metric_inputs: List[MetricInput],
metrics: Union[List[str], List[Dict]],
):
@evaluate_generation(metric_inputs=metric_inputs, metrics=metrics)
def evaluate_generation_module(df: pd.DataFrame):
return (
df["generated_texts"].tolist(),
df["generated_tokens"].tolist(),
df["generated_log_probs"].tolist(),
)
return evaluate_generation_module(result_df)
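
A hypothetical direct call to run_generator_node; in normal use it is driven by AutoRAG's evaluator from a YAML config. It assumes an existing project directory with data/qa.parquet (containing a generation_gt column), a node_line_dir sitting two levels below that project directory, and that this file lives at autorag/nodes/generator/run.py. The paths, prompt, metric names, and threshold are placeholders.

import pandas as pd
from autorag.nodes.generator import OpenAILLM
from autorag.nodes.generator.run import run_generator_node

previous_result = pd.DataFrame({"prompts": ["Answer the question: What does RAG stand for?"]})
best_df = run_generator_node(
    modules=[OpenAILLM],
    module_params=[{"llm": "gpt-4o-mini", "batch": 8}],
    previous_result=previous_result,
    node_line_dir="./project/benchmark/node_line_1",  # project_dir is resolved two levels up
    strategies={"metrics": ["bleu", "rouge"], "speed_threshold": 30},
)
print(best_df.columns.tolist())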

@@ -0,0 +1,121 @@
import gc
from copy import deepcopy
from typing import List, Tuple
import pandas as pd
from autorag.nodes.generator.base import BaseGenerator
from autorag.utils import result_to_dataframe
from autorag.utils.util import pop_params, to_list
class Vllm(BaseGenerator):
def __init__(self, project_dir: str, llm: str, **kwargs):
super().__init__(project_dir, llm, **kwargs)
try:
from vllm import SamplingParams, LLM
except ImportError:
raise ImportError(
"Please install vllm library. You can install it by running `pip install vllm`."
)
model_from_kwargs = kwargs.pop("model", None)
model = llm if model_from_kwargs is None else model_from_kwargs
input_kwargs = deepcopy(kwargs)
sampling_params_init_params = pop_params(
SamplingParams.from_optional, input_kwargs
)
self.vllm_model = LLM(model, **input_kwargs)
# remove keys from kwargs that are not SamplingParams parameters
kwargs_keys = list(kwargs.keys())
for key in kwargs_keys:
if key not in sampling_params_init_params:
kwargs.pop(key)
def __del__(self):
try:
import torch
import contextlib
if torch.cuda.is_available():
from vllm.distributed.parallel_state import (
destroy_model_parallel,
destroy_distributed_environment,
)
destroy_model_parallel()
destroy_distributed_environment()
del self.vllm_model.llm_engine.model_executor
del self.vllm_model
with contextlib.suppress(AssertionError):
torch.distributed.destroy_process_group()
gc.collect()
torch.cuda.empty_cache()
torch.cuda.synchronize()
except ImportError:
del self.vllm_model
super().__del__()
@result_to_dataframe(["generated_texts", "generated_tokens", "generated_log_probs"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
prompts = self.cast_to_run(previous_result)
return self._pure(prompts, **kwargs)
def _pure(
self, prompts: List[str], **kwargs
) -> Tuple[List[str], List[List[int]], List[List[float]]]:
"""
Vllm module.
It uses the vLLM instance and returns the texts generated for the input prompts.
You can set logprobs to get the log probs of the generated text.
Default logprobs is 1.
:param prompts: A list of prompts.
:param kwargs: The extra parameters for generating the text.
:return: A tuple of three elements.
The first element is a list of generated text.
The second element is a list of generated text's token ids.
The third element is a list of generated text's log probs.
"""
try:
from vllm.outputs import RequestOutput
from vllm.sequence import SampleLogprobs
from vllm import SamplingParams
except ImportError:
raise ImportError(
"Please install vllm library. You can install it by running `pip install vllm`."
)
if "logprobs" not in kwargs:
kwargs["logprobs"] = 1
sampling_params = pop_params(SamplingParams.from_optional, kwargs)
generate_params = SamplingParams(**sampling_params)
results: List[RequestOutput] = self.vllm_model.generate(
prompts, generate_params
)
generated_texts = list(map(lambda x: x.outputs[0].text, results))
generated_token_ids = list(map(lambda x: x.outputs[0].token_ids, results))
log_probs: List[SampleLogprobs] = list(
map(lambda x: x.outputs[0].logprobs, results)
)
generated_log_probs = list(
map(
lambda x: list(map(lambda y: y[0][y[1]].logprob, zip(x[0], x[1]))),
zip(log_probs, generated_token_ids),
)
)
return (
to_list(generated_texts),
to_list(generated_token_ids),
to_list(generated_log_probs),
)
async def astream(self, prompt: str, **kwargs):
raise NotImplementedError
def stream(self, prompt: str, **kwargs):
raise NotImplementedError
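
A minimal usage sketch of the Vllm module, assuming the vllm package is installed and a GPU is available; the model id, project path, and sampling values are placeholders.

import pandas as pd
from autorag.nodes.generator import Vllm

previous_result = pd.DataFrame({"prompts": ["Explain RAG in one sentence."]})
generator = Vllm(
    project_dir="./project",
    llm="mistralai/Mistral-7B-Instruct-v0.2",  # any model id that vllm can load
)
# Sampling parameters such as temperature and max_tokens are forwarded to SamplingParams.
result_df = generator.pure(previous_result, temperature=0.7, max_tokens=128)
print(result_df["generated_texts"].tolist())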

@@ -0,0 +1,176 @@
import logging
from typing import List, Tuple
import time
import pandas as pd
import requests
from asyncio import to_thread
from autorag.nodes.generator.base import BaseGenerator
from autorag.utils.util import get_event_loop, process_batch, result_to_dataframe
logger = logging.getLogger("AutoRAG")
DEFAULT_MAX_TOKENS = 4096 # Default token limit
class VllmAPI(BaseGenerator):
def __init__(
self,
project_dir,
llm: str,
uri: str,
max_tokens: int = None,
batch: int = 16,
*args,
**kwargs,
):
"""
VLLM API Wrapper for OpenAI-compatible chat/completions format.
:param project_dir: Project directory.
:param llm: Model name (e.g., LLaMA model).
:param uri: VLLM API server URI.
:param max_tokens: Maximum token limit.
Default is 4096.
:param batch: Request batch size.
Default is 16.
"""
super().__init__(project_dir, llm, *args, **kwargs)
assert batch > 0, "Batch size must be greater than 0."
self.uri = uri.rstrip("/") # Set API URI
self.batch = batch
# Use the provided max_tokens if available, otherwise use the default
self.max_token_size = max_tokens if max_tokens else DEFAULT_MAX_TOKENS
self.max_model_len = self.get_max_model_length()
logger.info(f"{llm} max model length: {self.max_model_len}")
@result_to_dataframe(["generated_texts", "generated_tokens", "generated_log_probs"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
prompts = self.cast_to_run(previous_result)
return self._pure(prompts, **kwargs)
def _pure(
self, prompts: List[str], truncate: bool = True, **kwargs
) -> Tuple[List[str], List[List[int]], List[List[float]]]:
"""
Method to call the VLLM API to generate text.
:param prompts: List of input prompts.
:param truncate: Whether to truncate input prompts to fit within the token limit.
:param kwargs: Additional options (e.g., temperature, top_p).
:return: Generated text, token lists, and log probability lists.
"""
if kwargs.get("logprobs") is not None:
kwargs.pop("logprobs")
logger.warning(
"The logprobs parameter has no effect here; it is always set to True."
)
if kwargs.get("n") is not None:
kwargs.pop("n")
logger.warning("The n parameter has no effect here; it is always set to 1.")
if truncate:
prompts = list(map(lambda p: self.truncate_by_token(p), prompts))
loop = get_event_loop()
tasks = [to_thread(self.get_result, prompt, **kwargs) for prompt in prompts]
results = loop.run_until_complete(process_batch(tasks, self.batch))
answer_result = list(map(lambda x: x[0], results))
token_result = list(map(lambda x: x[1], results))
logprob_result = list(map(lambda x: x[2], results))
return answer_result, token_result, logprob_result
def truncate_by_token(self, prompt: str) -> str:
"""
Function to truncate prompts to fit within the maximum token limit.
"""
tokens = self.encoding_for_model(prompt)["tokens"]  # tokenize via the vllm server's /tokenize endpoint
return self.decoding_for_model(tokens[: self.max_model_len])["prompt"]
def call_vllm_api(self, prompt: str, **kwargs) -> dict:
"""
Calls the VLLM API to get chat/completions responses.
:param prompt: Input prompt.
:param kwargs: Additional API options (e.g., temperature, max_tokens).
:return: API response.
"""
payload = {
"model": self.llm,
"messages": [{"role": "user", "content": prompt}],
"temperature": kwargs.get("temperature", 0.4),
"max_tokens": min(
kwargs.get("max_tokens", self.max_token_size), self.max_token_size
),
"logprobs": True,
"n": 1,
}
start_time = time.time() # Record request start time
response = requests.post(f"{self.uri}/v1/chat/completions", json=payload)
end_time = time.time() # Record request end time
response.raise_for_status()
elapsed_time = end_time - start_time # Calculate elapsed time
logger.info(
f"Request chat completions to vllm server completed in {elapsed_time:.2f} seconds"
)
return response.json()
# Additional method: abstract method implementation
async def astream(self, prompt: str, **kwargs):
"""
Asynchronous streaming method not implemented.
"""
raise NotImplementedError("astream method is not implemented for VLLM API yet.")
def stream(self, prompt: str, **kwargs):
"""
Synchronous streaming method not implemented.
"""
raise NotImplementedError("stream method is not implemented for VLLM API yet.")
def get_result(self, prompt: str, **kwargs):
response = self.call_vllm_api(prompt, **kwargs)
choice = response["choices"][0]
answer = choice["message"]["content"]
# Handle cases where logprobs is None
if choice.get("logprobs") and "content" in choice["logprobs"]:
logprobs = list(map(lambda x: x["logprob"], choice["logprobs"]["content"]))
tokens = list(
map(
lambda x: self.encoding_for_model(x["token"])["tokens"],
choice["logprobs"]["content"],
)
)
else:
logprobs = []
tokens = []
return answer, tokens, logprobs
def encoding_for_model(self, answer_piece: str):
payload = {
"model": self.llm,
"prompt": answer_piece,
"add_special_tokens": True,
}
response = requests.post(f"{self.uri}/tokenize", json=payload)
response.raise_for_status()
return response.json()
def decoding_for_model(self, tokens: list[int]):
payload = {
"model": self.llm,
"tokens": tokens,
}
response = requests.post(f"{self.uri}/detokenize", json=payload)
response.raise_for_status()
return response.json()
def get_max_model_length(self):
response = requests.get(f"{self.uri}/v1/models")
response.raise_for_status()
json_data = response.json()
return json_data["data"][0]["max_model_len"]
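
A minimal usage sketch of VllmAPI, assuming a vllm server with the OpenAI-compatible API is already running (for example via `vllm serve <model>`); the URI, model name, and project path are placeholders.

import pandas as pd
from autorag.nodes.generator import VllmAPI

previous_result = pd.DataFrame({"prompts": ["Explain RAG in one sentence."]})
generator = VllmAPI(
    project_dir="./project",
    llm="meta-llama/Llama-3.1-8B-Instruct",  # must match the model the server is running
    uri="http://localhost:8000",
    max_tokens=256,
    batch=4,
)
result_df = generator.pure(previous_result, temperature=0.4)
print(result_df["generated_texts"].tolist())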

@@ -0,0 +1,2 @@
from .pass_passage_augmenter import PassPassageAugmenter
from .prev_next_augmenter import PrevNextPassageAugmenter

@@ -0,0 +1,80 @@
import abc
import logging
import os
import pandas as pd
from autorag.schema import BaseModule
from autorag.utils import (
validate_qa_dataset,
sort_by_scores,
validate_corpus_dataset,
cast_corpus_dataset,
)
from autorag.utils.util import select_top_k
logger = logging.getLogger("AutoRAG")
class BasePassageAugmenter(BaseModule, metaclass=abc.ABCMeta):
def __init__(self, project_dir: str, *args, **kwargs):
logger.info(
f"Initialize passage augmenter node - {self.__class__.__name__} module..."
)
data_dir = os.path.join(project_dir, "data")
corpus_df = pd.read_parquet(
os.path.join(data_dir, "corpus.parquet"), engine="pyarrow"
)
validate_corpus_dataset(corpus_df)
corpus_df = cast_corpus_dataset(corpus_df)
self.corpus_df = corpus_df
def __del__(self):
logger.info(
f"Deleting passage augmenter node - {self.__class__.__name__} module..."
)
def cast_to_run(self, previous_result: pd.DataFrame, *args, **kwargs):
logger.info(
f"Running passage augmenter node - {self.__class__.__name__} module..."
)
validate_qa_dataset(previous_result)
# find ids columns
assert (
"retrieved_ids" in previous_result.columns
), "previous_result must have retrieved_ids column."
ids = previous_result["retrieved_ids"].tolist()
return ids
@staticmethod
def sort_by_scores(
augmented_contents,
augmented_ids,
augmented_scores,
top_k: int,
reverse: bool = True,
):
# sort by scores
df = pd.DataFrame(
{
"contents": augmented_contents,
"ids": augmented_ids,
"scores": augmented_scores,
}
)
df[["contents", "ids", "scores"]] = df.apply(
lambda row: sort_by_scores(row, reverse=reverse),
axis=1,
result_type="expand",
)
# select by top_k
results = select_top_k(df, ["contents", "ids", "scores"], top_k)
return (
results["contents"].tolist(),
results["ids"].tolist(),
results["scores"].tolist(),
)
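
A small illustration of the sort_by_scores helper above applied to made-up data; it sorts each row's passages by score and keeps the top_k.

from autorag.nodes.passageaugmenter.base import BasePassageAugmenter

contents, ids, scores = BasePassageAugmenter.sort_by_scores(
    augmented_contents=[["passage a", "passage b", "passage c"]],
    augmented_ids=[["id-a", "id-b", "id-c"]],
    augmented_scores=[[0.2, 0.9, 0.5]],
    top_k=2,
)
print(contents)  # expected: [["passage b", "passage c"]]
print(scores)    # expected: [[0.9, 0.5]]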

@@ -0,0 +1,43 @@
from typing import List
import pandas as pd
from autorag.nodes.passageaugmenter.base import BasePassageAugmenter
from autorag.utils import result_to_dataframe
class PassPassageAugmenter(BasePassageAugmenter):
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
"""
Run the passage augmenter node - PassPassageAugmenter module.
:param previous_result: The previous result Dataframe.
:param top_k: You must pass the top_k value (via kwargs) to get the top k results.
:param kwargs: Not used.
:return: DataFrame with retrieved_contents, retrieved_ids, and retrieve_scores columns
"""
top_k = kwargs.pop("top_k")
ids = self.cast_to_run(previous_result)
contents = previous_result["retrieved_contents"].tolist()
scores = previous_result["retrieve_scores"].tolist()
augmented_ids, augmented_contents, augmented_scores = self._pure(
ids, contents, scores
)
return self.sort_by_scores(
augmented_contents, augmented_ids, augmented_scores, top_k
)
def _pure(
self,
ids_list: List[List[str]],
contents_list: List[List[str]],
scores_list: List[List[float]],
):
"""
Do not perform augmentation.
Return given passages, scores, and ids as is.
"""
return ids_list, contents_list, scores_list

@@ -0,0 +1,155 @@
from typing import List, Union
import numpy as np
import pandas as pd
from autorag.embedding.base import EmbeddingModel
from autorag.evaluation.metric.util import calculate_cosine_similarity
from autorag.nodes.passageaugmenter.base import BasePassageAugmenter
from autorag.utils.util import (
filter_dict_keys,
fetch_contents,
embedding_query_content,
result_to_dataframe,
empty_cuda_cache,
)
class PrevNextPassageAugmenter(BasePassageAugmenter):
def __init__(
self,
project_dir: str,
embedding_model: Union[str, dict] = "openai",
*args,
**kwargs,
):
"""
Initialize the PrevNextPassageAugmenter module.
:param project_dir: The project directory.
:param embedding_model: The embedding model name to use for calculating cosine similarity.
Default is openai (text-embedding-ada-002).
:param kwargs: Extra parameters.
"""
super().__init__(project_dir, *args, **kwargs)
slim_corpus_df = self.corpus_df[["doc_id", "metadata"]]
slim_corpus_df.loc[:, "metadata"] = slim_corpus_df["metadata"].apply(
filter_dict_keys, keys=["prev_id", "next_id"]
)
self.slim_corpus_df = slim_corpus_df
# init embedding model
self.embedding_model = EmbeddingModel.load(embedding_model)()
def __del__(self):
del self.embedding_model
empty_cuda_cache()
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
"""
Run the passage augmenter node - PrevNextPassageAugmenter module.
:param previous_result: The previous result Dataframe.
:param top_k: You must pass the top_k value (via kwargs) to get the top k results.
:param kwargs: May contain 'mode' and 'num_passages' for the augmentation (see _pure).
:return: DataFrame with retrieved_contents, retrieved_ids, and retrieve_scores columns
"""
top_k = kwargs.pop("top_k")
ids = self.cast_to_run(previous_result)
# find queries columns
assert (
"query" in previous_result.columns
), "previous_result must have query column."
queries = previous_result["query"].tolist()
mode = kwargs.pop("mode", "both")
num_passages = kwargs.pop("num_passages", 1)
augmented_ids = self._pure(ids, num_passages, mode)
# fetch contents from corpus to use augmented ids
augmented_contents = fetch_contents(self.corpus_df, augmented_ids)
query_embeddings, contents_embeddings = embedding_query_content(
queries, augmented_contents, self.embedding_model, batch=128
)
# get scores from calculated cosine similarity
augmented_scores = [
np.array(
[
calculate_cosine_similarity(query_embedding, x)
for x in content_embeddings
]
).tolist()
for query_embedding, content_embeddings in zip(
query_embeddings, contents_embeddings
)
]
return self.sort_by_scores(
augmented_contents, augmented_ids, augmented_scores, top_k
)
def _pure(
self,
ids_list: List[List[str]],
num_passages: int = 1,
mode: str = "both",
) -> List[List[str]]:
"""
Add passages before and/or after the retrieved passage.
For more information, visit https://docs.llamaindex.ai/en/stable/examples/node_postprocessor/PrevNextPostprocessorDemo/.
:param ids_list: The list of lists of ids retrieved
:param num_passages: The number of passages to add before and after the retrieved passage
Default is 1.
:param mode: The mode of augmentation
'prev': add passages before the retrieved passage
'next': add passages after the retrieved passage
'both': add passages before and after the retrieved passage
Default is 'both'.
:return: The list of lists of augmented ids
"""
if mode not in ["prev", "next", "both"]:
raise ValueError(f"mode must be 'prev', 'next', or 'both', but got {mode}")
augmented_ids = [
prev_next_augmenter_pure(ids, self.slim_corpus_df, mode, num_passages)
for ids in ids_list
]
return augmented_ids
def prev_next_augmenter_pure(
ids: List[str], corpus_df: pd.DataFrame, mode: str, num_passages: int
):
def fetch_id_sequence(start_id, key):
sequence = []
current_id = start_id
for _ in range(num_passages):
current_id = (
corpus_df.loc[corpus_df["doc_id"] == current_id]["metadata"]
.values[0]
.get(key)
)
if current_id is None:
break
sequence.append(current_id)
return sequence
augmented_group = []
for id_ in ids:
current_ids = [id_]
if mode in ["prev", "both"]:
current_ids = fetch_id_sequence(id_, "prev_id")[::-1] + current_ids
if mode in ["next", "both"]:
current_ids += fetch_id_sequence(id_, "next_id")
augmented_group.extend(current_ids)
return augmented_group
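
A toy illustration of prev_next_augmenter_pure on a made-up three-document corpus whose prev_id/next_id links live in the metadata column, exactly as the function expects.

import pandas as pd
from autorag.nodes.passageaugmenter.prev_next_augmenter import prev_next_augmenter_pure

corpus_df = pd.DataFrame(
    {
        "doc_id": ["d1", "d2", "d3"],
        "metadata": [
            {"prev_id": None, "next_id": "d2"},
            {"prev_id": "d1", "next_id": "d3"},
            {"prev_id": "d2", "next_id": None},
        ],
    }
)
# Retrieving only "d2" and augmenting one passage on each side yields its neighbours as well.
print(prev_next_augmenter_pure(["d2"], corpus_df, mode="both", num_passages=1))
# expected: ['d1', 'd2', 'd3']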

@@ -0,0 +1,131 @@
import logging
import os
import pathlib
from typing import List, Dict
import pandas as pd
from autorag.nodes.retrieval.run import evaluate_retrieval_node
from autorag.schema.metricinput import MetricInput
from autorag.strategy import measure_speed, filter_by_threshold, select_best
from autorag.utils.util import apply_recursive, to_list
logger = logging.getLogger("AutoRAG")
def run_passage_augmenter_node(
modules: List,
module_params: List[Dict],
previous_result: pd.DataFrame,
node_line_dir: str,
strategies: Dict,
) -> pd.DataFrame:
if not os.path.exists(node_line_dir):
os.makedirs(node_line_dir)
project_dir = pathlib.PurePath(node_line_dir).parent.parent
qa_df = pd.read_parquet(
os.path.join(project_dir, "data", "qa.parquet"), engine="pyarrow"
)
retrieval_gt = qa_df["retrieval_gt"].tolist()
retrieval_gt = apply_recursive(lambda x: str(x), to_list(retrieval_gt))
results, execution_times = zip(
*map(
lambda task: measure_speed(
task[0].run_evaluator,
project_dir=project_dir,
previous_result=previous_result,
**task[1],
),
zip(modules, module_params),
)
)
average_times = list(map(lambda x: x / len(results[0]), execution_times))
metric_inputs = [
MetricInput(retrieval_gt=ret_gt, query=query, generation_gt=gen_gt)
for ret_gt, query, gen_gt in zip(
retrieval_gt,
previous_result["query"].tolist(),
previous_result["generation_gt"].tolist(),
)
]
# run metrics before filtering
if strategies.get("metrics") is None:
raise ValueError(
"You must at least one metrics for passage_augmenter evaluation."
)
results = list(
map(
lambda x: evaluate_retrieval_node(
x,
metric_inputs,
strategies.get("metrics"),
),
results,
)
)
# save results to folder
save_dir = os.path.join(node_line_dir, "passage_augmenter") # node name
if not os.path.exists(save_dir):
os.makedirs(save_dir)
filepaths = list(
map(lambda x: os.path.join(save_dir, f"{x}.parquet"), range(len(modules)))
)
list(
map(lambda x: x[0].to_parquet(x[1], index=False), zip(results, filepaths))
) # execute save to parquet
filenames = list(map(lambda x: os.path.basename(x), filepaths))
summary_df = pd.DataFrame(
{
"filename": filenames,
"module_name": list(map(lambda module: module.__name__, modules)),
"module_params": module_params,
"execution_time": average_times,
**{
f"passage_augmenter_{metric}": list(
map(lambda result: result[metric].mean(), results)
)
for metric in strategies.get("metrics")
},
}
)
# filter by strategies
if strategies.get("speed_threshold") is not None:
results, filenames = filter_by_threshold(
results, average_times, strategies["speed_threshold"], filenames
)
selected_result, selected_filename = select_best(
results,
strategies.get("metrics"),
filenames,
strategies.get("strategy", "mean"),
)
# change metric name columns to passage_augmenter_metric_name
selected_result = selected_result.rename(
columns={
metric_name: f"passage_augmenter_{metric_name}"
for metric_name in strategies["metrics"]
}
)
# drop retrieval result columns in previous_result
previous_result = previous_result.drop(
columns=["retrieved_contents", "retrieved_ids", "retrieve_scores"]
)
best_result = pd.concat([previous_result, selected_result], axis=1)
# add 'is_best' column to summary file
summary_df["is_best"] = summary_df["filename"] == selected_filename
# save files
summary_df.to_csv(os.path.join(save_dir, "summary.csv"), index=False)
best_result.to_parquet(
os.path.join(
save_dir, f"best_{os.path.splitext(selected_filename)[0]}.parquet"
),
index=False,
)
return best_result

@@ -0,0 +1,4 @@
from .longllmlingua import LongLLMLingua
from .pass_compressor import PassCompressor
from .refine import Refine
from .tree_summarize import TreeSummarize

@@ -0,0 +1,83 @@
import abc
import logging
from typing import Dict
import pandas as pd
from llama_index.core.llms import LLM
from autorag import generator_models
from autorag.schema import BaseModule
from autorag.utils import result_to_dataframe
logger = logging.getLogger("AutoRAG")
class BasePassageCompressor(BaseModule, metaclass=abc.ABCMeta):
def __init__(self, project_dir: str, *args, **kwargs):
logger.info(
f"Initialize passage compressor node - {self.__class__.__name__} module..."
)
def __del__(self):
logger.info(
f"Deleting passage compressor node - {self.__class__.__name__} module..."
)
def cast_to_run(self, previous_result: pd.DataFrame, *args, **kwargs):
logger.info(
f"Running passage compressor node - {self.__class__.__name__} module..."
)
assert all(
[
column in previous_result.columns
for column in [
"query",
"retrieved_contents",
]
]
), "previous_result must have retrieved_contents, retrieved_ids, and retrieve_scores columns."
assert len(previous_result) > 0, "previous_result must have at least one row."
queries = previous_result["query"].tolist()
retrieved_contents = previous_result["retrieved_contents"].tolist()
return queries, retrieved_contents
class LlamaIndexCompressor(BasePassageCompressor, metaclass=abc.ABCMeta):
param_list = ["prompt", "chat_prompt", "batch"]
def __init__(self, project_dir: str, **kwargs):
"""
Initialize passage compressor module.
:param project_dir: The project directory
:param llm: The llm name that will be used to summarize (passed via kwargs).
Any LlamaIndex LLM model can be used here.
:param kwargs: Extra parameters for initializing the llm.
"""
super().__init__(project_dir)
kwargs_dict = dict(
filter(lambda x: x[0] not in self.param_list, kwargs.items())
)
llm_name = kwargs_dict.pop("llm")
self.llm: LLM = make_llm(llm_name, kwargs_dict)
def __del__(self):
del self.llm
super().__del__()
@result_to_dataframe(["retrieved_contents"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, retrieved_contents = self.cast_to_run(previous_result)
param_dict = dict(filter(lambda x: x[0] in self.param_list, kwargs.items()))
result = self._pure(queries, retrieved_contents, **param_dict)
return list(map(lambda x: [x], result))
def make_llm(llm_name: str, kwargs: Dict) -> LLM:
if llm_name not in generator_models:
raise KeyError(
f"{llm_name} is not supported. "
"You can add it manually by calling autorag.generator_models."
)
return generator_models[llm_name](**kwargs)

@@ -0,0 +1,115 @@
from typing import List, Optional
import pandas as pd
from autorag.nodes.passagecompressor.base import BasePassageCompressor
from autorag.utils.util import pop_params, result_to_dataframe, empty_cuda_cache
# TODO: Parallel Processing Refactoring at #460
class LongLLMLingua(BasePassageCompressor):
def __init__(
self, project_dir: str, model_name: str = "NousResearch/Llama-2-7b-hf", **kwargs
):
try:
from llmlingua import PromptCompressor
except ImportError:
raise ImportError(
"LongLLMLingua is not installed. Please install it by running `pip install llmlingua`."
)
super().__init__(project_dir)
model_init_params = pop_params(PromptCompressor.__init__, kwargs)
self.llm_lingua = PromptCompressor(model_name=model_name, **model_init_params)
def __del__(self):
del self.llm_lingua
empty_cuda_cache()
super().__del__()
@result_to_dataframe(["retrieved_contents"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, retrieved_contents = self.cast_to_run(previous_result)
results = self._pure(queries, retrieved_contents, **kwargs)
return list(map(lambda x: [x], results))
def _pure(
self,
queries: List[str],
contents: List[List[str]],
instructions: Optional[str] = None,
target_token: int = 300,
**kwargs,
) -> List[str]:
"""
Compresses the retrieved texts using LongLLMLingua.
For more information, visit https://github.com/microsoft/LLMLingua.
:param queries: The queries for retrieved passages.
:param contents: The contents of retrieved passages.
The compression model ("NousResearch/Llama-2-7b-hf" by default) is set in the constructor.
:param instructions: The instructions for compression.
Default is None. When it is None, it will use default instructions.
:param target_token: The target token for compression.
Default is 300.
:param kwargs: Additional keyword arguments.
:return: The list of compressed texts.
"""
if instructions is None:
instructions = "Given the context, please answer the final question"
results = [
llmlingua_pure(
query, contents_, self.llm_lingua, instructions, target_token, **kwargs
)
for query, contents_ in zip(queries, contents)
]
return results
def llmlingua_pure(
query: str,
contents: List[str],
llm_lingua,
instructions: str,
target_token: int = 300,
**kwargs,
) -> str:
"""
Return the compressed text.
:param query: The query for retrieved passages.
:param contents: The contents of retrieved passages.
:param llm_lingua: The llm instance, that will be used to compress.
:param instructions: The instructions for compression.
:param target_token: The target token for compression.
Default is 300.
:param kwargs: Additional keyword arguments.
:return: The compressed text.
"""
try:
from llmlingua import PromptCompressor
except ImportError:
raise ImportError(
"LongLLMLingua is not installed. Please install it by running `pip install llmlingua`."
)
# split by "\n\n" (recommended by LongLLMLingua authors)
new_context_texts = [c for context in contents for c in context.split("\n\n")]
compress_prompt_params = pop_params(PromptCompressor.compress_prompt, kwargs)
compressed_prompt = llm_lingua.compress_prompt(
new_context_texts,
question=query,
instruction=instructions,
rank_method="longllmlingua",
target_token=target_token,
**compress_prompt_params,
)
compressed_prompt_txt = compressed_prompt["compressed_prompt"]
# separate out the question and instruction
result = "\n\n".join(compressed_prompt_txt.split("\n\n")[1:-1])
return result
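
A minimal usage sketch of the LongLLMLingua compressor, assuming the llmlingua package is installed and there is enough GPU memory for the compression model; the project path, query, and passages are placeholders.

import pandas as pd
from autorag.nodes.passagecompressor import LongLLMLingua

previous_result = pd.DataFrame(
    {
        "query": ["What is RAG?"],
        "retrieved_contents": [[
            "RAG stands for Retrieval-Augmented Generation. It grounds an LLM on retrieved passages.",
            "A second retrieved passage with additional background.",
        ]],
    }
)
compressor = LongLLMLingua(project_dir="./project")  # default model: NousResearch/Llama-2-7b-hf
result_df = compressor.pure(previous_result, target_token=200)
print(result_df["retrieved_contents"].tolist())  # one compressed passage per query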

@@ -0,0 +1,16 @@
from typing import List
import pandas as pd
from autorag.nodes.passagecompressor.base import BasePassageCompressor
from autorag.utils import result_to_dataframe
class PassCompressor(BasePassageCompressor):
@result_to_dataframe(["retrieved_contents"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
_, contents = self.cast_to_run(previous_result)
return self._pure(contents)
def _pure(self, contents: List[List[str]]):
return contents

@@ -0,0 +1,54 @@
from typing import List, Optional
from llama_index.core import PromptTemplate
from llama_index.core.prompts import PromptType
from llama_index.core.prompts.utils import is_chat_model
from llama_index.core.response_synthesizers import Refine as rf
from autorag.nodes.passagecompressor.base import LlamaIndexCompressor
from autorag.utils.util import get_event_loop, process_batch
class Refine(LlamaIndexCompressor):
def _pure(
self,
queries: List[str],
contents: List[List[str]],
prompt: Optional[str] = None,
chat_prompt: Optional[str] = None,
batch: int = 16,
) -> List[str]:
"""
Refine a response to a query across text chunks.
This function is a wrapper for llama_index.response_synthesizers.Refine.
For more information, visit https://docs.llamaindex.ai/en/stable/examples/response_synthesizers/refine/.
:param queries: The queries for retrieved passages.
:param contents: The contents of retrieved passages.
:param prompt: The prompt template for refine.
If you want to use chat prompt, you should pass chat_prompt instead.
In the prompt, you must specify where to put 'context_msg' and 'query_str'.
Default is None. When it is None, it will use llama index default prompt.
:param chat_prompt: The chat prompt template for refine.
If you want to use normal prompt, you should pass prompt instead.
In the chat prompt, you must specify where to put 'context_msg' and 'query_str'.
Default is None. When it is None, it will use llama index default chat prompt.
:param batch: The batch size for llm.
Set low if you face some errors.
Default is 16.
:return: The list of compressed texts.
"""
if prompt is not None and not is_chat_model(self.llm):
refine_template = PromptTemplate(prompt, prompt_type=PromptType.REFINE)
elif chat_prompt is not None and is_chat_model(self.llm):
refine_template = PromptTemplate(chat_prompt, prompt_type=PromptType.REFINE)
else:
refine_template = None
summarizer = rf(llm=self.llm, refine_template=refine_template, verbose=True)
tasks = [
summarizer.aget_response(query, content)
for query, content in zip(queries, contents)
]
loop = get_event_loop()
results = loop.run_until_complete(process_batch(tasks, batch_size=batch))
return results
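
A minimal usage sketch of the Refine compressor, assuming "openai" is registered in autorag.generator_models and an OpenAI key is set; the project path, model, query, and passages are placeholders.

import pandas as pd
from autorag.nodes.passagecompressor import Refine

previous_result = pd.DataFrame(
    {
        "query": ["What is RAG?"],
        "retrieved_contents": [[
            "RAG combines a retriever with a generator.",
            "A second retrieved passage with additional background.",
        ]],
    }
)
compressor = Refine(project_dir="./project", llm="openai", model="gpt-4o-mini")
result_df = compressor.pure(previous_result, batch=4)
print(result_df["retrieved_contents"].tolist())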

@@ -0,0 +1,186 @@
import os.path
import pathlib
from typing import List, Dict
import pandas as pd
from autorag.evaluation.metric import (
retrieval_token_recall,
retrieval_token_precision,
retrieval_token_f1,
)
from autorag.schema.metricinput import MetricInput
from autorag.strategy import measure_speed, filter_by_threshold, select_best
from autorag.utils.util import fetch_contents
def run_passage_compressor_node(
modules: List,
module_params: List[Dict],
previous_result: pd.DataFrame,
node_line_dir: str,
strategies: Dict,
) -> pd.DataFrame:
"""
Run evaluation and select the best module among passage compressor modules.
:param modules: Passage compressor modules to run.
:param module_params: Passage compressor module parameters.
:param previous_result: Previous result dataframe.
Could be retrieval, reranker modules result.
It means it must contain 'query', 'retrieved_contents', 'retrieved_ids', 'retrieve_scores' columns.
:param node_line_dir: This node line's directory.
:param strategies: Strategies for passage compressor node.
In this node, we use token-level retrieval metrics such as 'retrieval_token_f1', 'retrieval_token_precision', and 'retrieval_token_recall'.
You can skip evaluation when you use only one module and one module parameter.
:return: The best result dataframe with previous result columns.
This node will replace 'retrieved_contents' to compressed passages, so its length will be one.
"""
if not os.path.exists(node_line_dir):
os.makedirs(node_line_dir)
project_dir = pathlib.PurePath(node_line_dir).parent.parent
data_dir = os.path.join(project_dir, "data")
save_dir = os.path.join(node_line_dir, "passage_compressor")
if not os.path.exists(save_dir):
os.makedirs(save_dir)
# make retrieval contents gt
qa_data = pd.read_parquet(os.path.join(data_dir, "qa.parquet"), engine="pyarrow")
corpus_data = pd.read_parquet(
os.path.join(data_dir, "corpus.parquet"), engine="pyarrow"
)
# check qa_data have retrieval_gt
assert all(
len(x[0]) > 0 for x in qa_data["retrieval_gt"].tolist()
), "Can't use passage compressor if you don't have retrieval gt values in QA dataset."
# run modules
results, execution_times = zip(
*map(
lambda task: measure_speed(
task[0].run_evaluator,
project_dir=project_dir,
previous_result=previous_result,
**task[1],
),
zip(modules, module_params),
)
)
results = list(results)
average_times = list(map(lambda x: x / len(results[0]), execution_times))
retrieval_gt_contents = list(
map(lambda x: fetch_contents(corpus_data, x), qa_data["retrieval_gt"].tolist())
)
metric_inputs = [
MetricInput(retrieval_gt_contents=ret_cont_gt)
for ret_cont_gt in retrieval_gt_contents
]
# run metrics before filtering
if strategies.get("metrics") is None:
raise ValueError(
"You must provide at least one metric for retrieval contents evaluation. "
"It can be 'retrieval_token_f1', 'retrieval_token_precision', or 'retrieval_token_recall'."
)
results = list(
map(
lambda x: evaluate_passage_compressor_node(
x, metric_inputs, strategies.get("metrics")
),
results,
)
)
# save results to folder
filepaths = list(
map(lambda x: os.path.join(save_dir, f"{x}.parquet"), range(len(modules)))
)
list(
map(lambda x: x[0].to_parquet(x[1], index=False), zip(results, filepaths))
) # execute save to parquet
filenames = list(map(lambda x: os.path.basename(x), filepaths))
# make summary file
summary_df = pd.DataFrame(
{
"filename": filenames,
"module_name": list(map(lambda module: module.__name__, modules)),
"module_params": module_params,
"execution_time": average_times,
**{
f"passage_compressor_{metric}": list(
map(lambda result: result[metric].mean(), results)
)
for metric in strategies.get("metrics")
},
}
)
# filter by strategies
if strategies.get("speed_threshold") is not None:
results, filenames = filter_by_threshold(
results, average_times, strategies["speed_threshold"], filenames
)
selected_result, selected_filename = select_best(
results,
strategies.get("metrics"),
filenames,
strategies.get("strategy", "mean"),
)
new_retrieved_contents = selected_result["retrieved_contents"]
previous_result["retrieved_contents"] = new_retrieved_contents
selected_result = selected_result.drop(columns=["retrieved_contents"])
best_result = pd.concat([previous_result, selected_result], axis=1)
# add 'is_best' column to summary file
summary_df["is_best"] = summary_df["filename"] == selected_filename
# add prefix 'passage_compressor' to best_result columns
best_result = best_result.rename(
columns={
metric_name: f"passage_compressor_{metric_name}"
for metric_name in strategies.get("metrics")
}
)
# save the result files
best_result.to_parquet(
os.path.join(
save_dir, f"best_{os.path.splitext(selected_filename)[0]}.parquet"
),
index=False,
)
summary_df.to_csv(os.path.join(save_dir, "summary.csv"), index=False)
return best_result
def evaluate_passage_compressor_node(
result_df: pd.DataFrame, metric_inputs: List[MetricInput], metrics: List[str]
):
metric_funcs = {
retrieval_token_recall.__name__: retrieval_token_recall,
retrieval_token_precision.__name__: retrieval_token_precision,
retrieval_token_f1.__name__: retrieval_token_f1,
}
for metric_input, generated_text in zip(
metric_inputs, result_df["retrieved_contents"].tolist()
):
metric_input.retrieved_contents = generated_text
metrics = list(filter(lambda x: x in metric_funcs.keys(), metrics))
if len(metrics) <= 0:
raise ValueError(f"metrics must be one of {metric_funcs.keys()}")
metrics_scores = dict(
map(
lambda metric: (
metric,
metric_funcs[metric](
metric_inputs=metric_inputs,
),
),
metrics,
)
)
result_df = pd.concat([result_df, pd.DataFrame(metrics_scores)], axis=1)
return result_df
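
A toy illustration of evaluate_passage_compressor_node, scoring one made-up compressed passage against a made-up ground-truth passage with the token-level metrics named above; the import path assumes this file is the passage compressor node's run module.

import pandas as pd
from autorag.nodes.passagecompressor.run import evaluate_passage_compressor_node
from autorag.schema.metricinput import MetricInput

result_df = pd.DataFrame(
    {"retrieved_contents": [["RAG augments generation with retrieved context."]]}
)
metric_inputs = [
    MetricInput(retrieval_gt_contents=[["RAG augments an LLM's generation with retrieved context."]])
]
scored_df = evaluate_passage_compressor_node(
    result_df, metric_inputs, metrics=["retrieval_token_f1", "retrieval_token_recall"]
)
print(scored_df[["retrieval_token_f1", "retrieval_token_recall"]])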

@@ -0,0 +1,56 @@
from typing import List, Optional
from llama_index.core import PromptTemplate
from llama_index.core.prompts import PromptType
from llama_index.core.prompts.utils import is_chat_model
from llama_index.core.response_synthesizers import TreeSummarize as ts
from autorag.nodes.passagecompressor.base import LlamaIndexCompressor
from autorag.utils.util import get_event_loop, process_batch
class TreeSummarize(LlamaIndexCompressor):
def _pure(
self,
queries: List[str],
contents: List[List[str]],
prompt: Optional[str] = None,
chat_prompt: Optional[str] = None,
batch: int = 16,
) -> List[str]:
"""
Recursively merge retrieved texts and summarizes them in a bottom-up fashion.
This function is a wrapper for llama_index.response_synthesizers.TreeSummarize.
For more information, visit https://docs.llamaindex.ai/en/latest/examples/response_synthesizers/tree_summarize.html.
:param queries: The queries for retrieved passages.
:param contents: The contents of retrieved passages.
:param prompt: The prompt template for summarization.
If you want to use chat prompt, you should pass chat_prompt instead.
In the prompt, you must specify where to put 'context_str' and 'query_str'.
Default is None. When it is None, it will use llama index default prompt.
:param chat_prompt: The chat prompt template for summarization.
If you want to use normal prompt, you should pass prompt instead.
In the chat prompt, you must specify where to put 'context_str' and 'query_str'.
Default is None. When it is None, it will use llama index default chat prompt.
:param batch: The batch size for llm.
Set low if you face some errors.
Default is 16.
:return: The list of compressed texts.
"""
if prompt is not None and not is_chat_model(self.llm):
summary_template = PromptTemplate(prompt, prompt_type=PromptType.SUMMARY)
elif chat_prompt is not None and is_chat_model(self.llm):
summary_template = PromptTemplate(
chat_prompt, prompt_type=PromptType.SUMMARY
)
else:
summary_template = None
summarizer = ts(llm=self.llm, summary_template=summary_template, use_async=True)
tasks = [
summarizer.aget_response(query, content)
for query, content in zip(queries, contents)
]
loop = get_event_loop()
results = loop.run_until_complete(process_batch(tasks, batch_size=batch))
return results

@@ -0,0 +1,6 @@
from .pass_passage_filter import PassPassageFilter
from .percentile_cutoff import PercentileCutoff
from .recency import RecencyFilter
from .similarity_percentile_cutoff import SimilarityPercentileCutoff
from .similarity_threshold_cutoff import SimilarityThresholdCutoff
from .threshold_cutoff import ThresholdCutoff

@@ -0,0 +1,50 @@
import abc
import logging
from pathlib import Path
from typing import Union
import pandas as pd
from autorag.schema.base import BaseModule
from autorag.utils import validate_qa_dataset
logger = logging.getLogger("AutoRAG")
class BasePassageFilter(BaseModule, metaclass=abc.ABCMeta):
def __init__(self, project_dir: Union[str, Path], *args, **kwargs):
logger.info(f"Initialize passage filter node - {self.__class__.__name__}")
def __del__(self):
logger.info(f"Prompt maker node - {self.__class__.__name__} module is deleted.")
def cast_to_run(self, previous_result: pd.DataFrame, *args, **kwargs):
logger.info(
f"Running passage filter node - {self.__class__.__name__} module..."
)
validate_qa_dataset(previous_result)
# find queries columns
assert (
"query" in previous_result.columns
), "previous_result must have query column."
queries = previous_result["query"].tolist()
# find contents_list columns
assert (
"retrieved_contents" in previous_result.columns
), "previous_result must have retrieved_contents column."
contents = previous_result["retrieved_contents"].tolist()
# find scores columns
assert (
"retrieve_scores" in previous_result.columns
), "previous_result must have retrieve_scores column."
scores = previous_result["retrieve_scores"].tolist()
# find ids columns
assert (
"retrieved_ids" in previous_result.columns
), "previous_result must have retrieved_ids column."
ids = previous_result["retrieved_ids"].tolist()
return queries, contents, scores, ids

View File

@@ -0,0 +1,14 @@
import pandas as pd
from autorag.nodes.passagefilter.base import BasePassageFilter
from autorag.utils import result_to_dataframe
class PassPassageFilter(BasePassageFilter):
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
_, contents, scores, ids = self.cast_to_run(previous_result)
return contents, ids, scores
def _pure(self, *args, **kwargs):
pass

View File

@@ -0,0 +1,58 @@
from typing import List, Tuple
import pandas as pd
from autorag.nodes.passagefilter.base import BasePassageFilter
from autorag.utils.util import sort_by_scores, select_top_k, result_to_dataframe
class PercentileCutoff(BasePassageFilter):
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, contents, scores, ids = self.cast_to_run(previous_result)
return self._pure(queries, contents, scores, ids, *args, **kwargs)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
scores_list: List[List[float]],
ids_list: List[List[str]],
percentile: float,
reverse: bool = False,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Keep only the top passages, where the number kept is the number of contents times the percentile.
This is a filter and does not override scores.
If the number of contents times the percentile is less than 1, only the single highest-scoring content is kept.
:param queries: The list of queries to use for filtering
:param contents_list: The list of lists of contents to filter
:param scores_list: The list of lists of scores retrieved
:param ids_list: The list of lists of ids retrieved
:param percentile: The percentile to cut off
:param reverse: If True, the lower the score, the better
Default is False.
:return: Tuple of lists containing the filtered contents, ids, and scores
"""
num_top_k = max(1, int(len(scores_list[0]) * percentile))
df = pd.DataFrame(
{
"contents": contents_list,
"ids": ids_list,
"scores": scores_list,
}
)
reverse = not reverse
df[["contents", "ids", "scores"]] = df.apply(
sort_by_scores, axis=1, result_type="expand", reverse=reverse
)
results = select_top_k(df, ["contents", "ids", "scores"], num_top_k)
return (
results["contents"].tolist(),
results["ids"].tolist(),
results["scores"].tolist(),
)
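# Quick self-contained sanity check of the cutoff arithmetic above;
# the scores and percentile values are made up.
scores = [0.91, 0.87, 0.42, 0.40, 0.33, 0.31, 0.25, 0.20, 0.11, 0.05]
num_top_k = max(1, int(len(scores) * 0.35))  # 10 passages, percentile 0.35
assert num_top_k == 3
assert max(1, int(2 * 0.2)) == 1  # the max(1, ...) guard always keeps one passage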

View File

@@ -0,0 +1,105 @@
import logging
import os
from datetime import datetime, date
from pathlib import Path
from typing import List, Tuple, Union
import pandas as pd
from autorag.nodes.passagefilter.base import BasePassageFilter
from autorag.utils import fetch_contents, result_to_dataframe
logger = logging.getLogger("AutoRAG")
class RecencyFilter(BasePassageFilter):
def __init__(self, project_dir: Union[str, Path], *args, **kwargs):
super().__init__(project_dir, *args, **kwargs)
self.corpus_df = pd.read_parquet(
os.path.join(project_dir, "data", "corpus.parquet"), engine="pyarrow"
)
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
_, contents, scores, ids = self.cast_to_run(previous_result, *args, **kwargs)
metadatas = fetch_contents(self.corpus_df, ids, column_name="metadata")
times = [
[time["last_modified_datetime"] for time in time_list]
for time_list in metadatas
]
return self._pure(contents, scores, ids, times, *args, **kwargs)
def _pure(
self,
contents_list: List[List[str]],
scores_list: List[List[float]],
ids_list: List[List[str]],
time_list: List[List[datetime]],
threshold_datetime: Union[datetime, date],
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Filter out the contents whose datetime is older than the threshold datetime.
If all contents would be filtered out, only the most recent content is kept.
If the threshold date format is incorrect, return the original contents.
:param contents_list: The list of lists of contents to filter
:param scores_list: The list of lists of scores retrieved
:param ids_list: The list of lists of ids retrieved
:param time_list: The list of lists of datetime retrieved
:param threshold_datetime: The threshold to cut off.
In the recency filter, you have to use a datetime.datetime or datetime.date object.
All you need to do is set the date in your YAML file.
For example, you can write "2010-09-09 3:45:06" or "2010-09-09" in the YAML file.
:return: Tuple of lists containing the filtered contents, ids, and scores
"""
if not (
isinstance(threshold_datetime, datetime)
or isinstance(threshold_datetime, date)
):
raise ValueError(
f"Threshold should be a datetime object, but got {type(threshold_datetime)}"
)
if not isinstance(threshold_datetime, datetime):
threshold_datetime = datetime.combine(
threshold_datetime, datetime.min.time()
)
time_list = [
list(
map(
lambda t: datetime.combine(t, datetime.min.time())
if not isinstance(t, datetime)
else t,
time,
)
)
for time in time_list
]
def sort_row(contents, scores, ids, time, _datetime_threshold):
combined = list(zip(contents, scores, ids, time))
combined_filtered = [
item for item in combined if item[3] >= _datetime_threshold
]
if combined_filtered:
remain_contents, remain_scores, remain_ids, _ = zip(*combined_filtered)
else:
combined.sort(key=lambda x: x[3], reverse=True)
remain_contents, remain_scores, remain_ids, _ = zip(*combined[:1])
return list(remain_contents), list(remain_ids), list(remain_scores)
remain_contents_list, remain_ids_list, remain_scores_list = zip(
*map(
sort_row,
contents_list,
scores_list,
ids_list,
time_list,
[threshold_datetime] * len(contents_list),
)
)
return remain_contents_list, remain_ids_list, remain_scores_list
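# Small self-contained sketch of the date normalization and cutoff behaviour
# described above; the dates are invented.
from datetime import date, datetime

threshold = datetime.combine(date(2024, 1, 1), datetime.min.time())  # -> 2024-01-01 00:00:00
times = [datetime(2023, 12, 31), datetime(2024, 3, 2)]
kept = [t for t in times if t >= threshold]  # older passages are dropped
assert kept == [datetime(2024, 3, 2)]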

View File

@@ -0,0 +1,138 @@
import os
import pathlib
from typing import List, Dict
import pandas as pd
from autorag.nodes.retrieval.run import evaluate_retrieval_node
from autorag.schema.metricinput import MetricInput
from autorag.strategy import measure_speed, filter_by_threshold, select_best
from autorag.utils.util import to_list, apply_recursive
def run_passage_filter_node(
modules: List,
module_params: List[Dict],
previous_result: pd.DataFrame,
node_line_dir: str,
strategies: Dict,
) -> pd.DataFrame:
"""
Run evaluation and select the best module among passage filter node results.
:param modules: Passage filter modules to run.
:param module_params: Passage filter module parameters.
:param previous_result: Previous result dataframe.
Could be retrieval, reranker, passage filter modules result.
It means it must contain 'query', 'retrieved_contents', 'retrieved_ids', 'retrieve_scores' columns.
:param node_line_dir: This node line's directory.
:param strategies: Strategies for passage filter node.
In this node, we use 'retrieval_f1', 'retrieval_recall' and 'retrieval_precision'.
You can skip evaluation when you use only one module with a single set of module parameters.
:return: The best result dataframe with previous result columns.
"""
if not os.path.exists(node_line_dir):
os.makedirs(node_line_dir)
project_dir = pathlib.PurePath(node_line_dir).parent.parent
qa_df = pd.read_parquet(
os.path.join(project_dir, "data", "qa.parquet"), engine="pyarrow"
)
retrieval_gt = qa_df["retrieval_gt"].tolist()
retrieval_gt = apply_recursive(lambda x: str(x), to_list(retrieval_gt))
# make rows to metric_inputs
metric_inputs = [
MetricInput(retrieval_gt=ret_gt, query=query, generation_gt=gen_gt)
for ret_gt, query, gen_gt in zip(
retrieval_gt, qa_df["query"].tolist(), qa_df["generation_gt"].tolist()
)
]
results, execution_times = zip(
*map(
lambda task: measure_speed(
task[0].run_evaluator,
project_dir=project_dir,
previous_result=previous_result,
**task[1],
),
zip(modules, module_params),
)
)
average_times = list(map(lambda x: x / len(results[0]), execution_times))
# run metrics before filtering
if strategies.get("metrics") is None:
raise ValueError("You must at least one metrics for passage_filter evaluation.")
results = list(
map(
lambda x: evaluate_retrieval_node(
x,
metric_inputs,
strategies.get("metrics"),
),
results,
)
)
# save results to folder
save_dir = os.path.join(node_line_dir, "passage_filter") # node name
if not os.path.exists(save_dir):
os.makedirs(save_dir)
filepaths = list(
map(lambda x: os.path.join(save_dir, f"{x}.parquet"), range(len(modules)))
)
list(
map(lambda x: x[0].to_parquet(x[1], index=False), zip(results, filepaths))
) # execute save to parquet
filenames = list(map(lambda x: os.path.basename(x), filepaths))
summary_df = pd.DataFrame(
{
"filename": filenames,
"module_name": list(map(lambda module: module.__name__, modules)),
"module_params": module_params,
"execution_time": average_times,
**{
f"passage_filter_{metric}": list(
map(lambda result: result[metric].mean(), results)
)
for metric in strategies.get("metrics")
},
}
)
# filter by strategies
if strategies.get("speed_threshold") is not None:
results, filenames = filter_by_threshold(
results, average_times, strategies["speed_threshold"], filenames
)
selected_result, selected_filename = select_best(
results,
strategies.get("metrics"),
filenames,
strategies.get("strategy", "mean"),
)
selected_result = selected_result.rename(
columns={
metric_name: f"passage_filter_{metric_name}"
for metric_name in strategies["metrics"]
}
)
previous_result = previous_result.drop(
columns=["retrieved_contents", "retrieved_ids", "retrieve_scores"]
)
best_result = pd.concat([previous_result, selected_result], axis=1)
# add 'is_best' column to summary file
summary_df["is_best"] = summary_df["filename"] == selected_filename
# save files
summary_df.to_csv(os.path.join(save_dir, "summary.csv"), index=False)
best_result.to_parquet(
os.path.join(
save_dir, f"best_{os.path.splitext(selected_filename)[0]}.parquet"
),
index=False,
)
return best_result
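# Hedged example of a strategies dict this runner accepts, limited to the keys
# the function body actually reads ("metrics", "speed_threshold", "strategy");
# the concrete values are illustrative only.
strategies = {
    "metrics": ["retrieval_f1", "retrieval_recall", "retrieval_precision"],
    "speed_threshold": 10,  # optional; compared against the average per-row execution time
    "strategy": "mean",     # how select_best aggregates the metric columns
}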

View File

@@ -0,0 +1,134 @@
from pathlib import Path
from typing import List, Tuple, Union
import numpy as np
import pandas as pd
from autorag.embedding.base import EmbeddingModel
from autorag.evaluation.metric.util import calculate_cosine_similarity
from autorag.nodes.passagefilter.base import BasePassageFilter
from autorag.nodes.passagefilter.similarity_threshold_cutoff import (
embedding_query_content,
)
from autorag.utils import result_to_dataframe
from autorag.utils.util import empty_cuda_cache, pop_params
class SimilarityPercentileCutoff(BasePassageFilter):
def __init__(self, project_dir: Union[str, Path], *args, **kwargs):
"""
Initialize the SimilarityPercentileCutoff module
:param project_dir: The project directory to use for initializing the module
:param embedding_model: The embedding model string to use for calculating similarity
Default is "openai" which is OpenAI text-embedding-ada-002 embedding model.
"""
super().__init__(project_dir, *args, **kwargs)
embedding_model = kwargs.pop("embedding_model", "openai")
self.embedding_model = EmbeddingModel.load(embedding_model)()
def __del__(self):
super().__del__()
del self.embedding_model
empty_cuda_cache()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, **kwargs):
queries, contents, scores, ids = self.cast_to_run(previous_result)
kwargs = pop_params(self._pure, kwargs)
return self._pure(queries, contents, scores, ids, **kwargs)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
scores_list: List[List[float]],
ids_list: List[List[str]],
percentile: float,
batch: int = 128,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Re-calculate each content's similarity with the query and keep only the top passages, where the number kept is
the number of contents times the percentile. This is a filter and does not override scores; the output scores
do not come from query-content similarity.
If the number of contents times the percentile is less than 1, only the single highest-similarity content is kept.
:param queries: The list of queries to use for filtering
:param contents_list: The list of lists of contents to filter
:param scores_list: The list of lists of scores retrieved
:param ids_list: The list of lists of ids retrieved
:param percentile: The percentile to cut off
:param batch: The number of queries to be processed in a batch
Default is 128.
:return: Tuple of lists containing the filtered contents, ids, and scores
"""
query_embeddings, content_embeddings = embedding_query_content(
queries, contents_list, self.embedding_model, batch
)
results = list(
map(
lambda x: self.__row_pure(x[0], x[1], x[2], x[3], x[4], percentile),
zip(
query_embeddings,
content_embeddings,
contents_list,
ids_list,
scores_list,
),
)
)
remain_content_list = list(map(lambda x: x[0], results))
remain_ids_list = list(map(lambda x: x[1], results))
remain_scores_list = list(map(lambda x: x[2], results))
return remain_content_list, remain_ids_list, remain_scores_list
@staticmethod
def __row_pure(
query_embedding: List[float],
content_embeddings: List[List[float]],
content_list: List[str],
ids_list: List[str],
scores_list: List[float],
percentile: float,
) -> Tuple[List[str], List[str], List[float]]:
"""
Return tuple of lists containing the filtered contents, ids, and scores
:param query_embedding: Query embedding
:param content_embeddings: Each content embedding
:param content_list: Each content
:param ids_list: Each id
:param scores_list: Each score
:param percentile: The percentile to cut off
:return: Tuple of lists containing the filtered contents, ids, and scores
"""
num_top_k = int(len(content_embeddings) * percentile)
if num_top_k == 0:
num_top_k = 1
similarities = np.array(
list(
map(
lambda x: calculate_cosine_similarity(query_embedding, x),
content_embeddings,
)
)
).tolist()
content_id_score_similarity = list(
zip(ids_list, content_list, scores_list, similarities)
)
sorted_content_id_score_similarity = sorted(
content_id_score_similarity, key=lambda x: x[3], reverse=True
)[:num_top_k]
content_result, id_result, score_result, _ = zip(
*sorted_content_id_score_similarity
)
return list(content_result), list(id_result), list(score_result)

View File

@@ -0,0 +1,112 @@
from typing import List, Tuple
import numpy as np
import pandas as pd
from autorag.embedding.base import EmbeddingModel
from autorag.evaluation.metric.util import calculate_cosine_similarity
from autorag.nodes.passagefilter.base import BasePassageFilter
from autorag.utils.util import (
embedding_query_content,
empty_cuda_cache,
result_to_dataframe,
pop_params,
)
class SimilarityThresholdCutoff(BasePassageFilter):
def __init__(self, project_dir: str, *args, **kwargs):
"""
Initialize the SimilarityThresholdCutoff module
:param project_dir: The project directory to use for initializing the module
:param embedding_model: The embedding model string to use for calculating similarity
Default is "openai" which is OpenAI text-embedding-ada-002 embedding model.
"""
super().__init__(project_dir, *args, **kwargs)
embedding_model = kwargs.get("embedding_model", "openai")
self.embedding_model = EmbeddingModel.load(embedding_model)()
def __del__(self):
del self.embedding_model
empty_cuda_cache()
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
kwargs = pop_params(self._pure, kwargs)
queries, contents, scores, ids = self.cast_to_run(previous_result)
return self._pure(queries, contents, scores, ids, *args, **kwargs)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
scores_list: List[List[float]],
ids_list: List[List[str]],
threshold: float,
batch: int = 128,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Re-calculate each content's similarity with the query and filter out the contents that are below the threshold.
If all contents would be filtered out, only the single highest-similarity content is kept.
This is a filter and does not override scores.
The output of scores is not coming from query-content similarity.
:param queries: The list of queries to use for filtering
:param contents_list: The list of lists of contents to filter
:param scores_list: The list of lists of scores retrieved
:param ids_list: The list of lists of ids retrieved
:param threshold: The threshold to cut off
:param batch: The number of queries to be processed in a batch
Default is 128.
:return: Tuple of lists containing the filtered contents, ids, and scores
"""
query_embeddings, content_embeddings = embedding_query_content(
queries, contents_list, self.embedding_model, batch
)
remain_indices = list(
map(
lambda x: self.__row_pure(x[0], x[1], threshold),
zip(query_embeddings, content_embeddings),
)
)
remain_content_list = list(
map(lambda c, idx: [c[i] for i in idx], contents_list, remain_indices)
)
remain_scores_list = list(
map(lambda s, idx: [s[i] for i in idx], scores_list, remain_indices)
)
remain_ids_list = list(
map(lambda _id, idx: [_id[i] for i in idx], ids_list, remain_indices)
)
return remain_content_list, remain_ids_list, remain_scores_list
@staticmethod
def __row_pure(
query_embedding: List[float], content_embeddings: List[List[float]], threshold: float
) -> List[int]:
"""
Return indices that have to remain.
Return at least one index if there is nothing to remain.
:param query_embedding: Query embedding
:param content_embeddings: Each content embedding
:param threshold: The threshold to cut off
:return: Indices to remain at the contents
"""
similarities = np.array(
list(
map(
lambda x: calculate_cosine_similarity(query_embedding, x),
content_embeddings,
)
)
)
result = np.where(similarities >= threshold)[0].tolist()
if len(result) > 0:
return result
return [np.argmax(similarities)]
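# Compact self-contained sketch of the index-selection rule in __row_pure;
# the similarity values are invented.
import numpy as np

similarities = np.array([0.42, 0.81, 0.77, 0.30])
kept = np.where(similarities >= 0.75)[0].tolist()
assert kept == [1, 2]
# When nothing clears the threshold, fall back to the single best index.
similarities = np.array([0.10, 0.20])
kept = np.where(similarities >= 0.75)[0].tolist() or [int(np.argmax(similarities))]
assert kept == [1]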

View File

@@ -0,0 +1,78 @@
from typing import List, Tuple
import pandas as pd
from autorag.nodes.passagefilter.base import BasePassageFilter
from autorag.utils.util import convert_inputs_to_list, result_to_dataframe
class ThresholdCutoff(BasePassageFilter):
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
_, contents, scores, ids = self.cast_to_run(previous_result)
return self._pure(contents, scores, ids, *args, **kwargs)
def _pure(
self,
contents_list: List[List[str]],
scores_list: List[List[float]],
ids_list: List[List[str]],
threshold: float,
reverse: bool = False,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Filters the contents, scores, and ids based on a previous result's score.
Keeps at least one item per query if all scores are below the threshold.
:param contents_list: List of content strings for each query.
:param scores_list: List of scores for each content.
:param ids_list: List of ids for each content.
:param threshold: The minimum score to keep an item.
:param reverse: If True, the lower the score, the better.
Default is False.
:return: Filtered lists of contents, ids, and scores.
"""
remain_indices = list(
map(lambda x: self.__row_pure(x, threshold, reverse), scores_list)
)
remain_content_list = list(
map(lambda c, idx: [c[i] for i in idx], contents_list, remain_indices)
)
remain_scores_list = list(
map(lambda s, idx: [s[i] for i in idx], scores_list, remain_indices)
)
remain_ids_list = list(
map(lambda _id, idx: [_id[i] for i in idx], ids_list, remain_indices)
)
return remain_content_list, remain_ids_list, remain_scores_list
@convert_inputs_to_list
def __row_pure(
self, scores_list: List[float], threshold: float, reverse: bool = False
) -> List[int]:
"""
Return indices that have to remain.
Return at least one index if there is nothing to remain.
:param scores_list: Each score
:param threshold: The threshold to cut off
:param reverse: If True, the lower the score, the better
Default is False.
:return: Indices to remain at the contents
"""
assert isinstance(scores_list, list), "scores_list must be a list."
if reverse:
remain_indices = [
i for i, score in enumerate(scores_list) if score <= threshold
]
default_index = scores_list.index(min(scores_list))
else:
remain_indices = [
i for i, score in enumerate(scores_list) if score >= threshold
]
default_index = scores_list.index(max(scores_list))
return remain_indices if remain_indices else [default_index]
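# Short check of the reverse-flag semantics described above, with made-up scores.
scores = [0.9, 0.2, 0.5]
threshold = 0.4
keep_normal = [i for i, s in enumerate(scores) if s >= threshold]   # reverse=False
keep_reverse = [i for i, s in enumerate(scores) if s <= threshold]  # reverse=True
assert keep_normal == [0, 2]
assert keep_reverse == [1]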

View File

@@ -0,0 +1,18 @@
from .cohere import CohereReranker
from .colbert import ColbertReranker
from .flag_embedding import FlagEmbeddingReranker
from .flag_embedding_llm import FlagEmbeddingLLMReranker
from .jina import JinaReranker
from .koreranker import KoReranker
from .monot5 import MonoT5
from .pass_reranker import PassReranker
from .rankgpt import RankGPT
from .sentence_transformer import SentenceTransformerReranker
from .time_reranker import TimeReranker
from .upr import Upr
from .openvino import OpenVINOReranker
from .voyageai import VoyageAIReranker
from .mixedbreadai import MixedbreadAIReranker
from .flashrank import FlashRankReranker
from .dragonkue2 import DragonKue2  # 250313 added - 김용연

View File

@@ -0,0 +1,55 @@
import abc
import logging
from pathlib import Path
from typing import Union
import pandas as pd
from autorag.schema import BaseModule
from autorag.utils import validate_qa_dataset
logger = logging.getLogger("AutoRAG")
class BasePassageReranker(BaseModule, metaclass=abc.ABCMeta):
def __init__(self, project_dir: Union[str, Path], *args, **kwargs):
logger.info(
f"Initialize passage reranker node - {self.__class__.__name__} module..."
)
def __del__(self):
logger.info(
f"Deleting passage reranker node - {self.__class__.__name__} module..."
)
def cast_to_run(self, previous_result: pd.DataFrame, *args, **kwargs):
logger.info(
f"Running passage reranker node - {self.__class__.__name__} module..."
)
validate_qa_dataset(previous_result)
# find queries columns
assert (
"query" in previous_result.columns
), "previous_result must have query column."
queries = previous_result["query"].tolist()
# find contents_list columns
assert (
"retrieved_contents" in previous_result.columns
), "previous_result must have retrieved_contents column."
contents = previous_result["retrieved_contents"].tolist()
# find scores columns
assert (
"retrieve_scores" in previous_result.columns
), "previous_result must have retrieve_scores column."
scores = previous_result["retrieve_scores"].tolist()
# find ids columns
assert (
"retrieved_ids" in previous_result.columns
), "previous_result must have retrieved_ids column."
ids = previous_result["retrieved_ids"].tolist()
return queries, contents, scores, ids

View File

@@ -0,0 +1,119 @@
import os
from typing import List, Tuple
import cohere
import pandas as pd
from cohere import RerankResponseResultsItem
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.utils.util import get_event_loop, process_batch, result_to_dataframe
class CohereReranker(BasePassageReranker):
def __init__(self, project_dir: str, *args, **kwargs):
"""
Initialize Cohere rerank node.
:param project_dir: The project directory path.
:param api_key: The API key for Cohere rerank.
You can set it in the environment variable COHERE_API_KEY.
Or, you can directly set it on the config YAML file using this parameter.
Default is env variable "COHERE_API_KEY".
:param kwargs: Extra arguments that are not used.
"""
super().__init__(project_dir)
api_key = kwargs.pop("api_key", None)
api_key = os.getenv("COHERE_API_KEY", None) if api_key is None else api_key
if api_key is None:
api_key = os.getenv("CO_API_KEY", None)
if api_key is None:
raise KeyError(
"Please set the API key for Cohere rerank in the environment variable COHERE_API_KEY "
"or directly set it on the config YAML file."
)
self.cohere_client = cohere.AsyncClientV2(api_key=api_key)
def __del__(self):
del self.cohere_client
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, contents, scores, ids = self.cast_to_run(previous_result)
top_k = kwargs.pop("top_k")
batch = kwargs.pop("batch", 64)
model = kwargs.pop("model", "rerank-v3.5")
return self._pure(queries, contents, scores, ids, top_k, batch, model)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
scores_list: List[List[float]],
ids_list: List[List[str]],
top_k: int,
batch: int = 64,
model: str = "rerank-v3.5",
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank a list of contents with Cohere rerank models.
You can get the API key from https://cohere.com/rerank and set it in the environment variable COHERE_API_KEY.
:param queries: The list of queries to use for reranking
:param contents_list: The list of lists of contents to rerank
:param scores_list: The list of lists of scores retrieved from the initial ranking
:param ids_list: The list of lists of ids retrieved from the initial ranking
:param top_k: The number of passages to be retrieved
:param batch: The number of queries to be processed in a batch
:param model: The model name for Cohere rerank.
You can choose between "rerank-v3.5", "rerank-english-v3.0", and "rerank-multilingual-v3.0".
Default is "rerank-v3.5".
:return: Tuple of lists containing the reranked contents, ids, and scores
"""
# Run async cohere_rerank_pure function
tasks = [
cohere_rerank_pure(self.cohere_client, model, query, document, ids, top_k)
for query, document, ids in zip(queries, contents_list, ids_list)
]
loop = get_event_loop()
results = loop.run_until_complete(process_batch(tasks, batch_size=batch))
content_result = list(map(lambda x: x[0], results))
id_result = list(map(lambda x: x[1], results))
score_result = list(map(lambda x: x[2], results))
return content_result, id_result, score_result
async def cohere_rerank_pure(
cohere_client: cohere.AsyncClientV2,
model: str,
query: str,
documents: List[str],
ids: List[str],
top_k: int,
) -> Tuple[List[str], List[str], List[float]]:
"""
Rerank a list of contents with Cohere rerank models.
:param cohere_client: The Cohere AsyncClient to use for reranking
:param model: The model name for Cohere rerank
:param query: The query to use for reranking
:param documents: The list of contents to rerank
:param ids: The list of ids corresponding to the documents
:param top_k: The number of passages to be retrieved
:return: Tuple of lists containing the reranked contents, ids, and scores
"""
rerank_results = await cohere_client.rerank(
model=model,
query=query,
documents=documents,
top_n=top_k,
return_documents=False,
)
results: List[RerankResponseResultsItem] = rerank_results.results
reranked_scores: List[float] = list(map(lambda x: x.relevance_score, results))
indices = list(map(lambda x: x.index, results))
reranked_contents: List[str] = list(map(lambda i: documents[i], indices))
reranked_ids: List[str] = list(map(lambda i: ids[i], indices))
return reranked_contents, reranked_ids, reranked_scores
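# Minimal illustration of how the indices returned by the rerank call map back
# onto contents and ids, mirroring the tail of cohere_rerank_pure; the documents
# and indices here are invented.
documents = ["doc a", "doc b", "doc c"]
ids = ["id-a", "id-b", "id-c"]
indices = [2, 0]  # as if returned by the rerank API, best match first
reranked_contents = [documents[i] for i in indices]
reranked_ids = [ids[i] for i in indices]
assert reranked_contents == ["doc c", "doc a"]
assert reranked_ids == ["id-c", "id-a"]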

View File

@@ -0,0 +1,213 @@
from typing import List, Tuple
import numpy as np
import pandas as pd
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.utils.util import (
flatten_apply,
sort_by_scores,
select_top_k,
pop_params,
result_to_dataframe,
empty_cuda_cache,
)
class ColbertReranker(BasePassageReranker):
def __init__(
self,
project_dir: str,
model_name: str = "colbert-ir/colbertv2.0",
*args,
**kwargs,
):
"""
Initialize a colbert rerank model for reranking.
:param project_dir: The project directory
:param model_name: The model name for Colbert rerank.
You can choose a colbert model for reranking.
The default is "colbert-ir/colbertv2.0".
:param kwargs: Extra parameter for the model.
"""
super().__init__(project_dir)
try:
import torch
from transformers import AutoModel, AutoTokenizer
except ImportError:
raise ImportError(
"Pytorch is not installed. Please install pytorch to use Colbert reranker."
)
self.device = "cuda" if torch.cuda.is_available() else "cpu"
model_params = pop_params(AutoModel.from_pretrained, kwargs)
self.model = AutoModel.from_pretrained(model_name, **model_params).to(
self.device
)
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
def __del__(self):
del self.model
empty_cuda_cache()
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, contents, _, ids = self.cast_to_run(previous_result)
top_k = kwargs.pop("top_k")
batch = kwargs.pop("batch", 64)
return self._pure(queries, contents, ids, top_k, batch)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
ids_list: List[List[str]],
top_k: int,
batch: int = 64,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank a list of contents with Colbert rerank models.
You can get more information about a Colbert model at https://huggingface.co/colbert-ir/colbertv2.0.
It uses a BERT-based model, so using a CUDA GPU is recommended for faster reranking.
:param queries: The list of queries to use for reranking
:param contents_list: The list of lists of contents to rerank
:param ids_list: The list of lists of ids retrieved from the initial ranking
:param top_k: The number of passages to be retrieved
:param batch: The number of queries to be processed in a batch
Default is 64.
:return: Tuple of lists containing the reranked contents, ids, and scores
"""
# get query and content embeddings
query_embedding_list = get_colbert_embedding_batch(
queries, self.model, self.tokenizer, batch
)
content_embedding_list = flatten_apply(
get_colbert_embedding_batch,
contents_list,
model=self.model,
tokenizer=self.tokenizer,
batch_size=batch,
)
df = pd.DataFrame(
{
"ids": ids_list,
"query_embedding": query_embedding_list,
"contents": contents_list,
"content_embedding": content_embedding_list,
}
)
temp_df = df.explode("content_embedding")
temp_df["score"] = temp_df.apply(
lambda x: get_colbert_score(x["query_embedding"], x["content_embedding"]),
axis=1,
)
df["scores"] = (
temp_df.groupby(level=0, sort=False)["score"].apply(list).tolist()
)
df[["contents", "ids", "scores"]] = df.apply(
sort_by_scores, axis=1, result_type="expand"
)
results = select_top_k(df, ["contents", "ids", "scores"], top_k)
return (
results["contents"].tolist(),
results["ids"].tolist(),
results["scores"].tolist(),
)
def get_colbert_embedding_batch(
input_strings: List[str], model, tokenizer, batch_size: int
) -> List[np.array]:
try:
import torch
except ImportError:
raise ImportError(
"Pytorch is not installed. Please install pytorch to use Colbert reranker."
)
encoding = tokenizer(
input_strings,
return_tensors="pt",
padding=True,
truncation=True,
max_length=model.config.max_position_embeddings,
)
input_batches = slice_tokenizer_result(encoding, batch_size)
result_embedding = []
with torch.no_grad():
for encoding_batch in input_batches:
result_embedding.append(model(**encoding_batch).last_hidden_state)
total_tensor = torch.cat(
result_embedding, dim=0
) # shape [batch_size, token_length, embedding_dim]
tensor_results = list(total_tensor.chunk(total_tensor.size()[0]))
if torch.cuda.is_available():
return list(map(lambda x: x.detach().cpu().numpy(), tensor_results))
else:
return list(map(lambda x: x.detach().numpy(), tensor_results))
def slice_tokenizer_result(tokenizer_output, batch_size):
input_ids_batches = slice_tensor(tokenizer_output["input_ids"], batch_size)
attention_mask_batches = slice_tensor(
tokenizer_output["attention_mask"], batch_size
)
token_type_ids_batches = slice_tensor(
tokenizer_output.get("token_type_ids", None), batch_size
)
return [
{
"input_ids": input_ids,
"attention_mask": attention_mask,
"token_type_ids": token_type_ids,
}
for input_ids, attention_mask, token_type_ids in zip(
input_ids_batches, attention_mask_batches, token_type_ids_batches
)
]
def slice_tensor(input_tensor, batch_size):
try:
import torch
except ImportError:
raise ImportError(
"Pytorch is not installed. Please install pytorch to use Colbert reranker."
)
# Calculate the number of full batches
num_full_batches = input_tensor.size(0) // batch_size
# Slice the tensor into batches
tensor_list = [
input_tensor[i * batch_size : (i + 1) * batch_size]
for i in range(num_full_batches)
]
# Handle the last batch if it's smaller than batch_size
remainder = input_tensor.size(0) % batch_size
if remainder:
tensor_list.append(input_tensor[-remainder:])
device = "cuda" if torch.cuda.is_available() else "cpu"
tensor_list = list(map(lambda x: x.to(device), tensor_list))
return tensor_list
def get_colbert_score(query_embedding: np.array, content_embedding: np.array) -> float:
if query_embedding.ndim == 3 and content_embedding.ndim == 3:
query_embedding = query_embedding.reshape(-1, query_embedding.shape[-1])
content_embedding = content_embedding.reshape(-1, content_embedding.shape[-1])
sim_matrix = np.dot(query_embedding, content_embedding.T) / (
np.linalg.norm(query_embedding, axis=1)[:, np.newaxis]
* np.linalg.norm(content_embedding, axis=1)
)
max_sim_scores = np.max(sim_matrix, axis=1)
return float(np.mean(max_sim_scores))
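# Self-contained numeric sketch of the MaxSim scoring implemented in
# get_colbert_score, using toy 2-d token embeddings.
import numpy as np

query_embedding = np.array([[1.0, 0.0], [0.0, 1.0]])    # 2 query tokens
content_embedding = np.array([[1.0, 0.0], [0.7, 0.7]])  # 2 content tokens
sim_matrix = np.dot(query_embedding, content_embedding.T) / (
    np.linalg.norm(query_embedding, axis=1)[:, np.newaxis]
    * np.linalg.norm(content_embedding, axis=1)
)
# Max over content tokens per query token, then mean over query tokens.
score = float(np.mean(np.max(sim_matrix, axis=1)))
print(round(score, 4))  # roughly 0.8536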

View File

@@ -0,0 +1,138 @@
# 250313 reranker module_type added - 김용연
from typing import List, Tuple
import numpy as np
import pandas as pd
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.utils.util import (
make_batch,
sort_by_scores,
flatten_apply,
select_top_k,
result_to_dataframe,
empty_cuda_cache,
)
class DragonKue2(BasePassageReranker):
def __init__(self, project_dir: str, *args, **kwargs):
super().__init__(project_dir)
try:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
except ImportError:
raise ImportError("For using dragonkue2, please install torch first.")
model_path = "dragonkue/bge-reranker-v2-m3-ko"
self.tokenizer = AutoTokenizer.from_pretrained(model_path)
self.model = AutoModelForSequenceClassification.from_pretrained(model_path)
self.model.eval()
# Determine the device to run the model on (GPU if available, otherwise CPU)
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.model.to(self.device)
def __del__(self):
del self.model
empty_cuda_cache()
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, contents, _, ids = self.cast_to_run(previous_result)
top_k = kwargs.pop("top_k")
batch = kwargs.pop("batch", 64)
return self._pure(queries, contents, ids, top_k, batch)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
ids_list: List[List[str]],
top_k: int,
batch: int = 64,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank a list of contents based on their relevance to a query using dragonkue/bge-reranker-v2-m3-ko.
bge-reranker-v2-m3-ko is a reranker tuned for Korean (https://huggingface.co/dragonkue/bge-reranker-v2-m3-ko).
:param queries: The list of queries to use for reranking
:param contents_list: The list of lists of contents to rerank
:param ids_list: The list of lists of ids retrieved from the initial ranking
:param top_k: The number of passages to be retrieved
:param batch: The number of queries to be processed in a batch
Default is 64.
:return: Tuple of lists containing the reranked contents, ids, and scores
"""
nested_list = [
list(map(lambda x: [query, x], content_list))
for query, content_list in zip(queries, contents_list)
]
scores_nps = flatten_apply(
dragonku2_run_model,
nested_list,
model=self.model,
batch_size=batch,
tokenizer=self.tokenizer,
device=self.device,
)
rerank_scores = list(
map(
lambda scores: exp_normalize(np.array(scores)).astype(float), scores_nps
)
)
df = pd.DataFrame(
{
"contents": contents_list,
"ids": ids_list,
"scores": rerank_scores,
}
)
df[["contents", "ids", "scores"]] = df.apply(
sort_by_scores, axis=1, result_type="expand"
)
results = select_top_k(df, ["contents", "ids", "scores"], top_k)
return (
results["contents"].tolist(),
results["ids"].tolist(),
results["scores"].tolist(),
)
def dragonku2_run_model(input_texts, model, tokenizer, device, batch_size: int):  # 250313 added - 김용연
try:
import torch
except ImportError:
raise ImportError("For using drangonku2, please install torch first.")
batch_input_texts = make_batch(input_texts, batch_size)
results = []
for batch_texts in batch_input_texts:
inputs = tokenizer(
batch_texts,
padding=True,
truncation=True,
return_tensors="pt",
max_length=512,
)
inputs = inputs.to(device)
with torch.no_grad():
scores = (
model(**inputs, return_dict=True)
.logits.view(
-1,
)
.float()
)
scores_np = scores.cpu().numpy()
results.extend(scores_np)
return results
def exp_normalize(x):
b = x.max()
y = np.exp(x - b)
return y / y.sum()
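# Quick numeric check that exp_normalize behaves as a numerically stable softmax;
# the logits are made up.
import numpy as np

logits = np.array([2.0, 0.5, -1.0])
shifted = np.exp(logits - logits.max())
probs = shifted / shifted.sum()
assert abs(probs.sum() - 1.0) < 1e-9
assert probs.argmax() == 0  # the largest logit keeps the largest probability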

View File

@@ -0,0 +1,112 @@
from typing import List, Tuple, Iterable
import pandas as pd
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.utils.util import (
make_batch,
sort_by_scores,
flatten_apply,
select_top_k,
pop_params,
result_to_dataframe,
empty_cuda_cache,
)
class FlagEmbeddingReranker(BasePassageReranker):
def __init__(
self, project_dir, model_name: str = "BAAI/bge-reranker-large", *args, **kwargs
):
"""
Initialize the FlagEmbeddingReranker module.
:param project_dir: The project directory.
:param model_name: The name of the BAAI Reranker normal-model name.
Default is "BAAI/bge-reranker-large"
:param kwargs: Extra parameter for FlagEmbedding.FlagReranker
"""
super().__init__(project_dir)
try:
from FlagEmbedding import FlagReranker
except ImportError:
raise ImportError(
"FlagEmbeddingReranker requires the 'FlagEmbedding' package to be installed."
)
model_params = pop_params(FlagReranker.__init__, kwargs)
model_params.pop("model_name_or_path", None)
self.model = FlagReranker(model_name_or_path=model_name, **model_params)
def __del__(self):
del self.model
empty_cuda_cache()
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, contents, _, ids = self.cast_to_run(previous_result)
top_k = kwargs.pop("top_k")
batch = kwargs.pop("batch", 64)
return self._pure(queries, contents, ids, top_k, batch)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
ids_list: List[List[str]],
top_k: int,
batch: int = 64,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank a list of contents based on their relevance to a query using BAAI normal-Reranker model.
:param queries: The list of queries to use for reranking
:param contents_list: The list of lists of contents to rerank
:param ids_list: The list of lists of ids retrieved from the initial ranking
:param top_k: The number of passages to be retrieved
:param batch: The number of queries to be processed in a batch
Default is 64.
:return: Tuple of lists containing the reranked contents, ids, and scores
"""
nested_list = [
list(map(lambda x: [query, x], content_list))
for query, content_list in zip(queries, contents_list)
]
rerank_scores = flatten_apply(
flag_embedding_run_model, nested_list, model=self.model, batch_size=batch
)
df = pd.DataFrame(
{
"contents": contents_list,
"ids": ids_list,
"scores": rerank_scores,
}
)
df[["contents", "ids", "scores"]] = df.apply(
sort_by_scores, axis=1, result_type="expand"
)
results = select_top_k(df, ["contents", "ids", "scores"], top_k)
return (
results["contents"].tolist(),
results["ids"].tolist(),
results["scores"].tolist(),
)
def flag_embedding_run_model(input_texts, model, batch_size: int):
try:
import torch
except ImportError:
raise ImportError("FlagEmbeddingReranker requires PyTorch to be installed.")
batch_input_texts = make_batch(input_texts, batch_size)
results = []
for batch_texts in batch_input_texts:
with torch.no_grad():
pred_scores = model.compute_score(sentence_pairs=batch_texts)
if batch_size == 1 or not isinstance(pred_scores, Iterable):
results.append(pred_scores)
else:
results.extend(pred_scores)
return results

View File

@@ -0,0 +1,101 @@
from typing import List, Tuple
import pandas as pd
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.nodes.passagereranker.flag_embedding import flag_embedding_run_model
from autorag.utils.util import (
flatten_apply,
sort_by_scores,
select_top_k,
pop_params,
result_to_dataframe,
empty_cuda_cache,
)
class FlagEmbeddingLLMReranker(BasePassageReranker):
def __init__(
self,
project_dir,
model_name: str = "BAAI/bge-reranker-v2-gemma",
*args,
**kwargs,
):
"""
Initialize the FlagEmbeddingReranker module.
:param project_dir: The project directory.
:param model_name: The name of the BAAI Reranker LLM-based-model name.
Default is "BAAI/bge-reranker-v2-gemma"
:param kwargs: Extra parameter for FlagEmbedding.FlagReranker
"""
super().__init__(project_dir)
try:
from FlagEmbedding import FlagLLMReranker
except ImportError:
raise ImportError(
"FlagEmbeddingLLMReranker requires the 'FlagEmbedding' package to be installed."
)
model_params = pop_params(FlagLLMReranker.__init__, kwargs)
model_params.pop("model_name_or_path", None)
self.model = FlagLLMReranker(model_name_or_path=model_name, **model_params)
def __del__(self):
del self.model
empty_cuda_cache()
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, contents, _, ids = self.cast_to_run(previous_result)
top_k = kwargs.pop("top_k")
batch = kwargs.pop("batch", 64)
return self._pure(queries, contents, ids, top_k, batch)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
ids_list: List[List[str]],
top_k: int,
batch: int = 64,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank a list of contents based on their relevance to a query using BAAI LLM-based-Reranker model.
:param queries: The list of queries to use for reranking
:param contents_list: The list of lists of contents to rerank
:param ids_list: The list of lists of ids retrieved from the initial ranking
:param top_k: The number of passages to be retrieved
:param batch: The number of queries to be processed in a batch
Default is 64.
:return: tuple of lists containing the reranked contents, ids, and scores
"""
nested_list = [
list(map(lambda x: [query, x], content_list))
for query, content_list in zip(queries, contents_list)
]
rerank_scores = flatten_apply(
flag_embedding_run_model, nested_list, model=self.model, batch_size=batch
)
df = pd.DataFrame(
{
"contents": contents_list,
"ids": ids_list,
"scores": rerank_scores,
}
)
df[["contents", "ids", "scores"]] = df.apply(
sort_by_scores, axis=1, result_type="expand"
)
results = select_top_k(df, ["contents", "ids", "scores"], top_k)
return (
results["contents"].tolist(),
results["ids"].tolist(),
results["scores"].tolist(),
)

View File

@@ -0,0 +1,245 @@
import json
from pathlib import Path
import pandas as pd
import numpy as np
import os
import zipfile
import requests
from tqdm import tqdm
import collections
from typing import List, Dict, Tuple
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.utils import result_to_dataframe
from autorag.utils.util import (
flatten_apply,
sort_by_scores,
select_top_k,
make_batch,
empty_cuda_cache,
)
model_url = "https://huggingface.co/prithivida/flashrank/resolve/main/{}.zip"
model_file_map = {
"ms-marco-TinyBERT-L-2-v2": "flashrank-TinyBERT-L-2-v2.onnx",
"ms-marco-MiniLM-L-12-v2": "flashrank-MiniLM-L-12-v2_Q.onnx",
"ms-marco-MultiBERT-L-12": "flashrank-MultiBERT-L12_Q.onnx",
"rank-T5-flan": "flashrank-rankt5_Q.onnx",
"ce-esci-MiniLM-L12-v2": "flashrank-ce-esci-MiniLM-L12-v2_Q.onnx",
"miniReranker_arabic_v1": "miniReranker_arabic_v1.onnx",
}
class FlashRankReranker(BasePassageReranker):
def __init__(
self, project_dir: str, model: str = "ms-marco-TinyBERT-L-2-v2", *args, **kwargs
):
"""
Initialize FlashRank rerank node.
:param project_dir: The project directory path.
:param model: The model name for FlashRank rerank.
You can get the list of available models from https://github.com/PrithivirajDamodaran/FlashRank.
Default is "ms-marco-TinyBERT-L-2-v2".
Does not support "rank_zephyr_7b_v1_full" due to a parallel inference issue.
:param kwargs: Extra arguments that are not used.
"""
super().__init__(project_dir)
try:
from tokenizers import Tokenizer
except ImportError:
raise ImportError(
"Tokenizer is not installed. Please install tokenizers to use FlashRank reranker."
)
cache_dir = kwargs.pop("cache_dir", "/tmp")
max_length = kwargs.pop("max_length", 512)
self.cache_dir: Path = Path(cache_dir)
self.model_dir: Path = self.cache_dir / model
self._prepare_model_dir(model)
model_file = model_file_map[model]
try:
import onnxruntime as ort
except ImportError:
raise ImportError(
"onnxruntime is not installed. Please install onnxruntime to use FlashRank reranker."
)
self.session = ort.InferenceSession(str(self.model_dir / model_file))
self.tokenizer: Tokenizer = self._get_tokenizer(max_length)
def __del__(self):
del self.session
del self.tokenizer
empty_cuda_cache()
super().__del__()
def _prepare_model_dir(self, model_name: str):
if not self.cache_dir.exists():
self.cache_dir.mkdir(parents=True, exist_ok=True)
if not self.model_dir.exists():
self._download_model_files(model_name)
def _download_model_files(self, model_name: str):
local_zip_file = self.cache_dir / f"{model_name}.zip"
formatted_model_url = model_url.format(model_name)
with requests.get(formatted_model_url, stream=True) as r:
r.raise_for_status()
total_size = int(r.headers.get("content-length", 0))
with (
open(local_zip_file, "wb") as f,
tqdm(
desc=local_zip_file.name,
total=total_size,
unit="iB",
unit_scale=True,
unit_divisor=1024,
) as bar,
):
for chunk in r.iter_content(chunk_size=8192):
size = f.write(chunk)
bar.update(size)
with zipfile.ZipFile(local_zip_file, "r") as zip_ref:
zip_ref.extractall(self.cache_dir)
os.remove(local_zip_file)
def _get_tokenizer(self, max_length: int = 512):
try:
from tokenizers import AddedToken, Tokenizer
except ImportError:
raise ImportError(
"Pytorch is not installed. Please install pytorch to use FlashRank reranker."
)
config = json.load(open(str(self.model_dir / "config.json")))
tokenizer_config = json.load(
open(str(self.model_dir / "tokenizer_config.json"))
)
tokens_map = json.load(open(str(self.model_dir / "special_tokens_map.json")))
tokenizer = Tokenizer.from_file(str(self.model_dir / "tokenizer.json"))
tokenizer.enable_truncation(
max_length=min(tokenizer_config["model_max_length"], max_length)
)
tokenizer.enable_padding(
pad_id=config["pad_token_id"], pad_token=tokenizer_config["pad_token"]
)
for token in tokens_map.values():
if isinstance(token, str):
tokenizer.add_special_tokens([token])
elif isinstance(token, dict):
tokenizer.add_special_tokens([AddedToken(**token)])
vocab_file = self.model_dir / "vocab.txt"
if vocab_file.exists():
tokenizer.vocab = self._load_vocab(vocab_file)
tokenizer.ids_to_tokens = collections.OrderedDict(
[(ids, tok) for tok, ids in tokenizer.vocab.items()]
)
return tokenizer
def _load_vocab(self, vocab_file: Path) -> Dict[str, int]:
vocab = collections.OrderedDict()
with open(vocab_file, "r", encoding="utf-8") as reader:
tokens = reader.readlines()
for index, token in enumerate(tokens):
token = token.rstrip("\n")
vocab[token] = index
return vocab
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, contents, _, ids = self.cast_to_run(previous_result)
top_k = kwargs.pop("top_k")
batch = kwargs.pop("batch", 64)
return self._pure(queries, contents, ids, top_k, batch)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
ids_list: List[List[str]],
top_k: int,
batch: int = 64,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank a list of contents with FlashRank rerank models.
:param queries: The list of queries to use for reranking
:param contents_list: The list of lists of contents to rerank
:param ids_list: The list of lists of ids retrieved from the initial ranking
:param top_k: The number of passages to be retrieved
:param batch: The number of queries to be processed in a batch
:return: Tuple of lists containing the reranked contents, ids, and scores
"""
nested_list = [
list(map(lambda x: [query, x], content_list))
for query, content_list in zip(queries, contents_list)
]
rerank_scores = flatten_apply(
flashrank_run_model,
nested_list,
session=self.session,
batch_size=batch,
tokenizer=self.tokenizer,
)
df = pd.DataFrame(
{
"contents": contents_list,
"ids": ids_list,
"scores": rerank_scores,
}
)
df[["contents", "ids", "scores"]] = df.apply(
sort_by_scores, axis=1, result_type="expand"
)
results = select_top_k(df, ["contents", "ids", "scores"], top_k)
return (
results["contents"].tolist(),
results["ids"].tolist(),
results["scores"].tolist(),
)
def flashrank_run_model(input_texts, tokenizer, session, batch_size: int):
batch_input_texts = make_batch(input_texts, batch_size)
results = []
for batch_texts in tqdm(batch_input_texts):
input_text = tokenizer.encode_batch(batch_texts)
input_ids = np.array([e.ids for e in input_text])
token_type_ids = np.array([e.type_ids for e in input_text])
attention_mask = np.array([e.attention_mask for e in input_text])
use_token_type_ids = token_type_ids is not None and not np.all(
token_type_ids == 0
)
onnx_input = {
"input_ids": input_ids.astype(np.int64),
"attention_mask": attention_mask.astype(np.int64),
}
if use_token_type_ids:
onnx_input["token_type_ids"] = token_type_ids.astype(np.int64)
outputs = session.run(None, onnx_input)
logits = outputs[0]
if logits.shape[1] == 1:
scores = 1 / (1 + np.exp(-logits.flatten()))
else:
exp_logits = np.exp(logits)
scores = exp_logits[:, 1] / np.sum(exp_logits, axis=1)
results.extend(scores)
return results
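# Isolated check of the score post-processing at the end of flashrank_run_model,
# with invented logits: single-logit heads go through a sigmoid, two-logit heads
# through a softmax over the positive class.
import numpy as np

single_logits = np.array([[2.0], [-1.0]])
sigmoid_scores = 1 / (1 + np.exp(-single_logits.flatten()))
pair_logits = np.array([[0.2, 2.2], [1.5, -0.5]])
exp_logits = np.exp(pair_logits)
softmax_scores = exp_logits[:, 1] / np.sum(exp_logits, axis=1)
assert sigmoid_scores.shape == (2,) and softmax_scores.shape == (2,)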

View File

@@ -0,0 +1,115 @@
import os
from typing import List, Tuple
import aiohttp
import pandas as pd
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.utils.util import get_event_loop, process_batch, result_to_dataframe
JINA_API_URL = "https://api.jina.ai/v1/rerank"
class JinaReranker(BasePassageReranker):
def __init__(self, project_dir: str, api_key: str = None, *args, **kwargs):
"""
Initialize Jina rerank node.
:param project_dir: The project directory path.
:param api_key: The API key for Jina rerank.
You can set it in the environment variable JINAAI_API_KEY.
Or, you can directly set it on the config YAML file using this parameter.
Default is env variable "JINAAI_API_KEY".
:param kwargs: Extra arguments that are not used.
"""
super().__init__(project_dir)
if api_key is None:
api_key = os.getenv("JINAAI_API_KEY", None)
if api_key is None:
raise ValueError(
"API key is not provided."
"You can set it as an argument or as an environment variable 'JINAAI_API_KEY'"
)
self.session = aiohttp.ClientSession(loop=get_event_loop())
self.session.headers.update(
{"Authorization": f"Bearer {api_key}", "Accept-Encoding": "identity"}
)
def __del__(self):
self.session.close()
del self.session
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, contents, _, ids = self.cast_to_run(previous_result)
top_k = kwargs.pop("top_k")
batch = kwargs.pop("batch", 8)
model = kwargs.pop("model", "jina-reranker-v1-base-en")
return self._pure(queries, contents, ids, top_k, model, batch)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
ids_list: List[List[str]],
top_k: int,
model: str = "jina-reranker-v1-base-en",
batch: int = 8,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank a list of contents with Jina rerank models.
You can get the API key from https://jina.ai/reranker and set it in the environment variable JINAAI_API_KEY.
:param queries: The list of queries to use for reranking
:param contents_list: The list of lists of contents to rerank
:param ids_list: The list of lists of ids retrieved from the initial ranking
:param top_k: The number of passages to be retrieved
:param model: The model name for Jina rerank.
You can choose between "jina-reranker-v1-base-en" and "jina-colbert-v1-en".
Default is "jina-reranker-v1-base-en".
:param batch: The number of queries to be processed in a batch
:return: Tuple of lists containing the reranked contents, ids, and scores
"""
tasks = [
jina_reranker_pure(
self.session, query, contents, ids, top_k=top_k, model=model
)
for query, contents, ids in zip(queries, contents_list, ids_list)
]
loop = get_event_loop()
results = loop.run_until_complete(process_batch(tasks, batch))
content_result, id_result, score_result = zip(*results)
return list(content_result), list(id_result), list(score_result)
async def jina_reranker_pure(
session,
query: str,
contents: List[str],
ids: List[str],
top_k: int,
model: str = "jina-reranker-v1-base-en",
) -> Tuple[List[str], List[str], List[float]]:
async with session.post(
JINA_API_URL,
json={
"query": query,
"documents": contents,
"model": model,
"top_n": top_k,
},
) as resp:
resp_json = await resp.json()
if "results" not in resp_json:
raise RuntimeError(f"Invalid response from Jina API: {resp_json['detail']}")
results = resp_json["results"]
indices = list(map(lambda x: x["index"], results))
score_result = list(map(lambda x: x["relevance_score"], results))
id_result = list(map(lambda x: ids[x], indices))
content_result = list(map(lambda x: contents[x], indices))
return content_result, id_result, score_result

View File

@@ -0,0 +1,136 @@
from typing import List, Tuple
import numpy as np
import pandas as pd
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.utils.util import (
make_batch,
sort_by_scores,
flatten_apply,
select_top_k,
result_to_dataframe,
empty_cuda_cache,
)
class KoReranker(BasePassageReranker):
def __init__(self, project_dir: str, *args, **kwargs):
super().__init__(project_dir)
try:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
except ImportError:
raise ImportError("For using KoReranker, please install torch first.")
model_path = "Dongjin-kr/ko-reranker"
self.tokenizer = AutoTokenizer.from_pretrained(model_path)
self.model = AutoModelForSequenceClassification.from_pretrained(model_path)
self.model.eval()
# Determine the device to run the model on (GPU if available, otherwise CPU)
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.model.to(self.device)
def __del__(self):
del self.model
empty_cuda_cache()
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, contents, _, ids = self.cast_to_run(previous_result)
top_k = kwargs.pop("top_k")
batch = kwargs.pop("batch", 64)
return self._pure(queries, contents, ids, top_k, batch)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
ids_list: List[List[str]],
top_k: int,
batch: int = 64,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank a list of contents based on their relevance to a query using ko-reranker.
ko-reranker is a reranker tuned for Korean (https://huggingface.co/Dongjin-kr/ko-reranker).
:param queries: The list of queries to use for reranking
:param contents_list: The list of lists of contents to rerank
:param ids_list: The list of lists of ids retrieved from the initial ranking
:param top_k: The number of passages to be retrieved
:param batch: The number of queries to be processed in a batch
Default is 64.
:return: Tuple of lists containing the reranked contents, ids, and scores
"""
nested_list = [
list(map(lambda x: [query, x], content_list))
for query, content_list in zip(queries, contents_list)
]
scores_nps = flatten_apply(
koreranker_run_model,
nested_list,
model=self.model,
batch_size=batch,
tokenizer=self.tokenizer,
device=self.device,
)
rerank_scores = list(
map(
lambda scores: exp_normalize(np.array(scores)).astype(float), scores_nps
)
)
df = pd.DataFrame(
{
"contents": contents_list,
"ids": ids_list,
"scores": rerank_scores,
}
)
df[["contents", "ids", "scores"]] = df.apply(
sort_by_scores, axis=1, result_type="expand"
)
results = select_top_k(df, ["contents", "ids", "scores"], top_k)
return (
results["contents"].tolist(),
results["ids"].tolist(),
results["scores"].tolist(),
)
def koreranker_run_model(input_texts, model, tokenizer, device, batch_size: int):
try:
import torch
except ImportError:
raise ImportError("For using KoReranker, please install torch first.")
batch_input_texts = make_batch(input_texts, batch_size)
results = []
for batch_texts in batch_input_texts:
inputs = tokenizer(
batch_texts,
padding=True,
truncation=True,
return_tensors="pt",
max_length=512,
)
inputs = inputs.to(device)
with torch.no_grad():
scores = (
model(**inputs, return_dict=True)
.logits.view(
-1,
)
.float()
)
scores_np = scores.cpu().numpy()
results.extend(scores_np)
return results
def exp_normalize(x):
b = x.max()
y = np.exp(x - b)
return y / y.sum()
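# Illustrative sketch (not part of this commit): exp_normalize is a numerically stable
# softmax over the raw ko-reranker logits, so each query's scores sum to 1.
# The logits below are invented for demonstration.
example_logits = np.array([2.1, -0.3, 0.8])
example_scores = exp_normalize(example_logits)
print(example_scores, example_scores.sum())  # probabilities summing to 1.0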

View File

@@ -0,0 +1,126 @@
import os
from typing import List, Tuple
import pandas as pd
from mixedbread_ai.client import AsyncMixedbreadAI
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.utils.util import (
result_to_dataframe,
get_event_loop,
process_batch,
pop_params,
)
class MixedbreadAIReranker(BasePassageReranker):
def __init__(
self,
project_dir: str,
*args,
**kwargs,
):
"""
Initialize mixedbread-ai rerank node.
:param project_dir: The project directory path.
:param api_key: The API key for MixedbreadAI rerank.
You can set it in the environment variable MXBAI_API_KEY.
Or, you can directly set it on the config YAML file using this parameter.
Default is env variable "MXBAI_API_KEY".
:param kwargs: Extra keyword arguments; only api_key is used here and the rest are ignored.
"""
super().__init__(project_dir)
api_key = kwargs.pop("api_key", None)
api_key = os.getenv("MXBAI_API_KEY", None) if api_key is None else api_key
if api_key is None:
raise KeyError(
"Please set the API key for Mixedbread AI rerank in the environment variable MXBAI_API_KEY "
"or directly set it on the config YAML file."
)
self.client = AsyncMixedbreadAI(api_key=api_key)
def __del__(self):
del self.client
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, contents, scores, ids = self.cast_to_run(previous_result)
top_k = kwargs.pop("top_k")
batch = kwargs.pop("batch", 8)
model = kwargs.pop("model", "mixedbread-ai/mxbai-rerank-large-v1")
rerank_params = pop_params(self.client.reranking, kwargs)
return self._pure(queries, contents, ids, top_k, model, batch, **rerank_params)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
ids_list: List[List[str]],
top_k: int,
model: str = "mixedbread-ai/mxbai-rerank-large-v1",
batch: int = 8,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank a list of contents with mixedbread-ai rerank models.
You can get the API key from https://www.mixedbread.ai/api-reference#quick-start-guide and set it in the environment variable MXBAI_API_KEY.
:param queries: The list of queries to use for reranking
:param contents_list: The list of lists of contents to rerank
:param ids_list: The list of lists of ids retrieved from the initial ranking
:param top_k: The number of passages to be retrieved
:param model: The model name for mixedbread-ai rerank.
You can choose between "mixedbread-ai/mxbai-rerank-large-v1", "mixedbread-ai/mxbai-rerank-base-v1" and "mixedbread-ai/mxbai-rerank-xsmall-v1".
Default is "mixedbread-ai/mxbai-rerank-large-v1".
:param batch: The number of queries to be processed in a batch
:return: Tuple of lists containing the reranked contents, ids, and scores
"""
tasks = [
mixedbreadai_rerank_pure(
self.client, query, contents, ids, top_k=top_k, model=model
)
for query, contents, ids in zip(queries, contents_list, ids_list)
]
loop = get_event_loop()
results = loop.run_until_complete(process_batch(tasks, batch))
content_result, id_result, score_result = zip(*results)
return list(content_result), list(id_result), list(score_result)
async def mixedbreadai_rerank_pure(
client: AsyncMixedbreadAI,
query: str,
documents: List[str],
ids: List[str],
top_k: int,
model: str = "mixedbread-ai/mxbai-rerank-large-v1",
) -> Tuple[List[str], List[str], List[float]]:
"""
Rerank a list of contents with mixedbread-ai rerank models.
:param client: The mixedbread-ai client to use for reranking
:param query: The query to use for reranking
:param documents: The list of contents to rerank
:param ids: The list of ids corresponding to the documents
:param top_k: The number of passages to be retrieved
:param model: The model name for mixedbread-ai rerank.
You can choose between "mixedbread-ai/mxbai-rerank-large-v1" and "mixedbread-ai/mxbai-rerank-base-v1".
Default is "mixedbread-ai/mxbai-rerank-large-v1".
:return: Tuple of lists containing the reranked contents, ids, and scores
"""
results = await client.reranking(
query=query,
input=documents,
top_k=top_k,
model=model,
)
reranked_scores: List[float] = list(map(lambda x: x.score, results.data))
reranked_scores_float = list(map(float, reranked_scores))
indices = list(map(lambda x: x.index, results.data))
reranked_contents = list(map(lambda x: documents[x], indices))
reranked_ids: List[str] = list(map(lambda i: ids[i], indices))
return reranked_contents, reranked_ids, reranked_scores_float

View File

@@ -0,0 +1,190 @@
from itertools import chain
from typing import List, Tuple
import pandas as pd
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.utils.util import (
make_batch,
sort_by_scores,
flatten_apply,
select_top_k,
result_to_dataframe,
pop_params,
empty_cuda_cache,
)
prediction_tokens = {
"castorini/monot5-base-msmarco": ["▁false", "▁true"],
"castorini/monot5-base-msmarco-10k": ["▁false", "▁true"],
"castorini/monot5-large-msmarco": ["▁false", "▁true"],
"castorini/monot5-large-msmarco-10k": ["▁false", "▁true"],
"castorini/monot5-base-med-msmarco": ["▁false", "▁true"],
"castorini/monot5-3b-med-msmarco": ["▁false", "▁true"],
"castorini/monot5-3b-msmarco-10k": ["▁false", "▁true"],
"unicamp-dl/mt5-base-en-msmarco": ["▁no", "▁yes"],
"unicamp-dl/ptt5-base-pt-msmarco-10k-v2": ["▁não", "▁sim"],
"unicamp-dl/ptt5-base-pt-msmarco-100k-v2": ["▁não", "▁sim"],
"unicamp-dl/ptt5-base-en-pt-msmarco-100k-v2": ["▁não", "▁sim"],
"unicamp-dl/mt5-base-en-pt-msmarco-v2": ["▁no", "▁yes"],
"unicamp-dl/mt5-base-mmarco-v2": ["▁no", "▁yes"],
"unicamp-dl/mt5-base-en-pt-msmarco-v1": ["▁no", "▁yes"],
"unicamp-dl/mt5-base-mmarco-v1": ["▁no", "▁yes"],
"unicamp-dl/ptt5-base-pt-msmarco-10k-v1": ["▁não", "▁sim"],
"unicamp-dl/ptt5-base-pt-msmarco-100k-v1": ["▁não", "▁sim"],
"unicamp-dl/ptt5-base-en-pt-msmarco-10k-v1": ["▁não", "▁sim"],
"unicamp-dl/mt5-3B-mmarco-en-pt": ["", "▁true"],
"unicamp-dl/mt5-13b-mmarco-100k": ["", "▁true"],
}
class MonoT5(BasePassageReranker):
def __init__(
self,
project_dir: str,
model_name: str = "castorini/monot5-3b-msmarco-10k",
*args,
**kwargs,
):
"""
Initialize the MonoT5 reranker.
:param project_dir: The project directory
:param model_name: The name of the MonoT5 model to use for reranking
Note: the default model name is 'castorini/monot5-3b-msmarco-10k'.
Because a '/' in the model name would break the result file path,
pass the name with '_' in place of '/'; it is converted back to '/' internally.
:param kwargs: The extra arguments for the MonoT5 reranker
"""
super().__init__(project_dir)
try:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
except ImportError:
raise ImportError("For using MonoT5 Reranker, please install torch first.")
# replace '_' to '/'
if "_" in model_name:
model_name = model_name.replace("_", "/")
# Load the tokenizer and model from the pre-trained MonoT5 model
self.tokenizer = T5Tokenizer.from_pretrained(model_name)
model_params = pop_params(T5ForConditionalGeneration.from_pretrained, kwargs)
self.model = T5ForConditionalGeneration.from_pretrained(
model_name, **model_params
).eval()
# Determine the device to run the model on (GPU if available, otherwise CPU)
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.model.to(self.device)
token_false, token_true = prediction_tokens[model_name]
self.token_false_id = self.tokenizer.convert_tokens_to_ids(token_false)
self.token_true_id = self.tokenizer.convert_tokens_to_ids(token_true)
def __del__(self):
del self.model
del self.tokenizer
empty_cuda_cache()
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, contents, _, ids = self.cast_to_run(previous_result)
top_k = kwargs.get("top_k", 3)
batch = kwargs.get("batch", 64)
return self._pure(queries, contents, ids, top_k, batch)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
ids_list: List[List[str]],
top_k: int,
batch: int = 64,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank a list of contents based on their relevance to a query using MonoT5.
:param queries: The list of queries to use for reranking
:param contents_list: The list of lists of contents to rerank
:param ids_list: The list of lists of ids retrieved from the initial ranking
:param top_k: The number of passages to be retrieved
:param batch: The number of queries to be processed in a batch
:return: tuple of lists containing the reranked contents, ids, and scores
"""
# Format each (query, passage) pair as a MonoT5 input prompt
nested_list = [
list(map(lambda x: [f"Query: {query} Document: {x}"], content_list))
for query, content_list in zip(queries, contents_list)
]
rerank_scores = flatten_apply(
monot5_run_model,
nested_list,
model=self.model,
batch_size=batch,
tokenizer=self.tokenizer,
device=self.device,
token_false_id=self.token_false_id,
token_true_id=self.token_true_id,
)
df = pd.DataFrame(
{
"contents": contents_list,
"ids": ids_list,
"scores": rerank_scores,
}
)
df[["contents", "ids", "scores"]] = df.apply(
sort_by_scores, axis=1, result_type="expand"
)
results = select_top_k(df, ["contents", "ids", "scores"], top_k)
return (
results["contents"].tolist(),
results["ids"].tolist(),
results["scores"].tolist(),
)
def monot5_run_model(
input_texts,
model,
batch_size: int,
tokenizer,
device,
token_false_id,
token_true_id,
):
try:
import torch
except ImportError:
raise ImportError("For using MonoT5 Reranker, please install torch first.")
batch_input_texts = make_batch(input_texts, batch_size)
results = []
for batch_texts in batch_input_texts:
flattened_batch_texts = list(chain.from_iterable(batch_texts))
input_encodings = tokenizer(
flattened_batch_texts,
padding=True,
truncation=True,
max_length=512,
return_tensors="pt",
).to(device)
with torch.no_grad():
outputs = model.generate(
input_ids=input_encodings["input_ids"],
attention_mask=input_encodings["attention_mask"],
output_scores=True,
return_dict_in_generate=True,
)
# Extract logits for the 'false' and 'true' tokens from the model's output
logits = outputs.scores[-1][:, [token_false_id, token_true_id]]
# Calculate the softmax probability of the 'true' token
probs = torch.nn.functional.softmax(logits, dim=-1)[:, 1]
results.extend(probs.tolist())
return results
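# Illustrative sketch (not part of this commit): MonoT5 scores a passage with the
# softmax over its "false"/"true" token logits at the first generated step. The tensor
# below stands in for outputs.scores[-1][:, [token_false_id, token_true_id]];
# the numbers are invented for demonstration.
import torch
example_logits = torch.tensor([[1.2, 3.4], [2.0, 0.5]])  # passage 1 looks relevant, passage 2 does not
example_probs = torch.nn.functional.softmax(example_logits, dim=-1)[:, 1]
print(example_probs.tolist())  # roughly [0.90, 0.18], used as rerank scores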

View File

@@ -0,0 +1,191 @@
from pathlib import Path
from typing import Any, List, Tuple
import numpy as np
import pandas as pd
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.utils.util import (
make_batch,
sort_by_scores,
flatten_apply,
select_top_k,
result_to_dataframe,
pop_params,
empty_cuda_cache,
)
class OpenVINOReranker(BasePassageReranker):
def __init__(
self,
project_dir: str,
model: str = "BAAI/bge-reranker-large",
*args,
**kwargs,
):
super().__init__(project_dir)
try:
from huggingface_hub import HfApi
from transformers import AutoTokenizer
except ImportError as e:
raise ValueError(
"Could not import huggingface_hub python package. "
"Please install it with: "
"`pip install -U huggingface_hub`."
) from e
def require_model_export(
model_id: str, revision: Any = None, subfolder: Any = None
) -> bool:
model_dir = Path(model_id)
if subfolder is not None:
model_dir = model_dir / subfolder
if model_dir.is_dir():
return (
not (model_dir / "openvino_model.xml").exists()
or not (model_dir / "openvino_model.bin").exists()
)
hf_api = HfApi()
try:
model_info = hf_api.model_info(model_id, revision=revision or "main")
normalized_subfolder = (
None if subfolder is None else Path(subfolder).as_posix()
)
model_files = [
file.rfilename
for file in model_info.siblings
if normalized_subfolder is None
or file.rfilename.startswith(normalized_subfolder)
]
ov_model_path = (
"openvino_model.xml"
if subfolder is None
else f"{normalized_subfolder}/openvino_model.xml"
)
return (
ov_model_path not in model_files
or ov_model_path.replace(".xml", ".bin") not in model_files
)
except Exception:
return True
try:
from optimum.intel.openvino import OVModelForSequenceClassification
except ImportError:
raise ImportError(
"Please install optimum package to use OpenVINOReranker"
"pip install 'optimum[openvino,nncf]'"
)
model_kwargs = pop_params(
OVModelForSequenceClassification.from_pretrained, kwargs
)
if require_model_export(model):
# OpenVINO IR files are missing; export the model on the fly
self.model = OVModelForSequenceClassification.from_pretrained(
model, export=True, **model_kwargs
)
else:
# OpenVINO IR files already exist; load them directly
self.model = OVModelForSequenceClassification.from_pretrained(
model, **model_kwargs
)
self.tokenizer = AutoTokenizer.from_pretrained(model)
def __del__(self):
del self.model
del self.tokenizer
empty_cuda_cache()
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, contents, _, ids = self.cast_to_run(previous_result)
top_k = kwargs.get("top_k", 3)
batch = kwargs.get("batch", 64)
return self._pure(queries, contents, ids, top_k, batch)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
ids_list: List[List[str]],
top_k: int,
batch: int = 64,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank a list of contents based on their relevance to a query using an OpenVINO-optimized cross-encoder.
:param queries: The list of queries to use for reranking
:param contents_list: The list of lists of contents to rerank
:param ids_list: The list of lists of ids retrieved from the initial ranking
:param top_k: The number of passages to be retrieved
:param batch: The number of queries to be processed in a batch
:return: tuple of lists containing the reranked contents, ids, and scores
"""
# Build (query, passage) pairs for the cross-encoder
nested_list = [
list(map(lambda x: [query, x], content_list))
for query, content_list in zip(queries, contents_list)
]
rerank_scores = flatten_apply(
openvino_run_model,
nested_list,
model=self.model,
batch_size=batch,
tokenizer=self.tokenizer,
)
df = pd.DataFrame(
{
"contents": contents_list,
"ids": ids_list,
"scores": rerank_scores,
}
)
df[["contents", "ids", "scores"]] = df.apply(
sort_by_scores, axis=1, result_type="expand"
)
results = select_top_k(df, ["contents", "ids", "scores"], top_k)
return (
results["contents"].tolist(),
results["ids"].tolist(),
results["scores"].tolist(),
)
def openvino_run_model(
input_texts,
model,
batch_size: int,
tokenizer,
):
batch_input_texts = make_batch(input_texts, batch_size)
results = []
for batch_texts in batch_input_texts:
input_tensors = tokenizer(
batch_texts,
padding=True,
truncation=True,
return_tensors="pt",
)
outputs = model(**input_tensors, return_dict=True)
if outputs[0].shape[1] > 1:
scores = outputs[0][:, 1]
else:
scores = outputs[0].flatten()
scores = list(map(float, (1 / (1 + np.exp(-np.array(scores))))))
results.extend(scores)
return results
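# Illustrative sketch (not part of this commit): when the model emits a single logit
# per (query, passage) pair, openvino_run_model squashes it into (0, 1) with a sigmoid.
# The raw scores below are invented for demonstration.
example_raw = np.array([3.2, -1.1, 0.4])
example_sigmoid = 1 / (1 + np.exp(-example_raw))
print([float(s) for s in example_sigmoid])  # scores in (0, 1); higher means more relevant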

View File

@@ -0,0 +1,31 @@
from typing import List
import pandas as pd
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.utils import result_to_dataframe
class PassReranker(BasePassageReranker):
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
top_k = kwargs.pop("top_k")
_, contents_list, scores_list, ids_list = self.cast_to_run(previous_result)
return self._pure(contents_list, scores_list, ids_list, top_k)
def _pure(
self,
contents_list: List[List[str]],
scores_list: List[List[float]],
ids_list: List[List[str]],
top_k: int,
):
"""
Do not perform reranking.
Return the given top-k passages as is.
"""
contents_list = list(map(lambda x: x[:top_k], contents_list))
scores_list = list(map(lambda x: x[:top_k], scores_list))
ids_list = list(map(lambda x: x[:top_k], ids_list))
return contents_list, ids_list, scores_list

View File

@@ -0,0 +1,170 @@
from typing import List, Optional, Sequence, Tuple, Union
import numpy as np
import pandas as pd
from llama_index.core.base.llms.types import ChatMessage, ChatResponse
from llama_index.core.llms import LLM
from llama_index.core.postprocessor.rankGPT_rerank import RankGPTRerank
from llama_index.core.schema import NodeWithScore, QueryBundle, TextNode
from llama_index.core.utils import print_text
from llama_index.llms.openai import OpenAI
from autorag import generator_models
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.utils.util import (
get_event_loop,
process_batch,
pop_params,
result_to_dataframe,
empty_cuda_cache,
)
class RankGPT(BasePassageReranker):
def __init__(
self, project_dir: str, llm: Optional[Union[str, LLM]] = None, **kwargs
):
"""
Initialize the RankGPT reranker.
:param project_dir: The project directory
:param llm: The LLM model to use for RankGPT rerank.
It is a llama index model.
Default is the OpenAI model with gpt-4o-mini.
:param kwargs: The keyword arguments for the LLM model.
"""
super().__init__(project_dir)
if llm is None:
self.llm = OpenAI(model="gpt-4o-mini")
else:
if not isinstance(llm, LLM):
llm_class = generator_models[llm]
llm_param = pop_params(llm_class.__init__, kwargs)
self.llm = llm_class(**llm_param)
else:
self.llm = llm
def __del__(self):
del self.llm
empty_cuda_cache()
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, contents, scores, ids = self.cast_to_run(previous_result)
top_k = kwargs.get("top_k", 1)
verbose = kwargs.get("verbose", False)
rankgpt_rerank_prompt = kwargs.get("rankgpt_rerank_prompt", None)
batch = kwargs.get("batch", 16)
return self._pure(
queries=queries,
contents_list=contents,
scores_list=scores,
ids_list=ids,
top_k=top_k,
verbose=verbose,
rankgpt_rerank_prompt=rankgpt_rerank_prompt,
batch=batch,
)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
scores_list: List[List[float]],
ids_list: List[List[str]],
top_k: int,
verbose: bool = False,
rankgpt_rerank_prompt: Optional[str] = None,
batch: int = 16,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank given context paragraphs using RankGPT.
Return pseudo scores, since the actual scores are not available on RankGPT.
:param queries: The list of queries to use for reranking
:param contents_list: The list of lists of contents to rerank
:param scores_list: The list of lists of scores retrieved from the initial ranking
:param ids_list: The list of lists of ids retrieved from the initial ranking
:param top_k: The number of passages to be retrieved
:param verbose: Whether to print intermediate steps.
:param rankgpt_rerank_prompt: The prompt template for RankGPT rerank.
Default is RankGPT's default prompt.
:param batch: The number of queries to be processed in a batch.
:return: Tuple of lists containing the reranked contents, ids, and scores
"""
query_bundles = list(map(lambda query: QueryBundle(query_str=query), queries))
nodes_list = [
list(
map(
lambda x: NodeWithScore(node=TextNode(text=x[0]), score=x[1]),
zip(content_list, score_list),
)
)
for content_list, score_list in zip(contents_list, scores_list)
]
reranker = AsyncRankGPTRerank(
top_n=top_k,
llm=self.llm,
verbose=verbose,
rankgpt_rerank_prompt=rankgpt_rerank_prompt,
)
tasks = [
reranker.async_postprocess_nodes(nodes, query, ids)
for nodes, query, ids in zip(nodes_list, query_bundles, ids_list)
]
loop = get_event_loop()
rerank_result = loop.run_until_complete(process_batch(tasks, batch_size=batch))
content_result = [
list(map(lambda x: x.node.text, res[0])) for res in rerank_result
]
score_result = [
np.linspace(1.0, 0.0, len(res[0])).tolist() for res in rerank_result
]
id_result = [res[1] for res in rerank_result]
del reranker
return content_result, id_result, score_result
class AsyncRankGPTRerank(RankGPTRerank):
async def async_run_llm(self, messages: Sequence[ChatMessage]) -> ChatResponse:
return await self.llm.achat(messages)
async def async_postprocess_nodes(
self,
nodes: List[NodeWithScore],
query_bundle: QueryBundle,
ids: Optional[List[str]] = None,
) -> Tuple[List[NodeWithScore], List[str]]:
if ids is None:
ids = [str(i) for i in range(len(nodes))]
items = {
"query": query_bundle.query_str,
"hits": [{"content": node.get_content()} for node in nodes],
}
messages = self.create_permutation_instruction(item=items)
permutation = await self.async_run_llm(messages=messages)
if permutation.message is not None and permutation.message.content is not None:
rerank_ranks = self._receive_permutation(
items, str(permutation.message.content)
)
if self.verbose:
print_text(f"After Reranking, new rank list for nodes: {rerank_ranks}")
initial_results: List[NodeWithScore] = []
id_results = []
for idx in rerank_ranks:
initial_results.append(
NodeWithScore(node=nodes[idx].node, score=nodes[idx].score)
)
id_results.append(ids[idx])
return initial_results[: self.top_n], id_results[: self.top_n]
else:
return nodes[: self.top_n], ids[: self.top_n]
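# Illustrative sketch (not part of this commit): RankGPT only returns an ordering, so
# _pure assigns pseudo scores with np.linspace, evenly spaced from 1.0 down to 0.0
# over the reranked passages.
example_num_passages = 4
print(np.linspace(1.0, 0.0, example_num_passages).tolist())  # [1.0, 0.666..., 0.333..., 0.0]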

View File

@@ -0,0 +1,145 @@
import logging
import os
import pathlib
from typing import List, Dict
import pandas as pd
from autorag.nodes.retrieval.run import evaluate_retrieval_node
from autorag.schema.metricinput import MetricInput
from autorag.strategy import measure_speed, filter_by_threshold, select_best
from autorag.utils.util import apply_recursive, to_list
logger = logging.getLogger("AutoRAG")
def run_passage_reranker_node(
modules: List,
module_params: List[Dict],
previous_result: pd.DataFrame,
node_line_dir: str,
strategies: Dict,
) -> pd.DataFrame:
"""
Run evaluation and select the best module among passage reranker node results.
:param modules: Passage reranker modules to run.
:param module_params: Passage reranker module parameters.
:param previous_result: Previous result dataframe.
It can be the result of a retrieval or reranker module,
so it must contain 'query', 'retrieved_contents', 'retrieved_ids', and 'retrieve_scores' columns.
:param node_line_dir: This node line's directory.
:param strategies: Strategies for passage reranker node.
In this node, we use 'retrieval_f1', 'retrieval_recall' and 'retrieval_precision'.
Evaluation can be skipped when only one module with a single parameter set is used.
:return: The best result dataframe with previous result columns.
"""
if not os.path.exists(node_line_dir):
os.makedirs(node_line_dir)
project_dir = pathlib.PurePath(node_line_dir).parent.parent
qa_df = pd.read_parquet(
os.path.join(project_dir, "data", "qa.parquet"), engine="pyarrow"
)
retrieval_gt = qa_df["retrieval_gt"].tolist()
retrieval_gt = apply_recursive(lambda x: str(x), to_list(retrieval_gt))
# make rows to metric_inputs
metric_inputs = [
MetricInput(retrieval_gt=ret_gt, query=query, generation_gt=gen_gt)
for ret_gt, query, gen_gt in zip(
retrieval_gt, qa_df["query"].tolist(), qa_df["generation_gt"].tolist()
)
]
results, execution_times = zip(
*map(
lambda task: measure_speed(
task[0].run_evaluator,
project_dir=project_dir,
previous_result=previous_result,
**task[1],
),
zip(modules, module_params),
)
)
average_times = list(map(lambda x: x / len(results[0]), execution_times))
# run metrics before filtering
if strategies.get("metrics") is None:
raise ValueError(
"You must at least one metrics for passage_reranker evaluation."
)
results = list(
map(
lambda x: evaluate_retrieval_node(
x,
metric_inputs,
strategies.get("metrics"),
),
results,
)
)
# save results to folder
save_dir = os.path.join(node_line_dir, "passage_reranker") # node name
if not os.path.exists(save_dir):
os.makedirs(save_dir)
filepaths = list(
map(lambda x: os.path.join(save_dir, f"{x}.parquet"), range(len(modules)))
)
list(
map(lambda x: x[0].to_parquet(x[1], index=False), zip(results, filepaths))
) # execute save to parquet
filenames = list(map(lambda x: os.path.basename(x), filepaths))
summary_df = pd.DataFrame(
{
"filename": filenames,
"module_name": list(map(lambda module: module.__name__, modules)),
"module_params": module_params,
"execution_time": average_times,
**{
f"passage_reranker_{metric}": list(
map(lambda result: result[metric].mean(), results)
)
for metric in strategies.get("metrics")
},
}
)
# filter by strategies
if strategies.get("speed_threshold") is not None:
results, filenames = filter_by_threshold(
results, average_times, strategies["speed_threshold"], filenames
)
selected_result, selected_filename = select_best(
results,
strategies.get("metrics"),
filenames,
strategies.get("strategy", "mean"),
)
# change metric name columns to passage_reranker_metric_name
selected_result = selected_result.rename(
columns={
metric_name: f"passage_reranker_{metric_name}"
for metric_name in strategies["metrics"]
}
)
# drop retrieval result columns in previous_result
previous_result = previous_result.drop(
columns=["retrieved_contents", "retrieved_ids", "retrieve_scores"]
)
best_result = pd.concat([previous_result, selected_result], axis=1)
# add 'is_best' column to summary file
summary_df["is_best"] = summary_df["filename"] == selected_filename
# save files
summary_df.to_csv(os.path.join(save_dir, "summary.csv"), index=False)
best_result.to_parquet(
os.path.join(
save_dir, f"best_{os.path.splitext(selected_filename)[0]}.parquet"
),
index=False,
)
return best_result

View File

@@ -0,0 +1,129 @@
from typing import List, Tuple
import pandas as pd
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.utils.util import (
flatten_apply,
make_batch,
select_top_k,
sort_by_scores,
pop_params,
result_to_dataframe,
empty_cuda_cache,
)
class SentenceTransformerReranker(BasePassageReranker):
def __init__(
self,
project_dir: str,
model_name: str = "cross-encoder/ms-marco-MiniLM-L-2-v2",
*args,
**kwargs,
):
"""
Initialize the Sentence Transformer reranker node.
:param project_dir: The project directory
:param model_name: The name of the Sentence Transformer model to use for reranking
Default is "cross-encoder/ms-marco-MiniLM-L-2-v2"
:param kwargs: The CrossEncoder parameters
"""
super().__init__(project_dir, *args, **kwargs)
try:
import torch
from sentence_transformers import CrossEncoder
except ImportError:
raise ImportError(
"You have to install AutoRAG[gpu] to use SentenceTransformerReranker"
)
self.device = "cuda" if torch.cuda.is_available() else "cpu"
model_params = pop_params(CrossEncoder.__init__, kwargs)
self.model = CrossEncoder(model_name, device=self.device, **model_params)
def __del__(self):
del self.model
empty_cuda_cache()
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
"""
Rerank a list of contents based on their relevance to a query using a Sentence Transformer model.
:param previous_result: The previous result
:param top_k: The number of passages to be retrieved
:param batch: The number of queries to be processed in a batch
:return: pd DataFrame containing the reranked contents, ids, and scores
"""
queries, contents_list, scores_list, ids_list = self.cast_to_run(
previous_result
)
top_k = kwargs.get("top_k", 1)
batch = kwargs.get("batch", 64)
return self._pure(queries, contents_list, ids_list, top_k, batch)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
ids_list: List[List[str]],
top_k: int,
batch: int = 64,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank a list of contents based on their relevance to a query using a Sentence Transformer model.
:param queries: The list of queries to use for reranking
:param contents_list: The list of lists of contents to rerank
:param ids_list: The list of lists of ids retrieved from the initial ranking
:param top_k: The number of passages to be retrieved
:param batch: The number of queries to be processed in a batch
:return: tuple of lists containing the reranked contents, ids, and scores
"""
nested_list = [
list(map(lambda x: [query, x], content_list))
for query, content_list in zip(queries, contents_list)
]
rerank_scores = flatten_apply(
sentence_transformer_run_model,
nested_list,
model=self.model,
batch_size=batch,
)
df = pd.DataFrame(
{
"contents": contents_list,
"ids": ids_list,
"scores": rerank_scores,
}
)
df[["contents", "ids", "scores"]] = df.apply(
sort_by_scores, axis=1, result_type="expand"
)
results = select_top_k(df, ["contents", "ids", "scores"], top_k)
return (
results["contents"].tolist(),
results["ids"].tolist(),
results["scores"].tolist(),
)
def sentence_transformer_run_model(input_texts, model, batch_size: int):
try:
import torch
except ImportError:
raise ImportError(
"You have to install AutoRAG[gpu] to use SentenceTransformerReranker"
)
batch_input_texts = make_batch(input_texts, batch_size)
results = []
for batch_texts in batch_input_texts:
with torch.no_grad():
pred_scores = model.predict(sentences=batch_texts, apply_softmax=True)
results.extend(pred_scores.tolist())
return results
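# Illustrative sketch (assumes sentence-transformers is installed and downloading the
# model is acceptable; the query and passages are examples, not part of this commit):
# CrossEncoder.predict takes [query, passage] pairs and returns one relevance score
# per pair, which is exactly the shape sentence_transformer_run_model feeds it.
from sentence_transformers import CrossEncoder
example_model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-2-v2")
example_pairs = [
["what is autorag", "AutoRAG automatically tunes RAG pipelines."],
["what is autorag", "Pasta is best cooked al dente."],
]
print(example_model.predict(sentences=example_pairs))  # the first pair should score higher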

View File

@@ -0,0 +1 @@
from .tart import Tart

View File

@@ -0,0 +1,152 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import copy
from transformers.modeling_outputs import SequenceClassifierOutput
from transformers.models.t5.modeling_t5 import T5Config, T5PreTrainedModel, T5Stack
from transformers.utils.model_parallel_utils import assert_device_map, get_device_map
from autorag.utils.util import empty_cuda_cache
class EncT5ForSequenceClassification(T5PreTrainedModel):
_keys_to_ignore_on_load_missing = [
r"encoder\.embed_tokens\.weight",
]
def __init__(self, config: T5Config, dropout=0.1):
super().__init__(config)
try:
from torch import nn
except ImportError:
raise ImportError("Please install PyTorch to use TART reranker.")
self.num_labels = config.num_labels
self.config = config
self.shared = nn.Embedding(config.vocab_size, config.d_model)
encoder_config = copy.deepcopy(config)
encoder_config.use_cache = False
encoder_config.is_encoder_decoder = False
self.encoder = T5Stack(encoder_config, self.shared)
self.dropout = nn.Dropout(dropout)
self.classifier = nn.Linear(config.hidden_size, config.num_labels)
# Initialize weights and apply final processing
self.post_init()
# Model parallel
self.model_parallel = False
self.device_map = None
def parallelize(self, device_map=None):
try:
import torch
except ImportError:
raise ImportError("Please install PyTorch to use TART reranker.")
self.device_map = (
get_device_map(len(self.encoder.block), range(torch.cuda.device_count()))
if device_map is None
else device_map
)
assert_device_map(self.device_map, len(self.encoder.block))
self.encoder.parallelize(self.device_map)
self.classifier = self.classifier.to(self.encoder.first_device)
self.model_parallel = True
def deparallelize(self):
self.encoder.deparallelize()
self.encoder = self.encoder.to("cpu")
self.model_parallel = False
self.device_map = None
empty_cuda_cache()
def get_input_embeddings(self):
return self.shared
def set_input_embeddings(self, new_embeddings):
self.shared = new_embeddings
self.encoder.set_input_embeddings(new_embeddings)
def get_encoder(self):
return self.encoder
def _prune_heads(self, heads_to_prune):
"""
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
"""
for layer, heads in heads_to_prune.items():
self.encoder.layer[layer].attention.prune_heads(heads)
def forward(
self,
input_ids=None,
attention_mask=None,
head_mask=None,
inputs_embeds=None,
labels=None,
output_attentions=None,
output_hidden_states=None,
return_dict=None,
):
try:
import torch
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
except ImportError:
raise ImportError("Please install PyTorch to use TART reranker.")
return_dict = (
return_dict if return_dict is not None else self.config.use_return_dict
)
outputs = self.encoder(
input_ids=input_ids,
attention_mask=attention_mask,
inputs_embeds=inputs_embeds,
head_mask=head_mask,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)
hidden_states = outputs[0]
pooled_output = hidden_states[:, 0, :] # Take bos token (equiv. to <s>)
pooled_output = self.dropout(pooled_output)
logits = self.classifier(pooled_output)
loss = None
if labels is not None:
if self.config.problem_type is None:
if self.num_labels == 1:
self.config.problem_type = "regression"
elif self.num_labels > 1 and (
labels.dtype == torch.long or labels.dtype == torch.int
):
self.config.problem_type = "single_label_classification"
else:
self.config.problem_type = "multi_label_classification"
if self.config.problem_type == "regression":
loss_fct = MSELoss()
if self.num_labels == 1:
loss = loss_fct(logits.squeeze(), labels.squeeze())
else:
loss = loss_fct(logits, labels)
elif self.config.problem_type == "single_label_classification":
loss_fct = CrossEntropyLoss()
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
elif self.config.problem_type == "multi_label_classification":
loss_fct = BCEWithLogitsLoss()
loss = loss_fct(logits, labels)
if not return_dict:
output = (logits,) + outputs[1:]
return ((loss,) + output) if loss is not None else output
return SequenceClassifierOutput(
loss=loss,
logits=logits,
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)

View File

@@ -0,0 +1,139 @@
from itertools import chain
from typing import List, Tuple
import pandas as pd
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.nodes.passagereranker.tart.modeling_enc_t5 import (
EncT5ForSequenceClassification,
)
from autorag.nodes.passagereranker.tart.tokenization_enc_t5 import EncT5Tokenizer
from autorag.utils.util import (
make_batch,
sort_by_scores,
flatten_apply,
select_top_k,
result_to_dataframe,
empty_cuda_cache,
)
class Tart(BasePassageReranker):
def __init__(self, project_dir: str, *args, **kwargs):
super().__init__(project_dir)
try:
import torch
except ImportError:
raise ImportError(
"torch is not installed. Please install torch first to use TART reranker."
)
model_name = "facebook/tart-full-flan-t5-xl"
self.model = EncT5ForSequenceClassification.from_pretrained(model_name)
self.tokenizer = EncT5Tokenizer.from_pretrained(model_name)
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.model = self.model.to(self.device)
def __del__(self):
del self.model
del self.tokenizer
empty_cuda_cache()
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, contents, _, ids = self.cast_to_run(previous_result)
top_k = kwargs.pop("top_k")
instruction = kwargs.pop("instruction", "Find passage to answer given question")
batch = kwargs.pop("batch", 64)
return self._pure(queries, contents, ids, top_k, instruction, batch)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
ids_list: List[List[str]],
top_k: int,
instruction: str = "Find passage to answer given question",
batch: int = 64,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank a list of contents based on their relevance to a query using Tart.
TART is an instruction-aware reranker (https://github.com/facebookresearch/tart).
You can steer the reranking with an instruction.
The default model is facebook/tart-full-flan-t5-xl.
:param queries: The list of queries to use for reranking
:param contents_list: The list of lists of contents to rerank
:param ids_list: The list of lists of ids retrieved from the initial ranking
:param top_k: The number of passages to be retrieved
:param instruction: The instruction for reranking.
Note: the default instruction, taken from the TART paper, is "Find passage to answer given question".
Pass a different instruction through this parameter to change it.
:param batch: The number of queries to be processed in a batch
:return: tuple of lists containing the reranked contents, ids, and scores
"""
nested_list = [
[["{} [SEP] {}".format(instruction, query)] for _ in contents]
for query, contents in zip(queries, contents_list)
]
rerank_scores = flatten_apply(
tart_run_model,
nested_list,
model=self.model,
batch_size=batch,
tokenizer=self.tokenizer,
device=self.device,
contents_list=contents_list,
)
df = pd.DataFrame(
{
"contents": contents_list,
"ids": ids_list,
"scores": rerank_scores,
}
)
df[["contents", "ids", "scores"]] = df.apply(
sort_by_scores, axis=1, result_type="expand"
)
results = select_top_k(df, ["contents", "ids", "scores"], top_k)
return (
results["contents"].tolist(),
results["ids"].tolist(),
results["scores"].tolist(),
)
def tart_run_model(
input_texts, contents_list, model, batch_size: int, tokenizer, device
):
try:
import torch
import torch.nn.functional as F
except ImportError:
raise ImportError(
"torch is not installed. Please install torch first to use TART reranker."
)
flattened_texts = list(chain.from_iterable(input_texts))
flattened_contents = list(chain.from_iterable(contents_list))
batch_input_texts = make_batch(flattened_texts, batch_size)
batch_contents_list = make_batch(flattened_contents, batch_size)
results = []
for batch_texts, batch_contents in zip(batch_input_texts, batch_contents_list):
feature = tokenizer(
batch_texts,
batch_contents,
padding=True,
truncation=True,
return_tensors="pt",
).to(device)
with torch.no_grad():
pred_scores = model(**feature).logits
normalized_scores = [
float(score[1]) for score in F.softmax(pred_scores, dim=1)
]
results.extend(normalized_scores)
return results

View File

@@ -0,0 +1,112 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from typing import Any, Dict, List, Optional
from transformers import T5Tokenizer
class EncT5Tokenizer(T5Tokenizer):
def __init__(
self,
vocab_file,
bos_token="<s>",
eos_token="</s>",
unk_token="<unk>",
pad_token="<pad>",
extra_ids=100,
additional_special_tokens=None,
sp_model_kwargs: Optional[Dict[str, Any]] = None,
**kwargs,
) -> None:
sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
super().__init__(
vocab_file=vocab_file,
bos_token=bos_token,
eos_token=eos_token,
unk_token=unk_token,
pad_token=pad_token,
extra_ids=extra_ids,
additional_special_tokens=additional_special_tokens,
sp_model_kwargs=sp_model_kwargs,
**kwargs,
)
def get_special_tokens_mask(
self,
token_ids_0: List[int],
token_ids_1: Optional[List[int]] = None,
already_has_special_tokens: bool = False,
) -> List[int]:
"""
Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
special tokens using the tokenizer `prepare_for_model` method.
Args:
token_ids_0 (`List[int]`):
List of IDs.
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
already_has_special_tokens (`bool`, *optional*, defaults to `False`):
Whether or not the token list is already formatted with special tokens for the model.
Returns:
`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
"""
if already_has_special_tokens:
return super().get_special_tokens_mask(
token_ids_0=token_ids_0,
token_ids_1=token_ids_1,
already_has_special_tokens=True,
)
# normal case: some special tokens
if token_ids_1 is None:
return [1] + ([0] * len(token_ids_0)) + [1]
return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1]
def create_token_type_ids_from_sequences(
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
"""
Create a mask from the two sequences passed to be used in a sequence-pair classification task. T5 does not make
use of token type ids, therefore a list of zeros is returned.
Args:
token_ids_0 (`List[int]`):
List of IDs.
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
`List[int]`: List of zeros.
"""
bos = [self.bos_token_id]
eos = [self.eos_token_id]
if token_ids_1 is None:
return len(bos + token_ids_0 + eos) * [0]
return len(bos + token_ids_0 + eos + token_ids_1 + eos) * [0]
def build_inputs_with_special_tokens(
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
"""
Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
adding special tokens. A sequence has the following format:
- single sequence: `<s> X </s>`
- pair of sequences: `<s> A </s> B </s>`
Args:
token_ids_0 (`List[int]`):
List of IDs to which the special tokens will be added.
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
`List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
"""
if token_ids_1 is None:
return [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
else:
return (
[self.bos_token_id]
+ token_ids_0
+ [self.eos_token_id]
+ token_ids_1
+ [self.eos_token_id]
)
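# Illustrative sketch (not part of this commit): build_inputs_with_special_tokens
# produces <s> A </s> for a single sequence and <s> A </s> B </s> for a pair.
# The ids below are placeholders, not real vocabulary ids.
example_bos, example_eos = 0, 1
example_a = [11, 12, 13]
example_b = [21, 22]
print([example_bos] + example_a + [example_eos])  # [0, 11, 12, 13, 1]
print([example_bos] + example_a + [example_eos] + example_b + [example_eos])  # [0, 11, 12, 13, 1, 21, 22, 1]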

View File

@@ -0,0 +1,72 @@
import os
from datetime import datetime
from typing import List, Tuple
import pandas as pd
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.utils import result_to_dataframe, fetch_contents
class TimeReranker(BasePassageReranker):
def __init__(self, project_dir: str, *args, **kwargs):
super().__init__(project_dir, *args, **kwargs)
self.corpus_df = pd.read_parquet(
os.path.join(project_dir, "data", "corpus.parquet"), engine="pyarrow"
)
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
_, contents, scores, ids = self.cast_to_run(previous_result)
metadatas = fetch_contents(self.corpus_df, ids, column_name="metadata")
times = [
[time["last_modified_datetime"] for time in time_list]
for time_list in metadatas
]
top_k = kwargs.pop("top_k")
return self._pure(contents, scores, ids, top_k, times)
def _pure(
self,
contents_list: List[List[str]],
scores_list: List[List[float]],
ids_list: List[List[str]],
top_k: int,
time_list: List[List[datetime]],
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank the passages based on merely the datetime of the passage.
It uses 'last_modified_datetime' key in the corpus metadata,
so the metadata in the corpus data file should be in the format of {'last_modified_datetime': datetime.datetime}.
:param contents_list: The list of lists of contents
:param scores_list: The list of lists of scores from the initial ranking
:param ids_list: The list of lists of ids
:param top_k: The number of passages to be retrieved after reranking
:param time_list: The metadata list of lists of datetime.datetime
It automatically extracts the 'last_modified_datetime' key from the metadata in the corpus data.
:return: The reranked contents, ids, and scores
"""
def sort_row(contents, scores, ids, time, top_k):
combined = list(zip(contents, scores, ids, time))
combined.sort(key=lambda x: x[3], reverse=True)
sorted_contents, sorted_scores, sorted_ids, _ = zip(*combined)
return (
list(sorted_contents)[:top_k],
list(sorted_scores)[:top_k],
list(sorted_ids)[:top_k],
)
reranked_contents, reranked_scores, reranked_ids = zip(
*map(
sort_row,
contents_list,
scores_list,
ids_list,
time_list,
[top_k] * len(contents_list),
)
)
return list(reranked_contents), list(reranked_ids), list(reranked_scores)
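# Illustrative sketch (not part of this commit): sort_row orders passages by their
# 'last_modified_datetime', newest first, then keeps the top_k entries. The data
# below is invented for demonstration.
example_contents = ["old passage", "new passage", "middle passage"]
example_scores = [0.9, 0.4, 0.7]
example_ids = ["a", "b", "c"]
example_times = [datetime(2021, 1, 1), datetime(2024, 6, 1), datetime(2023, 3, 1)]
example_sorted = sorted(zip(example_contents, example_scores, example_ids, example_times), key=lambda x: x[3], reverse=True)
print([c for c, _, _, _ in example_sorted[:2]])  # ['new passage', 'middle passage']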

View File

@@ -0,0 +1,160 @@
import logging
from typing import List, Tuple
import pandas as pd
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.utils import result_to_dataframe
from autorag.utils.util import select_top_k, sort_by_scores, empty_cuda_cache
logger = logging.getLogger("AutoRAG")
class Upr(BasePassageReranker):
def __init__(
self,
project_dir: str,
use_bf16: bool = False,
prefix_prompt: str = "Passage: ",
suffix_prompt: str = "Please write a question based on this passage.",
*args,
**kwargs,
):
"""
Initialize the UPR reranker node.
:param project_dir: The project directory
:param use_bf16: Whether to use bfloat16 for the model. Default is False.
:param prefix_prompt: The prefix prompt for the language model that generates question for reranking.
Default is "Passage: ".
The prefix prompt serves as the initial context or instruction for the language model.
It sets the stage for what is expected in the output
:param suffix_prompt: The suffix prompt for the language model that generates question for reranking.
Default is "Please write a question based on this passage.".
The suffix prompt provides a cue or a closing instruction to the language model,
signaling how to conclude the generated text or what format to follow at the end.
:param kwargs: Extra arguments
"""
super().__init__(project_dir, *args, **kwargs)
self.scorer = UPRScorer(
suffix_prompt=suffix_prompt, prefix_prompt=prefix_prompt, use_bf16=use_bf16
)
def __del__(self):
del self.scorer
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, contents, _, ids = self.cast_to_run(previous_result)
top_k = kwargs.pop("top_k")
return self._pure(queries, contents, ids, top_k)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
ids_list: List[List[str]],
top_k: int,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank a list of contents based on their relevance to a query using UPR.
UPR is an unsupervised passage reranker (https://github.com/DevSinghSachan/unsupervised-passage-reranking).
The language model will make a question based on the passage and rerank the passages by the likelihood of the question.
The default model is t5-large.
:param queries: The list of queries to use for reranking
:param contents_list: The list of lists of contents to rerank
:param ids_list: The list of lists of ids retrieved from the initial ranking
:param top_k: The number of passages to be retrieved
:return: tuple of lists containing the reranked contents, ids, and scores
"""
df = pd.DataFrame(
{
"query": queries,
"contents": contents_list,
"ids": ids_list,
}
)
df["scores"] = df.apply(
lambda row: self.scorer.compute(
query=row["query"], contents=row["contents"]
),
axis=1,
)
df[["contents", "ids", "scores"]] = df.apply(
lambda x: sort_by_scores(x, reverse=False), axis=1, result_type="expand"
)
results = select_top_k(df, ["contents", "ids", "scores"], top_k)
return (
results["contents"].tolist(),
results["ids"].tolist(),
results["scores"].tolist(),
)
class UPRScorer:
def __init__(self, suffix_prompt: str, prefix_prompt: str, use_bf16: bool = False):
try:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
except ImportError:
raise ImportError(
"torch is not installed. Please install torch to use UPRReranker."
)
model_name = "t5-large"
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.tokenizer = T5Tokenizer.from_pretrained(model_name)
self.model = T5ForConditionalGeneration.from_pretrained(
model_name, torch_dtype=torch.bfloat16 if use_bf16 else torch.float32
).to(self.device)
self.suffix_prompt = suffix_prompt
self.prefix_prompt = prefix_prompt
def compute(self, query: str, contents: List[str]) -> List[float]:
try:
import torch
except ImportError:
raise ImportError(
"torch is not installed. Please install torch to use UPRReranker."
)
query_token = self.tokenizer(
query, max_length=128, truncation=True, return_tensors="pt"
)
prompts = list(
map(
lambda content: f"{self.prefix_prompt} {content} {self.suffix_prompt}",
contents,
)
)
prompt_token_outputs = self.tokenizer(
prompts,
padding="longest",
max_length=512,
pad_to_multiple_of=8,
truncation=True,
return_tensors="pt",
)
query_input_ids = torch.repeat_interleave(
query_token["input_ids"], len(contents), dim=0
).to(self.device)
with torch.no_grad():
logits = self.model(
input_ids=prompt_token_outputs["input_ids"].to(self.device),
attention_mask=prompt_token_outputs["attention_mask"].to(self.device),
labels=query_input_ids,
).logits
log_softmax = torch.nn.functional.log_softmax(logits, dim=-1)
nll = -log_softmax.gather(2, query_input_ids.unsqueeze(2)).squeeze(2)
avg_nll = torch.sum(nll, dim=1)
return avg_nll.tolist()
def __del__(self):
del self.model
del self.tokenizer
empty_cuda_cache()
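# Illustrative sketch (not part of this commit): UPRScorer.compute returns the total
# negative log-likelihood of the query tokens given each passage, so lower scores mean
# better passages (hence sort_by_scores(..., reverse=False) above). The tensors below
# are invented stand-ins for the model logits and the repeated query token ids.
import torch
example_logits = torch.randn(2, 4, 10)  # (num_passages, query_len, vocab_size)
example_query_ids = torch.randint(0, 10, (2, 4))
example_log_softmax = torch.nn.functional.log_softmax(example_logits, dim=-1)
example_nll = -example_log_softmax.gather(2, example_query_ids.unsqueeze(2)).squeeze(2)
print(torch.sum(example_nll, dim=1).tolist())  # one score per passage; lower is better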

View File

@@ -0,0 +1,109 @@
import os
from typing import List, Tuple
import pandas as pd
import voyageai
from autorag.nodes.passagereranker.base import BasePassageReranker
from autorag.utils.util import result_to_dataframe, get_event_loop, process_batch
class VoyageAIReranker(BasePassageReranker):
def __init__(self, project_dir: str, *args, **kwargs):
super().__init__(project_dir)
api_key = kwargs.pop("api_key", None)
api_key = os.getenv("VOYAGE_API_KEY", None) if api_key is None else api_key
if api_key is None:
raise KeyError(
"Please set the API key for VoyageAI rerank in the environment variable VOYAGE_API_KEY "
"or directly set it on the config YAML file."
)
self.voyage_client = voyageai.AsyncClient(api_key=api_key)
def __del__(self):
del self.voyage_client
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries, contents, scores, ids = self.cast_to_run(previous_result)
top_k = kwargs.pop("top_k")
batch = kwargs.pop("batch", 8)
model = kwargs.pop("model", "rerank-2")
truncation = kwargs.pop("truncation", True)
return self._pure(queries, contents, ids, top_k, model, batch, truncation)
def _pure(
self,
queries: List[str],
contents_list: List[List[str]],
ids_list: List[List[str]],
top_k: int,
model: str = "rerank-2",
batch: int = 8,
truncation: bool = True,
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
"""
Rerank a list of contents with VoyageAI rerank models.
You can get the API key from https://docs.voyageai.com/docs/api-key-and-installation and set it in the environment variable VOYAGE_API_KEY.
:param queries: The list of queries to use for reranking
:param contents_list: The list of lists of contents to rerank
:param ids_list: The list of lists of ids retrieved from the initial ranking
:param top_k: The number of passages to be retrieved
:param model: The model name for VoyageAI rerank.
You can choose between "rerank-2" and "rerank-2-lite".
Default is "rerank-2".
:param batch: The number of queries to be processed in a batch
:param truncation: Whether to truncate the input to satisfy the 'context length limit' on the query and the documents.
:return: Tuple of lists containing the reranked contents, ids, and scores
"""
tasks = [
voyageai_rerank_pure(
self.voyage_client, model, query, contents, ids, top_k, truncation
)
for query, contents, ids in zip(queries, contents_list, ids_list)
]
loop = get_event_loop()
results = loop.run_until_complete(process_batch(tasks, batch))
content_result, id_result, score_result = zip(*results)
return list(content_result), list(id_result), list(score_result)
async def voyageai_rerank_pure(
voyage_client: voyageai.AsyncClient,
model: str,
query: str,
documents: List[str],
ids: List[str],
top_k: int,
truncation: bool = True,
) -> Tuple[List[str], List[str], List[float]]:
"""
Rerank a list of contents with VoyageAI rerank models.
:param voyage_client: The Voyage Client to use for reranking
:param model: The model name for VoyageAI rerank
:param query: The query to use for reranking
:param documents: The list of contents to rerank
:param ids: The list of ids corresponding to the documents
:param top_k: The number of passages to be retrieved
:param truncation: Whether to truncate the input to satisfy the 'context length limit' on the query and the documents.
:return: Tuple of lists containing the reranked contents, ids, and scores
"""
rerank_results = await voyage_client.rerank(
model=model,
query=query,
documents=documents,
top_k=top_k,
truncation=truncation,
)
reranked_scores: List[float] = list(
map(lambda x: x.relevance_score, rerank_results.results)
)
indices = list(map(lambda x: x.index, rerank_results.results))
reranked_contents: List[str] = list(map(lambda i: documents[i], indices))
reranked_ids: List[str] = list(map(lambda i: ids[i], indices))
return reranked_contents, reranked_ids, reranked_scores

View File

@@ -0,0 +1,3 @@
from .long_context_reorder import LongContextReorder
from .window_replacement import WindowReplacement
from .fstring import Fstring

View File

@@ -0,0 +1,34 @@
import logging
from abc import ABCMeta
from pathlib import Path
from typing import Union
import pandas as pd
from autorag.schema.base import BaseModule
logger = logging.getLogger("AutoRAG")
class BasePromptMaker(BaseModule, metaclass=ABCMeta):
def __init__(self, project_dir: Union[str, Path], *args, **kwargs):
logger.info(
f"Initialize prompt maker node - {self.__class__.__name__} module..."
)
def __del__(self):
logger.info(f"Prompt maker node - {self.__class__.__name__} module is deleted.")
def cast_to_run(self, previous_result: pd.DataFrame, *args, **kwargs):
logger.info(f"Running prompt maker node - {self.__class__.__name__} module...")
# get query and retrieved contents from previous_result
assert (
"query" in previous_result.columns
), "previous_result must have query column."
assert (
"retrieved_contents" in previous_result.columns
), "previous_result must have retrieved_contents column."
query = previous_result["query"].tolist()
retrieved_contents = previous_result["retrieved_contents"].tolist()
prompt = kwargs.pop("prompt")
return query, retrieved_contents, prompt

View File

@@ -0,0 +1,49 @@
from typing import List
import pandas as pd
from autorag.nodes.promptmaker.base import BasePromptMaker
from autorag.utils import result_to_dataframe
class Fstring(BasePromptMaker):
@result_to_dataframe(["prompts"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
query, retrieved_contents, prompt = self.cast_to_run(
previous_result, *args, **kwargs
)
return self._pure(prompt, query, retrieved_contents)
def _pure(
self, prompt: str, queries: List[str], retrieved_contents: List[List[str]]
) -> List[str]:
"""
Make a prompt using f-string from a query and retrieved_contents.
You must specify a prompt or a list of prompts in the config YAML file like this:
.. Code:: yaml
nodes:
- node_type: prompt_maker
modules:
- module_type: fstring
prompt: [Answer this question: {query} \n\n {retrieved_contents},
Read the passages carefully and answer this question: {query} \n\n Passages: {retrieved_contents}]
:param prompt: A prompt string.
:param queries: List of query strings.
:param retrieved_contents: List of retrieved contents.
:return: Prompts that are made by f-string.
"""
def fstring_row(
_prompt: str, _query: str, _retrieved_contents: List[str]
) -> str:
contents_str = "\n\n".join(_retrieved_contents)
return _prompt.format(query=_query, retrieved_contents=contents_str)
return list(
map(
lambda x: fstring_row(prompt, x[0], x[1]),
zip(queries, retrieved_contents),
)
)
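# Illustrative sketch (not part of this commit): the f-string prompt maker fills the
# {query} and {retrieved_contents} placeholders, joining passages with blank lines.
# The prompt and passages below are examples only.
example_prompt = "Answer this question: {query} \n\n {retrieved_contents}"
example_query = "What is AutoRAG?"
example_contents = ["AutoRAG tunes RAG pipelines.", "It evaluates many modules automatically."]
print(example_prompt.format(query=example_query, retrieved_contents="\n\n".join(example_contents)))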

View File

@@ -0,0 +1,83 @@
import logging
from typing import List
import numpy as np
import pandas as pd
from autorag.nodes.promptmaker.base import BasePromptMaker
from autorag.utils import result_to_dataframe
logger = logging.getLogger("AutoRAG")
class LongContextReorder(BasePromptMaker):
@result_to_dataframe(["prompts"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
query, retrieved_contents, prompt = self.cast_to_run(
previous_result, *args, **kwargs
)
assert (
"retrieve_scores" in previous_result.columns
), "previous_result must have retrieve_scores column."
retrieve_scores = previous_result["retrieve_scores"].tolist()
return self._pure(prompt, query, retrieved_contents, retrieve_scores)
def _pure(
self,
prompt: str,
queries: List[str],
retrieved_contents: List[List[str]],
retrieve_scores: List[List[float]],
) -> List[str]:
"""
Models struggle to access significant details found
in the center of extended contexts. A study
(https://arxiv.org/abs/2307.03172) observed that the best
performance typically arises when crucial data is positioned
at the start or conclusion of the input context. Additionally,
as the input context lengthens, performance drops notably, even
in models designed for long contexts.
.. Code:: yaml
nodes:
- node_type: prompt_maker
modules:
- module_type: long_context_reorder
prompt: [Answer this question: {query} \n\n {retrieved_contents},
Read the passages carefully and answer this question: {query} \n\n Passages: {retrieved_contents}]
:param prompt: A prompt string.
:param queries: List of query strings.
:param retrieved_contents: List of retrieved contents.
:param retrieve_scores: List of `retrieve scores`.
:return: Prompts that are made by long context reorder.
"""
def long_context_reorder_row(
_prompt: str,
_query: str,
_retrieved_contents: List[str],
_retrieve_scores: List[float],
) -> str:
if isinstance(_retrieved_contents, np.ndarray):
_retrieved_contents = _retrieved_contents.tolist()
if not len(_retrieved_contents) == len(_retrieve_scores):
logger.info("If you use a summarizer, the reorder will not proceed.")
return _prompt.format(
query=_query, retrieved_contents="\n\n".join(_retrieved_contents)
)
content_scores = list(zip(_retrieved_contents, _retrieve_scores))
sorted_content_scores = sorted(
content_scores, key=lambda x: x[1], reverse=True
)
content_result, score_result = zip(*sorted_content_scores)
_retrieved_contents.append(content_result[0])
contents_str = "\n\n".join(_retrieved_contents)
return _prompt.format(query=_query, retrieved_contents=contents_str)
return list(
map(
lambda x: long_context_reorder_row(prompt, x[0], x[1], x[2]),
zip(queries, retrieved_contents, retrieve_scores),
)
)
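A small illustration of the per-row behavior above: the passages keep their original order and the highest-scoring passage is appended once more at the end of the context (values are made up):

# Illustrative only: mirrors long_context_reorder_row for one row.
contents = ["passage A", "passage B", "passage C"]
scores = [0.2, 0.9, 0.5]

best_passage = max(zip(contents, scores), key=lambda pair: pair[1])[0]  # "passage B"
reordered = contents + [best_passage]  # the top-scored passage is repeated at the end
print("\n\n".join(reordered))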

View File

@@ -0,0 +1,280 @@
import os
import pathlib
from copy import deepcopy
from typing import List, Dict, Optional, Union
import pandas as pd
import tokenlog
from autorag.evaluation import evaluate_generation
from autorag.evaluation.util import cast_metrics
from autorag.schema.metricinput import MetricInput
from autorag.strategy import measure_speed, filter_by_threshold, select_best
from autorag.support import get_support_modules
from autorag.utils import validate_qa_dataset
from autorag.utils.util import make_combinations, explode, split_dataframe
def run_prompt_maker_node(
modules: List,
module_params: List[Dict],
previous_result: pd.DataFrame,
node_line_dir: str,
strategies: Dict,
) -> pd.DataFrame:
"""
Run prompt maker node.
With this function, you can select the best prompt maker module.
By default, when only one module is given, the evaluation is skipped.
If you want to select the best prompt among multiple modules, you can use strategies.
When you use them, you must pass 'generator_modules' and their parameters in strategies,
because this node is evaluated with generator modules and generation metrics.
It is recommended to use a single generator module and parameter set for evaluation,
but multiple modules and parameter sets are also supported.
When you don't set a generator module in strategies, the default generator module is used.
The default generator module is llama_index_llm with openai gpt-3.5-turbo model.
:param modules: Prompt maker module classes to run.
:param module_params: Prompt maker module parameters.
:param previous_result: Previous result dataframe.
Could be query expansion's best result or qa data.
:param node_line_dir: This node line's directory.
:param strategies: Strategies for prompt maker node.
:return: The best result dataframe.
It contains previous result columns and prompt maker's result columns which is 'prompts'.
"""
if not os.path.exists(node_line_dir):
os.makedirs(node_line_dir)
node_dir = os.path.join(node_line_dir, "prompt_maker")
if not os.path.exists(node_dir):
os.makedirs(node_dir)
project_dir = pathlib.PurePath(node_line_dir).parent.parent
# run modules
results, execution_times = zip(
*map(
lambda task: measure_speed(
task[0].run_evaluator,
project_dir=project_dir,
previous_result=previous_result,
**task[1],
),
zip(modules, module_params),
)
)
average_times = list(map(lambda x: x / len(results[0]), execution_times))
# get average token usage
token_usages = []
for i, result in enumerate(results):
token_logger = tokenlog.getLogger(
f"prompt_maker_{i}", strategies.get("tokenizer", "gpt2")
)
token_logger.query_batch(result["prompts"].tolist())
token_usages.append(token_logger.get_token_usage() / len(result))
# save results to folder
filepaths = list(
map(lambda x: os.path.join(node_dir, f"{x}.parquet"), range(len(modules)))
)
list(
map(lambda x: x[0].to_parquet(x[1], index=False), zip(results, filepaths))
) # execute save to parquet
filenames = list(map(lambda x: os.path.basename(x), filepaths))
# make summary file
summary_df = pd.DataFrame(
{
"filename": filenames,
"module_name": list(map(lambda module: module.__name__, modules)),
"module_params": module_params,
"execution_time": average_times,
"average_prompt_token": token_usages,
}
)
metric_names, metric_params = cast_metrics(strategies.get("metrics"))
# Run evaluation when there is more than one module.
if len(modules) > 1:
# pop general keys from strategies (e.g. metrics, speed_threshold)
general_key = ["metrics", "speed_threshold", "token_threshold", "tokenizer"]
general_strategy = dict(
filter(lambda x: x[0] in general_key, strategies.items())
)
extra_strategy = dict(
filter(lambda x: x[0] not in general_key, strategies.items())
)
# first, filter by threshold if it is enabled.
if general_strategy.get("speed_threshold") is not None:
results, filenames = filter_by_threshold(
results, average_times, general_strategy["speed_threshold"], filenames
)
# Calculate tokens and save to summary
if general_strategy.get("token_threshold") is not None:
results, filenames = filter_by_threshold(
results, token_usages, general_strategy["token_threshold"], filenames
)
# run metrics before filtering
if metric_names is None or len(metric_names) <= 0:
raise ValueError(
"You must at least one metrics for prompt maker evaluation."
)
# get generator modules from strategy
generator_callables, generator_params = make_generator_callable_params(
extra_strategy
)
# get generation_gt
qa_data = pd.read_parquet(
os.path.join(project_dir, "data", "qa.parquet"), engine="pyarrow"
)
validate_qa_dataset(qa_data)
generation_gt = qa_data["generation_gt"].tolist()
generation_gt = list(map(lambda x: x.tolist(), generation_gt))
metric_inputs = [MetricInput(generation_gt=gen_gt) for gen_gt in generation_gt]
all_prompts = []
for result in results:
all_prompts.extend(result["prompts"].tolist())
evaluation_result_all = evaluate_one_prompt_maker_node(
all_prompts,
generator_callables,
generator_params,
metric_inputs * len(results),
general_strategy["metrics"],
project_dir,
strategy_name=strategies.get("strategy", "mean"),
)
evaluation_results = split_dataframe(
evaluation_result_all, chunk_size=len(results[0])
)
evaluation_df = pd.DataFrame(
{
"filename": filenames,
**{
f"prompt_maker_{metric_name}": list(
map(lambda x: x[metric_name].mean(), evaluation_results)
)
for metric_name in metric_names
},
}
)
summary_df = pd.merge(
on="filename", left=summary_df, right=evaluation_df, how="left"
)
best_result, best_filename = select_best(
evaluation_results,
metric_names,
filenames,
strategies.get("strategy", "mean"),
)
# change metric name columns to prompt_maker_metric_name
best_result = best_result.rename(
columns={
metric_name: f"prompt_maker_{metric_name}"
for metric_name in metric_names
}
)
best_result = best_result.drop(columns=["generated_texts"])
else:
best_result, best_filename = results[0], filenames[0]
# add 'is_best' column at summary file
summary_df["is_best"] = summary_df["filename"] == best_filename
best_result = pd.concat([previous_result, best_result], axis=1)
# save files
summary_df.to_csv(os.path.join(node_dir, "summary.csv"), index=False)
best_result.to_parquet(
os.path.join(node_dir, f"best_{os.path.splitext(best_filename)[0]}.parquet"),
index=False,
)
return best_result
def make_generator_callable_params(strategy_dict: Dict):
node_dict = deepcopy(strategy_dict)
generator_module_list: Optional[List[Dict]] = node_dict.pop(
"generator_modules", None
)
if generator_module_list is None:
generator_module_list = [
{
"module_type": "llama_index_llm",
"llm": "openai",
"model": "gpt-3.5-turbo",
}
]
node_params = node_dict
modules = list(
map(
lambda module_dict: get_support_modules(module_dict.pop("module_type")),
generator_module_list,
)
)
param_combinations = list(
map(
lambda module_dict: make_combinations({**module_dict, **node_params}),
generator_module_list,
)
)
return explode(modules, param_combinations)
def evaluate_one_prompt_maker_node(
prompts: List[str],
generator_classes: List,
generator_params: List[Dict],
metric_inputs: List[MetricInput],
metrics: Union[List[str], List[Dict]],
project_dir,
strategy_name: str,
) -> pd.DataFrame:
input_df = pd.DataFrame({"prompts": prompts})
generator_results = list(
map(
lambda x: x[0].run_evaluator(
project_dir=project_dir, previous_result=input_df, **x[1]
),
zip(generator_classes, generator_params),
)
)
evaluation_results = list(
map(
lambda x: evaluate_generator_result(x[0], metric_inputs, metrics),
zip(generator_results, generator_classes),
)
)
metric_names = (
list(map(lambda x: x["metric_name"], metrics))
if isinstance(metrics[0], dict)
else metrics
)
best_result, _ = select_best(
evaluation_results, metric_names, strategy_name=strategy_name
)
best_result = pd.concat([input_df, best_result], axis=1)
return best_result # it has 'generated_texts' column
def evaluate_generator_result(
result_df: pd.DataFrame,
metric_inputs: List[MetricInput],
metrics: Union[List[str], List[Dict]],
) -> pd.DataFrame:
@evaluate_generation(metric_inputs=metric_inputs, metrics=metrics)
def evaluate(df):
return df["generated_texts"].tolist()
return evaluate(result_df)
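A hedged sketch of the strategies dict this runner consumes, based on the keys read in the code above; the metric names and threshold values are illustrative assumptions, while the generator_modules entry mirrors the default in make_generator_callable_params above:

# Illustrative strategies dict for run_prompt_maker_node (metric names and thresholds are assumptions).
strategies = {
    "metrics": ["bleu", "meteor", "rouge"],  # generation metrics used to score prompt candidates
    "speed_threshold": 10,                   # optional: drop modules slower than this per row
    "token_threshold": 4000,                 # optional: drop modules whose average prompt is longer
    "tokenizer": "gpt2",                     # tokenizer used by tokenlog for prompt token counting
    "generator_modules": [                   # generator that evaluates each prompt candidate
        {"module_type": "llama_index_llm", "llm": "openai", "model": "gpt-3.5-turbo"},
    ],
}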

View File

@@ -0,0 +1,85 @@
import logging
import os
from typing import List, Dict
import pandas as pd
from autorag.nodes.promptmaker.base import BasePromptMaker
from autorag.utils import result_to_dataframe, fetch_contents
logger = logging.getLogger("AutoRAG")
class WindowReplacement(BasePromptMaker):
def __init__(self, project_dir: str, *args, **kwargs):
super().__init__(project_dir, *args, **kwargs)
# load corpus
data_dir = os.path.join(project_dir, "data")
self.corpus_data = pd.read_parquet(
os.path.join(data_dir, "corpus.parquet"), engine="pyarrow"
)
@result_to_dataframe(["prompts"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
query, retrieved_contents, prompt = self.cast_to_run(
previous_result, *args, **kwargs
)
retrieved_ids = previous_result["retrieved_ids"].tolist()
# get metadata from corpus
retrieved_metadata = fetch_contents(
self.corpus_data, retrieved_ids, column_name="metadata"
)
return self._pure(prompt, query, retrieved_contents, retrieved_metadata)
def _pure(
self,
prompt: str,
queries: List[str],
retrieved_contents: List[List[str]],
retrieved_metadata: List[List[Dict]],
) -> List[str]:
"""
Replace retrieved_contents with a window to create a Prompt
(only available for corpus chunked with Sentence window method)
You must write a prompt or a prompt list in the config YAML file like this:
.. Code:: yaml
nodes:
- node_type: prompt_maker
modules:
- module_type: window_replacement
prompt: [Answer this question: {query} \n\n {retrieved_contents},
Read the passages carefully and answer this question: {query} \n\n Passages: {retrieved_contents}]
:param prompt: A prompt string.
:param queries: List of query strings.
:param retrieved_contents: List of retrieved contents.
:param retrieved_metadata: List of retrieved metadata.
:return: Prompts that are made by window_replacement.
"""
def window_replacement_row(
_prompt: str,
_query: str,
_retrieved_contents,
_retrieved_metadata: List[Dict],
) -> str:
window_list = []
for content, metadata in zip(_retrieved_contents, _retrieved_metadata):
if "window" in metadata:
window_list.append(metadata["window"])
else:
window_list.append(content)
logger.info(
"Only available for corpus chunked with Sentence window method."
"window_replacement will not proceed."
)
contents_str = "\n\n".join(window_list)
return _prompt.format(query=_query, retrieved_contents=contents_str)
return list(
map(
lambda x: window_replacement_row(prompt, x[0], x[1], x[2]),
zip(queries, retrieved_contents, retrieved_metadata),
)
)
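A minimal sketch of the per-row replacement above, assuming the corpus was chunked with the sentence-window method so each passage's metadata may carry a "window" key (values are made up):

# Illustrative only: mirrors window_replacement_row for one row.
retrieved_contents = ["short sentence A", "short sentence B"]
retrieved_metadata = [
    {"window": "short sentence A together with its surrounding sentences"},
    {},  # no window metadata -> the original content is kept
]

window_list = [
    metadata.get("window", content)
    for content, metadata in zip(retrieved_contents, retrieved_metadata)
]
print("\n\n".join(window_list))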

View File

@@ -0,0 +1,4 @@
from .hyde import HyDE
from .multi_query_expansion import MultiQueryExpansion
from .pass_query_expansion import PassQueryExpansion
from .query_decompose import QueryDecompose

View File

@@ -0,0 +1,62 @@
import abc
import logging
from pathlib import Path
from typing import List, Union
import pandas as pd
from autorag.nodes.util import make_generator_callable_param
from autorag.schema import BaseModule
from autorag.utils import validate_qa_dataset
logger = logging.getLogger("AutoRAG")
class BaseQueryExpansion(BaseModule, metaclass=abc.ABCMeta):
def __init__(self, project_dir: Union[str, Path], *args, **kwargs):
logger.info(
f"Initialize query expansion node - {self.__class__.__name__} module..."
)
# set generator module for query expansion
generator_class, generator_param = make_generator_callable_param(kwargs)
self.generator = generator_class(project_dir, **generator_param)
def __del__(self):
del self.generator
logger.info(
f"Delete query expansion node - {self.__class__.__name__} module..."
)
def cast_to_run(self, previous_result: pd.DataFrame, *args, **kwargs):
logger.info(
f"Running query expansion node - {self.__class__.__name__} module..."
)
validate_qa_dataset(previous_result)
# find queries columns
assert (
"query" in previous_result.columns
), "previous_result must have query column."
queries = previous_result["query"].tolist()
return queries
@staticmethod
def _check_expanded_query(queries: List[str], expanded_queries: List[List[str]]):
return list(
map(
lambda query, expanded_query_list: check_expanded_query(
query, expanded_query_list
),
queries,
expanded_queries,
)
)
def check_expanded_query(query: str, expanded_query_list: List[str]):
# replace empty expanded queries with the original query
expanded_query_list = list(map(lambda x: x.strip(), expanded_query_list))
return [
expanded_query if expanded_query else query
for expanded_query in expanded_query_list
]
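A quick illustration of check_expanded_query defined above, run alongside this module: an expanded query that is empty after stripping falls back to the original query.

# Illustrative only: an empty expansion falls back to the original query.
print(check_expanded_query("what is rag?", ["  ", "what is retrieval augmented generation?"]))
# ['what is rag?', 'what is retrieval augmented generation?']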

View File

@@ -0,0 +1,43 @@
from typing import List
import pandas as pd
from autorag.nodes.queryexpansion.base import BaseQueryExpansion
from autorag.utils import result_to_dataframe
hyde_prompt = "Please write a passage to answer the question"
class HyDE(BaseQueryExpansion):
@result_to_dataframe(["queries"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries = self.cast_to_run(previous_result, *args, **kwargs)
# pop prompt from kwargs
prompt = kwargs.pop("prompt", hyde_prompt)
kwargs.pop("generator_module_type", None)
expanded_queries = self._pure(queries, prompt, **kwargs)
return self._check_expanded_query(queries, expanded_queries)
def _pure(self, queries: List[str], prompt: str = hyde_prompt, **generator_params):
"""
HyDE is inspired by "Precise Zero-shot Dense Retrieval without Relevance Labels" (https://arxiv.org/pdf/2212.10496.pdf).
The LLM creates a hypothetical passage for each query,
and then passages are retrieved using the hypothetical passage as the query.
:param queries: List[str], queries to retrieve.
:param prompt: Prompt to use when generating hypothetical passage
:return: List[List[str]], List of hyde results.
"""
full_prompts = list(
map(
lambda x: (prompt if bool(prompt) else hyde_prompt)
+ f"\nQuestion: {x}\nPassage:",
queries,
)
)
input_df = pd.DataFrame({"prompts": full_prompts})
result_df = self.generator.pure(previous_result=input_df, **generator_params)
answers = result_df["generated_texts"].tolist()
results = list(map(lambda x: [x], answers))
return results
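For reference, this is the full prompt HyDE sends to the generator for one query when the default hyde_prompt is used (the query is made up):

# Illustrative only: the prompt HyDE builds for a single query.
query = "What is dense retrieval?"
full_prompt = hyde_prompt + f"\nQuestion: {query}\nPassage:"
print(full_prompt)
# Please write a passage to answer the question
# Question: What is dense retrieval?
# Passage: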

View File

@@ -0,0 +1,57 @@
from typing import List
import pandas as pd
from autorag.nodes.queryexpansion.base import BaseQueryExpansion
from autorag.utils import result_to_dataframe
multi_query_expansion_prompt = """You are an AI language model assistant.
Your task is to generate 3 different versions of the given user
question to retrieve relevant documents from a vector database.
By generating multiple perspectives on the user question,
your goal is to help the user overcome some of the limitations
of distance-based similarity search. Provide these alternative
questions separated by newlines. Original question: {query}"""
class MultiQueryExpansion(BaseQueryExpansion):
@result_to_dataframe(["queries"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries = self.cast_to_run(previous_result, *args, **kwargs)
# pop prompt from kwargs
prompt = kwargs.pop("prompt", multi_query_expansion_prompt)
kwargs.pop("generator_module_type", None)
expanded_queries = self._pure(queries, prompt, **kwargs)
return self._check_expanded_query(queries, expanded_queries)
def _pure(
self, queries, prompt: str = multi_query_expansion_prompt, **kwargs
) -> List[List[str]]:
"""
Expand a list of queries using a multi-query expansion approach.
The LLM generates 3 different versions of each input query.
:param queries: List[str], queries to decompose.
:param prompt: str, prompt to use for multi-query expansion.
default prompt comes from langchain MultiQueryRetriever default query prompt.
:return: List[List[str]], list of expansion query.
"""
full_prompts = list(map(lambda x: prompt.format(query=x), queries))
input_df = pd.DataFrame({"prompts": full_prompts})
result_df = self.generator.pure(previous_result=input_df, **kwargs)
answers = result_df["generated_texts"].tolist()
results = list(
map(lambda x: get_multi_query_expansion(x[0], x[1]), zip(queries, answers))
)
return results
def get_multi_query_expansion(query: str, answer: str) -> List[str]:
try:
queries = answer.split("\n")
queries.insert(0, query)
return queries
except Exception:
return [query]
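A quick illustration of get_multi_query_expansion defined above: the LLM answer is split on newlines and the original query is kept as the first element (strings are made up):

# Illustrative only: parsing the generator answer into expanded queries.
answer = "What does RAG stand for?\nHow does retrieval augmented generation work?"
print(get_multi_query_expansion("what is rag?", answer))
# ['what is rag?', 'What does RAG stand for?', 'How does retrieval augmented generation work?']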

View File

@@ -0,0 +1,22 @@
import pandas as pd
from autorag.nodes.queryexpansion.base import BaseQueryExpansion
from autorag.utils import result_to_dataframe
class PassQueryExpansion(BaseQueryExpansion):
@result_to_dataframe(["queries"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
"""
Do not perform query expansion.
Return with the same queries.
The dimension will be 2-d list, and the column name will be 'queries'.
"""
assert (
"query" in previous_result.columns
), "previous_result must have query column."
queries = previous_result["query"].tolist()
return list(map(lambda x: [x], queries))
def _pure(self, *args, **kwargs):
pass

View File

@@ -0,0 +1,111 @@
from typing import List
import pandas as pd
from autorag.nodes.queryexpansion.base import BaseQueryExpansion
from autorag.utils import result_to_dataframe
decompose_prompt = """Decompose a question in self-contained sub-questions. Use \"The question needs no decomposition\" when no decomposition is needed.
Example 1:
Question: Is Hamlet more common on IMDB than Comedy of Errors?
Decompositions:
1: How many listings of Hamlet are there on IMDB?
2: How many listing of Comedy of Errors is there on IMDB?
Example 2:
Question: Are birds important to badminton?
Decompositions:
The question needs no decomposition
Example 3:
Question: Is it legal for a licensed child driving Mercedes-Benz to be employed in US?
Decompositions:
1: What is the minimum driving age in the US?
2: What is the minimum age for someone to be employed in the US?
Example 4:
Question: Are all cucumbers the same texture?
Decompositions:
The question needs no decomposition
Example 5:
Question: Hydrogen's atomic number squared exceeds number of Spice Girls?
Decompositions:
1: What is the atomic number of hydrogen?
2: How many Spice Girls are there?
Example 6:
Question: {question}
Decompositions:
"""
class QueryDecompose(BaseQueryExpansion):
@result_to_dataframe(["queries"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries = self.cast_to_run(previous_result, *args, **kwargs)
# pop prompt from kwargs
prompt = kwargs.pop("prompt", decompose_prompt)
kwargs.pop("generator_module_type", None)
expanded_queries = self._pure(queries, prompt, **kwargs)
return self._check_expanded_query(queries, expanded_queries)
def _pure(
self, queries: List[str], prompt: str = decompose_prompt, *args, **kwargs
) -> List[List[str]]:
"""
Decompose a query into smaller self-contained sub-questions.
:param queries: List[str], queries to decompose.
:param prompt: str, prompt to use for query decomposition.
default prompt comes from Visconde's StrategyQA few-shot prompt.
:return: List[List[str]], list of decomposed query. Return input query if query is not decomposable.
"""
full_prompts = []
for query in queries:
if bool(prompt):
full_prompt = f"prompt: {prompt}\n\n question: {query}"
else:
full_prompt = decompose_prompt.format(question=query)
full_prompts.append(full_prompt)
input_df = pd.DataFrame({"prompts": full_prompts})
result_df = self.generator.pure(previous_result=input_df, *args, **kwargs)
answers = result_df["generated_texts"].tolist()
results = list(
map(lambda x: get_query_decompose(x[0], x[1]), zip(queries, answers))
)
return results
def get_query_decompose(query: str, answer: str) -> List[str]:
"""
Decompose a query into smaller self-contained sub-questions.
:param query: str, query to decompose.
:param answer: str, answer from query_decompose function.
:return: List[str], list of a decomposed query. Return input query if query is not decomposable.
"""
if answer.lower() == "the question needs no decomposition":
return [query]
try:
lines = [line.strip() for line in answer.splitlines() if line.strip()]
if lines[0].startswith("Decompositions:"):
lines.pop(0)
questions = [line.split(":", 1)[1].strip() for line in lines if ":" in line]
if not questions:
return [query]
return questions
except Exception:
return [query]
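A quick illustration of get_query_decompose defined above with made-up answers:

# Illustrative only: parsing decomposition answers.
answer = "1: What is the minimum driving age in the US?\n2: What is the minimum age for employment in the US?"
print(get_query_decompose("Is it legal for a licensed child driving Mercedes-Benz to be employed in US?", answer))
# ['What is the minimum driving age in the US?', 'What is the minimum age for employment in the US?']

print(get_query_decompose("Are birds important to badminton?", "The question needs no decomposition"))
# ['Are birds important to badminton?']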

View File

@@ -0,0 +1,276 @@
import logging
import os
import pathlib
from copy import deepcopy
from typing import List, Dict, Optional
import pandas as pd
from autorag.nodes.retrieval.run import evaluate_retrieval_node
from autorag.schema.metricinput import MetricInput
from autorag.strategy import measure_speed, filter_by_threshold, select_best
from autorag.support import get_support_modules
from autorag.utils.util import make_combinations, explode
logger = logging.getLogger("AutoRAG")
def run_query_expansion_node(
modules: List,
module_params: List[Dict],
previous_result: pd.DataFrame,
node_line_dir: str,
strategies: Dict,
) -> pd.DataFrame:
"""
Run evaluation and select the best module among query expansion node results.
Initially, retrieval is run using expanded_queries, the result of the query_expansion module.
The retrieval module is run as a combination of the retrieval_modules in strategies.
If there are multiple retrieval_modules, run them all and choose the best result.
If no retrieval_modules are given, bm25 is used as the default.
In this way, the best retrieval result is selected for each query expansion module, and then the best module is selected among them.
:param modules: Query expansion modules to run.
:param module_params: Query expansion module parameters.
:param previous_result: Previous result dataframe.
In this case, it would be qa data.
:param node_line_dir: This node line's directory.
:param strategies: Strategies for query expansion node.
:return: The best result dataframe.
"""
if not os.path.exists(node_line_dir):
os.makedirs(node_line_dir)
node_dir = os.path.join(node_line_dir, "query_expansion")
if not os.path.exists(node_dir):
os.makedirs(node_dir)
project_dir = pathlib.PurePath(node_line_dir).parent.parent
# run query expansion
results, execution_times = zip(
*map(
lambda task: measure_speed(
task[0].run_evaluator,
project_dir=project_dir,
previous_result=previous_result,
**task[1],
),
zip(modules, module_params),
)
)
average_times = list(map(lambda x: x / len(results[0]), execution_times))
# save results to folder
pseudo_module_params = deepcopy(module_params)
for i, module_param in enumerate(pseudo_module_params):
if "prompt" in module_params:
module_param["prompt"] = str(i)
filepaths = list(
map(lambda x: os.path.join(node_dir, f"{x}.parquet"), range(len(modules)))
)
list(
map(lambda x: x[0].to_parquet(x[1], index=False), zip(results, filepaths))
) # execute save to parquet
filenames = list(map(lambda x: os.path.basename(x), filepaths))
# make summary file
summary_df = pd.DataFrame(
{
"filename": filenames,
"module_name": list(map(lambda module: module.__name__, modules)),
"module_params": module_params,
"execution_time": average_times,
}
)
# Run evaluation when there is more than one module.
if len(modules) > 1:
# pop general keys from strategies (e.g. metrics, speed_threshold)
general_key = ["metrics", "speed_threshold", "strategy"]
general_strategy = dict(
filter(lambda x: x[0] in general_key, strategies.items())
)
extra_strategy = dict(
filter(lambda x: x[0] not in general_key, strategies.items())
)
# first, filter by threshold if it is enabled.
if general_strategy.get("speed_threshold") is not None:
results, filenames = filter_by_threshold(
results, average_times, general_strategy["speed_threshold"], filenames
)
# check metrics in strategy
if general_strategy.get("metrics") is None:
raise ValueError(
"You must at least one metrics for query expansion evaluation."
)
if extra_strategy.get("top_k") is None:
extra_strategy["top_k"] = 10 # default value
# get retrieval modules from strategy
retrieval_callables, retrieval_params = make_retrieval_callable_params(
extra_strategy
)
# get retrieval_gt
retrieval_gt = pd.read_parquet(
os.path.join(project_dir, "data", "qa.parquet"), engine="pyarrow"
)["retrieval_gt"].tolist()
# make rows to metric_inputs
metric_inputs = [
MetricInput(retrieval_gt=ret_gt, query=query, generation_gt=gen_gt)
for ret_gt, query, gen_gt in zip(
retrieval_gt,
previous_result["query"].tolist(),
previous_result["generation_gt"].tolist(),
)
]
# run evaluation
evaluation_results = list(
map(
lambda result: evaluate_one_query_expansion_node(
retrieval_callables,
retrieval_params,
[
setattr(metric_input, "queries", queries) or metric_input
for metric_input, queries in zip(
metric_inputs, result["queries"].to_list()
)
],
general_strategy["metrics"],
project_dir,
previous_result,
general_strategy.get("strategy", "mean"),
),
results,
)
)
evaluation_df = pd.DataFrame(
{
"filename": filenames,
**{
f"query_expansion_{metric_name}": list(
map(lambda x: x[metric_name].mean(), evaluation_results)
)
for metric_name in general_strategy["metrics"]
},
}
)
summary_df = pd.merge(
on="filename", left=summary_df, right=evaluation_df, how="left"
)
best_result, best_filename = select_best(
evaluation_results,
general_strategy["metrics"],
filenames,
strategies.get("strategy", "mean"),
)
# change metric name columns to query_expansion_metric_name
best_result = best_result.rename(
columns={
metric_name: f"query_expansion_{metric_name}"
for metric_name in strategies["metrics"]
}
)
best_result = best_result.drop(
columns=["retrieved_contents", "retrieved_ids", "retrieve_scores"]
)
else:
best_result, best_filename = results[0], filenames[0]
best_result = pd.concat([previous_result, best_result], axis=1)
# add 'is_best' column at summary file
summary_df["is_best"] = summary_df["filename"] == best_filename
# save files
summary_df.to_csv(os.path.join(node_dir, "summary.csv"), index=False)
best_result.to_parquet(
os.path.join(node_dir, f"best_{os.path.splitext(best_filename)[0]}.parquet"),
index=False,
)
return best_result
def evaluate_one_query_expansion_node(
retrieval_funcs: List,
retrieval_params: List[Dict],
metric_inputs: List[MetricInput],
metrics: List[str],
project_dir,
previous_result: pd.DataFrame,
strategy_name: str,
) -> pd.DataFrame:
previous_result["queries"] = [
metric_input.queries for metric_input in metric_inputs
]
retrieval_results = list(
map(
lambda x: x[0].run_evaluator(
project_dir=project_dir, previous_result=previous_result, **x[1]
),
zip(retrieval_funcs, retrieval_params),
)
)
evaluation_results = list(
map(
lambda x: evaluate_retrieval_node(
x,
metric_inputs,
metrics,
),
retrieval_results,
)
)
best_result, _ = select_best(
evaluation_results, metrics, strategy_name=strategy_name
)
best_result = pd.concat([previous_result, best_result], axis=1)
return best_result
def make_retrieval_callable_params(strategy_dict: Dict):
"""
strategy_dict looks like this:
.. Code:: json
{
"metrics": ["retrieval_f1", "retrieval_recall"],
"top_k": 50,
"retrieval_modules": [
{"module_type": "bm25"},
{"module_type": "vectordb", "embedding_model": ["openai", "huggingface"]}
]
}
"""
node_dict = deepcopy(strategy_dict)
retrieval_module_list: Optional[List[Dict]] = node_dict.pop(
"retrieval_modules", None
)
if retrieval_module_list is None:
retrieval_module_list = [
{
"module_type": "bm25",
}
]
node_params = node_dict
modules = list(
map(
lambda module_dict: get_support_modules(module_dict.pop("module_type")),
retrieval_module_list,
)
)
param_combinations = list(
map(
lambda module_dict: make_combinations({**module_dict, **node_params}),
retrieval_module_list,
)
)
return explode(modules, param_combinations)
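A hedged usage sketch of make_retrieval_callable_params defined above, using the strategy dict from its docstring; the expansion of list-valued parameters into separate parameter sets is the assumed behavior of make_combinations, which is not shown here:

# Illustrative only: assumed expansion behavior of make_combinations.
strategy = {
    "metrics": ["retrieval_f1", "retrieval_recall"],
    "top_k": 50,
    "retrieval_modules": [
        {"module_type": "bm25"},
        {"module_type": "vectordb", "embedding_model": ["openai", "huggingface"]},
    ],
}
modules, param_combinations = make_retrieval_callable_params(strategy)
# modules: one callable per retrieval module (bm25, vectordb)
# param_combinations[1] is expected to contain one parameter set per embedding_model,
# each inheriting the shared "top_k": 50.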

View File

@@ -0,0 +1,4 @@
from .bm25 import BM25
from .hybrid_cc import HybridCC
from .hybrid_rrf import HybridRRF
from .vectordb import VectorDB

View File

@@ -0,0 +1,127 @@
import abc
import logging
import os
from typing import List, Union, Tuple
import pandas as pd
from autorag.schema import BaseModule
from autorag.support import get_support_modules
from autorag.utils import fetch_contents, result_to_dataframe, validate_qa_dataset
from autorag.utils.util import pop_params
logger = logging.getLogger("AutoRAG")
class BaseRetrieval(BaseModule, metaclass=abc.ABCMeta):
def __init__(self, project_dir: str, *args, **kwargs):
logger.info(f"Initialize retrieval node - {self.__class__.__name__}")
self.resources_dir = os.path.join(project_dir, "resources")
data_dir = os.path.join(project_dir, "data")
# fetch data from corpus_data
self.corpus_df = pd.read_parquet(
os.path.join(data_dir, "corpus.parquet"), engine="pyarrow"
)
def __del__(self):
logger.info(f"Deleting retrieval node - {self.__class__.__name__} module...")
def cast_to_run(self, previous_result: pd.DataFrame, *args, **kwargs):
logger.info(f"Running retrieval node - {self.__class__.__name__} module...")
validate_qa_dataset(previous_result)
# find queries columns & type cast queries
assert (
"query" in previous_result.columns
), "previous_result must have query column."
if "queries" not in previous_result.columns:
previous_result["queries"] = previous_result["query"]
previous_result.loc[:, "queries"] = previous_result["queries"].apply(
cast_queries
)
queries = previous_result["queries"].tolist()
return queries
class HybridRetrieval(BaseRetrieval, metaclass=abc.ABCMeta):
def __init__(
self, project_dir: str, target_modules, target_module_params, *args, **kwargs
):
super().__init__(project_dir)
self.target_modules = list(
map(
lambda x, y: get_support_modules(x)(
**y,
project_dir=project_dir,
),
target_modules,
target_module_params,
)
)
self.target_module_params = target_module_params
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
result_dfs: List[pd.DataFrame] = list(
map(
lambda x, y: x.pure(
**y,
previous_result=previous_result,
),
self.target_modules,
self.target_module_params,
)
)
ids = tuple(
map(lambda df: df["retrieved_ids"].apply(list).tolist(), result_dfs)
)
scores = tuple(
map(
lambda df: df["retrieve_scores"].apply(list).tolist(),
result_dfs,
)
)
_pure_params = pop_params(self._pure, kwargs)
if "ids" in _pure_params or "scores" in _pure_params:
raise ValueError(
"With specifying ids or scores, you must use HybridRRF.run_evaluator instead."
)
ids, scores = self._pure(ids=ids, scores=scores, **_pure_params)
contents = fetch_contents(self.corpus_df, ids)
return contents, ids, scores
def cast_queries(queries: Union[str, List[str]]) -> List[str]:
if isinstance(queries, str):
return [queries]
elif isinstance(queries, List):
return queries
else:
raise ValueError(f"queries must be str or list, but got {type(queries)}")
def evenly_distribute_passages(
ids: List[List[str]], scores: List[List[float]], top_k: int
) -> Tuple[List[str], List[float]]:
assert len(ids) == len(scores), "ids and scores must have same length."
query_cnt = len(ids)
avg_len = top_k // query_cnt
remainder = top_k % query_cnt
new_ids = []
new_scores = []
for i in range(query_cnt):
if i < remainder:
new_ids.extend(ids[i][: avg_len + 1])
new_scores.extend(scores[i][: avg_len + 1])
else:
new_ids.extend(ids[i][:avg_len])
new_scores.extend(scores[i][:avg_len])
return new_ids, new_scores
def get_bm25_pkl_name(bm25_tokenizer: str):
bm25_tokenizer = bm25_tokenizer.replace("/", "")
return f"bm25_{bm25_tokenizer}.pkl"

View File

@@ -0,0 +1,365 @@
import asyncio
import os
import pickle
import re
from typing import List, Dict, Tuple, Callable, Union, Iterable, Optional
import numpy as np
import pandas as pd
from llama_index.core.indices.keyword_table.utils import simple_extract_keywords
from nltk import PorterStemmer
from rank_bm25 import BM25Okapi
from transformers import AutoTokenizer, PreTrainedTokenizerBase
from autorag.nodes.retrieval.base import (
evenly_distribute_passages,
BaseRetrieval,
get_bm25_pkl_name,
)
from autorag.utils import validate_corpus_dataset, fetch_contents
from autorag.utils.util import (
get_event_loop,
normalize_string,
result_to_dataframe,
pop_params,
)
def tokenize_ko_kiwi(texts: List[str]) -> List[List[str]]:
try:
from kiwipiepy import Kiwi, Token
except ImportError:
raise ImportError(
"You need to install kiwipiepy to use 'ko_kiwi' tokenizer. "
"Please install kiwipiepy by running 'pip install kiwipiepy'. "
"Or install Korean version of AutoRAG by running 'pip install AutoRAG[ko]'."
)
texts = list(map(lambda x: x.strip().lower(), texts))
kiwi = Kiwi()
tokenized_list: Iterable[List[Token]] = kiwi.tokenize(texts)
return [list(map(lambda x: x.form, token_list)) for token_list in tokenized_list]
def tokenize_ko_kkma(texts: List[str]) -> List[List[str]]:
try:
from konlpy.tag import Kkma
except ImportError:
raise ImportError(
"You need to install konlpy to use 'ko_kkma' tokenizer. "
"Please install konlpy by running 'pip install konlpy'. "
"Or install Korean version of AutoRAG by running 'pip install AutoRAG[ko]'."
)
tokenizer = Kkma()
tokenized_list: List[List[str]] = list(map(lambda x: tokenizer.morphs(x), texts))
return tokenized_list
def tokenize_ko_okt(texts: List[str]) -> List[List[str]]:
try:
from konlpy.tag import Okt
except ImportError:
raise ImportError(
"You need to install konlpy to use 'ko_kkma' tokenizer. "
"Please install konlpy by running 'pip install konlpy'. "
"Or install Korean version of AutoRAG by running 'pip install AutoRAG[ko]'."
)
tokenizer = Okt()
tokenized_list: List[List[str]] = list(map(lambda x: tokenizer.morphs(x), texts))
return tokenized_list
def tokenize_porter_stemmer(texts: List[str]) -> List[List[str]]:
def tokenize_remove_stopword(text: str, stemmer) -> List[str]:
text = text.lower()
words = list(simple_extract_keywords(text))
return [stemmer.stem(word) for word in words]
stemmer = PorterStemmer()
tokenized_list: List[List[str]] = list(
map(lambda x: tokenize_remove_stopword(x, stemmer), texts)
)
return tokenized_list
def tokenize_space(texts: List[str]) -> List[List[str]]:
def tokenize_space_text(text: str) -> List[str]:
text = normalize_string(text)
return re.split(r"\s+", text.strip())
return list(map(tokenize_space_text, texts))
def load_bm25_corpus(bm25_path: str) -> Dict:
if bm25_path is None:
return {}
with open(bm25_path, "rb") as f:
bm25_corpus = pickle.load(f)
return bm25_corpus
def tokenize_ja_sudachipy(texts: List[str]) -> List[List[str]]:
try:
from sudachipy import dictionary, tokenizer
except ImportError:
raise ImportError(
"You need to install SudachiPy to use 'sudachipy' tokenizer. "
"Please install SudachiPy by running 'pip install sudachipy'."
)
# Initialize SudachiPy with the default tokenizer
tokenizer_obj = dictionary.Dictionary(dict="core").create()
# Choose the tokenizer mode: NORMAL, SEARCH, A
mode = tokenizer.Tokenizer.SplitMode.A
# Tokenize the input texts
tokenized_list = []
for text in texts:
tokens = tokenizer_obj.tokenize(text, mode)
tokenized_list.append([token.surface() for token in tokens])
return tokenized_list
BM25_TOKENIZER = {
"porter_stemmer": tokenize_porter_stemmer,
"ko_kiwi": tokenize_ko_kiwi,
"space": tokenize_space,
"ko_kkma": tokenize_ko_kkma,
"ko_okt": tokenize_ko_okt,
"sudachipy": tokenize_ja_sudachipy,
}
class BM25(BaseRetrieval):
def __init__(self, project_dir: str, *args, **kwargs):
"""
Initialize BM25 module.
(Retrieval)
:param project_dir: The project directory path.
:param bm25_tokenizer: The tokenizer name that is used for BM25.
It supports 'porter_stemmer', 'ko_kiwi', and huggingface `AutoTokenizer`.
You can pass a huggingface tokenizer name.
Default is porter_stemmer.
:param kwargs: The optional arguments.
"""
super().__init__(project_dir)
# check if bm25_path and file exist
bm25_tokenizer = kwargs.get("bm25_tokenizer", None)
if bm25_tokenizer is None:
bm25_tokenizer = "porter_stemmer"
bm25_path = os.path.join(self.resources_dir, get_bm25_pkl_name(bm25_tokenizer))
assert (
bm25_path is not None
), "bm25_path must be specified for using bm25 retrieval."
assert os.path.exists(
bm25_path
), f"bm25_path {bm25_path} does not exist. Please ingest first."
self.bm25_corpus = load_bm25_corpus(bm25_path)
assert (
"tokens" and "passage_id" in list(self.bm25_corpus.keys())
), "bm25_corpus must contain tokens and passage_id. Please check you ingested bm25 corpus correctly."
self.tokenizer = select_bm25_tokenizer(bm25_tokenizer)
assert self.bm25_corpus["tokenizer_name"] == bm25_tokenizer, (
f"The bm25 corpus tokenizer is {self.bm25_corpus['tokenizer_name']}, but your input is {bm25_tokenizer}. "
f"You need to ingest again. Delete bm25 pkl file and re-ingest it."
)
self.bm25_instance = BM25Okapi(self.bm25_corpus["tokens"])
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries = self.cast_to_run(previous_result)
pure_params = pop_params(self._pure, kwargs)
ids, scores = self._pure(queries, *args, **pure_params)
contents = fetch_contents(self.corpus_df, ids)
return contents, ids, scores
def _pure(
self,
queries: List[List[str]],
top_k: int,
ids: Optional[List[List[str]]] = None,
) -> Tuple[List[List[str]], List[List[float]]]:
"""
BM25 retrieval function.
You have to load a pickle file that is already ingested.
:param queries: 2-d list of query strings.
Each element of the list is a query strings of each row.
:param top_k: The number of passages to be retrieved.
:param ids: The optional list of ids that you want to retrieve.
You don't need to specify this in the general use cases.
Default is None.
:return: The 2-d list contains a list of passage ids that retrieved from bm25 and 2-d list of its scores.
It will be a length of queries. And each element has a length of top_k.
"""
if ids is not None:
score_result = list(
map(
lambda query_list, id_list: get_bm25_scores(
query_list,
id_list,
self.tokenizer,
self.bm25_instance,
self.bm25_corpus,
),
queries,
ids,
)
)
return ids, score_result
# run async bm25_pure function
tasks = [
bm25_pure(
input_queries,
top_k,
self.tokenizer,
self.bm25_instance,
self.bm25_corpus,
)
for input_queries in queries
]
loop = get_event_loop()
results = loop.run_until_complete(asyncio.gather(*tasks))
id_result = list(map(lambda x: x[0], results))
score_result = list(map(lambda x: x[1], results))
return id_result, score_result
async def bm25_pure(
queries: List[str], top_k: int, tokenizer, bm25_api: BM25Okapi, bm25_corpus: Dict
) -> Tuple[List[str], List[float]]:
"""
Async BM25 retrieval function.
Its usage is for async retrieval of bm25 row by row.
:param queries: A list of query strings.
:param top_k: The number of passages to be retrieved.
:param tokenizer: A tokenizer that will be used to tokenize queries.
:param bm25_api: A bm25 api instance that will be used to retrieve passages.
:param bm25_corpus: A dictionary containing the bm25 corpus, which is doc_id from corpus and tokenized corpus.
Its data structure looks like this:
.. Code:: python
{
"tokens": [], # 2d list of tokens
"passage_id": [], # 2d list of passage_id. Type must be str.
}
:return: The tuple contains a list of passage ids that retrieved from bm25 and its scores.
"""
# Query tokenization is kept synchronous because the number of queries per row is usually small, so making it async would only add overhead.
tokenized_queries = tokenize(queries, tokenizer)
id_result = []
score_result = []
for query in tokenized_queries:
scores = bm25_api.get_scores(query)
sorted_scores = sorted(scores, reverse=True)
top_n_index = np.argsort(scores)[::-1][:top_k]
ids = [bm25_corpus["passage_id"][i] for i in top_n_index]
id_result.append(ids)
score_result.append(sorted_scores[:top_k])
# evenly distribute passages across the expanded queries so the total equals top_k
id_result, score_result = evenly_distribute_passages(id_result, score_result, top_k)
# sort id_result and score_result by score
result = [
(_id, score)
for score, _id in sorted(
zip(score_result, id_result), key=lambda pair: pair[0], reverse=True
)
]
id_result, score_result = zip(*result)
return list(id_result), list(score_result)
def get_bm25_scores(
queries: List[str],
ids: List[str],
tokenizer,
bm25_api: BM25Okapi,
bm25_corpus: Dict,
) -> List[float]:
if len(ids) == 0 or not bool(ids):
return []
tokenized_queries = tokenize(queries, tokenizer)
result_dict = {id_: [] for id_ in ids}
for query in tokenized_queries:
scores = bm25_api.get_scores(query)
for i, id_ in enumerate(ids):
result_dict[id_].append(scores[bm25_corpus["passage_id"].index(id_)])
result_df = pd.DataFrame(result_dict)
return result_df.max(axis=0).tolist()
def tokenize(queries: List[str], tokenizer) -> List[List[Union[int, str]]]:
if isinstance(tokenizer, PreTrainedTokenizerBase):
tokenized_queries = tokenizer(queries).input_ids
else:
tokenized_queries = tokenizer(queries)
return tokenized_queries
def bm25_ingest(
corpus_path: str, corpus_data: pd.DataFrame, bm25_tokenizer: str = "porter_stemmer"
):
if not corpus_path.endswith(".pkl"):
raise ValueError(f"Corpus path {corpus_path} is not a pickle file.")
validate_corpus_dataset(corpus_data)
ids = corpus_data["doc_id"].tolist()
# Initialize bm25_corpus
bm25_corpus = pd.DataFrame()
# Load the BM25 corpus if it exists and get the passage ids
if os.path.exists(corpus_path) and os.path.getsize(corpus_path) > 0:
with open(corpus_path, "rb") as r:
corpus = pickle.load(r)
bm25_corpus = pd.DataFrame.from_dict(corpus)
duplicated_passage_rows = bm25_corpus[bm25_corpus["passage_id"].isin(ids)]
new_passage = corpus_data[
~corpus_data["doc_id"].isin(duplicated_passage_rows["passage_id"])
]
else:
new_passage = corpus_data
if not new_passage.empty:
tokenizer = select_bm25_tokenizer(bm25_tokenizer)
if isinstance(tokenizer, PreTrainedTokenizerBase):
tokenized_corpus = tokenizer(new_passage["contents"].tolist()).input_ids
else:
tokenized_corpus = tokenizer(new_passage["contents"].tolist())
new_bm25_corpus = pd.DataFrame(
{
"tokens": tokenized_corpus,
"passage_id": new_passage["doc_id"].tolist(),
}
)
if not bm25_corpus.empty:
bm25_corpus_updated = pd.concat(
[bm25_corpus, new_bm25_corpus], ignore_index=True
)
bm25_dict = bm25_corpus_updated.to_dict("list")
else:
bm25_dict = new_bm25_corpus.to_dict("list")
# add tokenizer name to bm25_dict
bm25_dict["tokenizer_name"] = bm25_tokenizer
with open(corpus_path, "wb") as w:
pickle.dump(bm25_dict, w)
def select_bm25_tokenizer(
bm25_tokenizer: str,
) -> Callable[[str], List[Union[int, str]]]:
if bm25_tokenizer in list(BM25_TOKENIZER.keys()):
return BM25_TOKENIZER[bm25_tokenizer]
return AutoTokenizer.from_pretrained(bm25_tokenizer, use_fast=False)
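A minimal sketch of picking a tokenizer with select_bm25_tokenizer defined above; the HuggingFace model name in the comment is a hypothetical example:

# Illustrative only: resolving a tokenizer by name.
tokenizer = select_bm25_tokenizer("porter_stemmer")  # built-in name -> callable from BM25_TOKENIZER
tokenized = tokenizer(["BM25 ranks passages by term overlap with the query."])
# tokenized is a list containing one list of stemmed keyword tokens.

# Any other string is treated as a HuggingFace tokenizer name, e.g. (hypothetical):
# hf_tokenizer = select_bm25_tokenizer("bert-base-uncased")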

View File

@@ -0,0 +1,214 @@
import os
from pathlib import Path
from typing import Tuple, List, Union
import numpy as np
import pandas as pd
from autorag.nodes.retrieval.base import HybridRetrieval
from autorag.utils.util import pop_params, fetch_contents, result_to_dataframe
def normalize_mm(scores: List[float], fixed_min_value: float = 0):
arr = np.array(scores)
max_value = np.max(arr)
min_value = np.min(arr)
norm_score = (arr - min_value) / (max_value - min_value)
return norm_score
def normalize_tmm(scores: List[float], fixed_min_value: float):
arr = np.array(scores)
max_value = np.max(arr)
norm_score = (arr - fixed_min_value) / (max_value - fixed_min_value)
return norm_score
def normalize_z(scores: List[float], fixed_min_value: float = 0):
arr = np.array(scores)
mean_value = np.mean(arr)
std_value = np.std(arr)
norm_score = (arr - mean_value) / std_value
return norm_score
def normalize_dbsf(scores: List[float], fixed_min_value: float = 0):
arr = np.array(scores)
mean_value = np.mean(arr)
std_value = np.std(arr)
min_value = mean_value - 3 * std_value
max_value = mean_value + 3 * std_value
norm_score = (arr - min_value) / (max_value - min_value)
return norm_score
normalize_method_dict = {
"mm": normalize_mm,
"tmm": normalize_tmm,
"z": normalize_z,
"dbsf": normalize_dbsf,
}
class HybridCC(HybridRetrieval):
def _pure(
self,
ids: Tuple,
scores: Tuple,
top_k: int,
weight: float,
normalize_method: str = "mm",
semantic_theoretical_min_value: float = -1.0,
lexical_theoretical_min_value: float = 0.0,
):
return hybrid_cc(
ids,
scores,
top_k,
weight,
normalize_method,
semantic_theoretical_min_value,
lexical_theoretical_min_value,
)
@classmethod
def run_evaluator(
cls,
project_dir: Union[str, Path],
previous_result: pd.DataFrame,
*args,
**kwargs,
):
if "ids" in kwargs and "scores" in kwargs:
data_dir = os.path.join(project_dir, "data")
corpus_df = pd.read_parquet(
os.path.join(data_dir, "corpus.parquet"), engine="pyarrow"
)
params = pop_params(hybrid_cc, kwargs)
assert (
"ids" in params and "scores" in params and "top_k" in params
), "ids, scores, and top_k must be specified."
@result_to_dataframe(
["retrieved_contents", "retrieved_ids", "retrieve_scores"]
)
def __cc(**cc_params):
ids, scores = hybrid_cc(**cc_params)
contents = fetch_contents(corpus_df, ids)
return contents, ids, scores
return __cc(**params)
else:
assert (
"target_modules" in kwargs and "target_module_params" in kwargs
), "target_modules and target_module_params must be specified if there is not ids and scores."
instance = cls(project_dir, *args, **kwargs)
result = instance.pure(previous_result, *args, **kwargs)
del instance
return result
def hybrid_cc(
ids: Tuple,
scores: Tuple,
top_k: int,
weight: float,
normalize_method: str = "mm",
semantic_theoretical_min_value: float = -1.0,
lexical_theoretical_min_value: float = 0.0,
) -> Tuple[List[List[str]], List[List[float]]]:
"""
Hybrid CC function.
CC (convex combination) is a method to fuse lexical and semantic retrieval results.
It is a method that first normalizes the scores of each retrieval result,
and then combines them with the given weights.
It is unlike other retrieval modules, because it does not really execute retrieval,
but just fuses the results of other retrieval functions.
So you have to run more than two retrieval modules before running this function.
And collect ids and scores result from each retrieval module.
Make it as tuple and input it to this function.
:param ids: The tuple of ids that you want to fuse.
The length of this must be the same as the length of scores.
The semantic retrieval ids must be the first index.
:param scores: The retrieve scores that you want to fuse.
The length of this must be the same as the length of ids.
The semantic retrieval scores must be the first index.
:param top_k: The number of passages to be retrieved.
:param normalize_method: The normalization method to use.
There are some normalization method that you can use at the hybrid cc method.
AutoRAG support following.
- `mm`: Min-max scaling
- `tmm`: Theoretical min-max scaling
- `z`: z-score normalization
- `dbsf`: 3-sigma normalization
:param weight: The weight value. If the weight is 1.0, it means the
weight to the semantic module will be 1.0 and weight to the lexical module will be 0.0.
:param semantic_theoretical_min_value: This value used by `tmm` normalization method. You can set the
theoretical minimum value by yourself. Default is -1.
:param lexical_theoretical_min_value: This value used by `tmm` normalization method. You can set the
theoretical minimum value by yourself. Default is 0.
:return: The tuple of ids and fused scores that are fused by CC.
"""
assert len(ids) == len(scores), "The length of ids and scores must be the same."
assert len(ids) > 1, "You must input more than one retrieval results."
assert top_k > 0, "top_k must be greater than 0."
assert weight >= 0, "The weight must be at least 0."
assert weight <= 1, "The weight must be at most 1."
df = pd.DataFrame(
{
"semantic_ids": ids[0],
"lexical_ids": ids[1],
"semantic_score": scores[0],
"lexical_score": scores[1],
}
)
def cc_pure_apply(row):
return fuse_per_query(
row["semantic_ids"],
row["lexical_ids"],
row["semantic_score"],
row["lexical_score"],
normalize_method=normalize_method,
weight=weight,
top_k=top_k,
semantic_theoretical_min_value=semantic_theoretical_min_value,
lexical_theoretical_min_value=lexical_theoretical_min_value,
)
# fixed weight
df[["cc_id", "cc_score"]] = df.apply(
lambda row: cc_pure_apply(row), axis=1, result_type="expand"
)
return df["cc_id"].tolist(), df["cc_score"].tolist()
def fuse_per_query(
semantic_ids: List[str],
lexical_ids: List[str],
semantic_scores: List[float],
lexical_scores: List[float],
normalize_method: str,
weight: float,
top_k: int,
semantic_theoretical_min_value: float,
lexical_theoretical_min_value: float,
):
normalize_func = normalize_method_dict[normalize_method]
norm_semantic_scores = normalize_func(
semantic_scores, semantic_theoretical_min_value
)
norm_lexical_scores = normalize_func(lexical_scores, lexical_theoretical_min_value)
ids = [semantic_ids, lexical_ids]
scores = [norm_semantic_scores, norm_lexical_scores]
df = pd.concat(
[pd.Series(dict(zip(_id, score))) for _id, score in zip(ids, scores)], axis=1
)
df.columns = ["semantic", "lexical"]
df = df.fillna(0)
df["weighted_sum"] = df.mul((weight, 1.0 - weight)).sum(axis=1)
df = df.sort_values(by="weighted_sum", ascending=False)
return df.index.tolist()[:top_k], df["weighted_sum"][:top_k].tolist()
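A worked example of hybrid_cc defined above with made-up ids and scores for a single query, min-max normalization, and a weight of 0.6 toward the semantic result:

# Illustrative only: one query, semantic result first, lexical (BM25) result second.
ids = (
    [["d1", "d2", "d3"]],  # semantic retrieval ids
    [["d2", "d4", "d1"]],  # lexical retrieval ids
)
scores = (
    [[0.8, 0.6, 0.4]],     # semantic scores
    [[12.0, 9.0, 3.0]],    # lexical scores
)
fused_ids, fused_scores = hybrid_cc(ids, scores, top_k=3, weight=0.6)
# fused_ids    -> [['d2', 'd1', 'd4']]
# fused_scores -> approximately [[0.7, 0.6, 0.27]]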

View File

@@ -0,0 +1,128 @@
import os
from pathlib import Path
from typing import List, Tuple, Union
import pandas as pd
from autorag.nodes.retrieval.base import HybridRetrieval
from autorag.utils.util import pop_params, fetch_contents, result_to_dataframe
class HybridRRF(HybridRetrieval):
def _pure(self, ids, scores, top_k: int, weight: int = 60, rrf_k: int = -1):
return hybrid_rrf(ids, scores, top_k, weight, rrf_k)
@classmethod
def run_evaluator(
cls,
project_dir: Union[str, Path],
previous_result: pd.DataFrame,
*args,
**kwargs,
):
if "ids" in kwargs and "scores" in kwargs:
data_dir = os.path.join(project_dir, "data")
corpus_df = pd.read_parquet(
os.path.join(data_dir, "corpus.parquet"), engine="pyarrow"
)
params = pop_params(hybrid_rrf, kwargs)
assert (
"ids" in params and "scores" in params and "top_k" in params
), "ids, scores, and top_k must be specified."
@result_to_dataframe(
["retrieved_contents", "retrieved_ids", "retrieve_scores"]
)
def __rrf(**rrf_params):
ids, scores = hybrid_rrf(**rrf_params)
contents = fetch_contents(corpus_df, ids)
return contents, ids, scores
return __rrf(**params)
else:
assert (
"target_modules" in kwargs and "target_module_params" in kwargs
), "target_modules and target_module_params must be specified if there is not ids and scores."
instance = cls(project_dir, *args, **kwargs)
result = instance.pure(previous_result, *args, **kwargs)
del instance
return result
def hybrid_rrf(
ids: Tuple,
scores: Tuple,
top_k: int,
weight: int = 60,
rrf_k: int = -1,
) -> Tuple[List[List[str]], List[List[float]]]:
"""
Hybrid RRF function.
RRF (Rank Reciprocal Fusion) is a method to fuse multiple retrieval results.
It is common to fuse dense retrieval and sparse retrieval results using RRF.
To use this function, you must input ids and scores as tuple.
It is unlike other retrieval modules because it does not really execute retrieval but just fuses
the results of other retrieval functions.
So you have to run more than two retrieval modules before running this function.
And collect ids and scores result from each retrieval module.
Make it as a tuple and input it to this function.
:param ids: The tuple of ids that you want to fuse.
The length of this must be the same as the length of scores.
:param scores: The retrieve scores that you want to fuse.
The length of this must be the same as the length of ids.
:param top_k: The number of passages to be retrieved.
:param weight: Hyperparameter for RRF.
It was originally rrf_k value.
Default is 60.
For more information, please visit our documentation.
:param rrf_k: (Deprecated) Hyperparameter for RRF.
It was originally rrf_k value. Will remove at a further version.
:return: The tuple of ids and fused scores that are fused by RRF.
"""
assert len(ids) == len(scores), "The length of ids and scores must be the same."
assert len(ids) > 1, "You must input more than one retrieval results."
assert top_k > 0, "top_k must be greater than 0."
assert weight > 0, "weight (rrf_k) must be greater than 0."
if rrf_k != -1:
weight = int(rrf_k)
else:
weight = int(weight)
id_df = pd.DataFrame({f"id_{i}": id_list for i, id_list in enumerate(ids)})
score_df = pd.DataFrame(
{f"score_{i}": score_list for i, score_list in enumerate(scores)}
)
df = pd.concat([id_df, score_df], axis=1)
def rrf_pure_apply(row):
ids_tuple = tuple(row[[f"id_{i}" for i in range(len(ids))]].values)
scores_tuple = tuple(row[[f"score_{i}" for i in range(len(scores))]].values)
return pd.Series(rrf_pure(ids_tuple, scores_tuple, weight, top_k))
df[["rrf_id", "rrf_score"]] = df.apply(rrf_pure_apply, axis=1)
return df["rrf_id"].tolist(), df["rrf_score"].tolist()
def rrf_pure(
ids: Tuple, scores: Tuple, rrf_k: int, top_k: int
) -> Tuple[List[str], List[float]]:
df = pd.concat(
[pd.Series(dict(zip(_id, score))) for _id, score in zip(ids, scores)], axis=1
)
rank_df = df.rank(ascending=False, method="min")
rank_df = rank_df.fillna(0)
rank_df["rrf"] = rank_df.apply(lambda row: rrf_calculate(row, rrf_k), axis=1)
rank_df = rank_df.sort_values(by="rrf", ascending=False)
return rank_df.index.tolist()[:top_k], rank_df["rrf"].tolist()[:top_k]
def rrf_calculate(row, rrf_k):
result = 0
for r in row:
if r == 0:
continue
result += 1 / (r + rrf_k)
return result
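A worked example of the RRF formula and of hybrid_rrf defined above, using the default weight (originally rrf_k) of 60 and made-up ids and scores for a single query:

# Illustrative only: a passage ranked 1st by one retriever and 3rd by the other.
print(1 / (1 + 60) + 1 / (3 + 60))  # ~0.0323

# Fusing two made-up single-query results with hybrid_rrf:
ids = ([["d1", "d2", "d3"]], [["d2", "d4", "d1"]])
scores = ([[0.8, 0.6, 0.4]], [[12.0, 9.0, 3.0]])
fused_ids, fused_scores = hybrid_rrf(ids, scores, top_k=3)
# fused_ids -> [['d2', 'd1', 'd4']]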

View File

@@ -0,0 +1,544 @@
import logging
import os
import pathlib
from copy import deepcopy
from typing import List, Callable, Dict, Tuple, Union
import numpy as np
import pandas as pd
from autorag.evaluation import evaluate_retrieval
from autorag.schema.metricinput import MetricInput
from autorag.strategy import measure_speed, filter_by_threshold, select_best
from autorag.support import get_support_modules
from autorag.utils.util import get_best_row, to_list, apply_recursive
logger = logging.getLogger("AutoRAG")
semantic_module_names = ["vectordb", "VectorDB"]
lexical_module_names = ["bm25", "BM25"]
hybrid_module_names = ["hybrid_rrf", "hybrid_cc", "HybridCC", "HybridRRF"]
def run_retrieval_node(
modules: List,
module_params: List[Dict],
previous_result: pd.DataFrame,
node_line_dir: str,
strategies: Dict,
) -> pd.DataFrame:
"""
Run evaluation and select the best module among retrieval node results.
:param modules: Retrieval modules to run.
:param module_params: Retrieval module parameters.
:param previous_result: Previous result dataframe.
Could be query expansion's best result or qa data.
:param node_line_dir: This node line's directory.
:param strategies: Strategies for retrieval node.
:return: The best result dataframe.
It contains previous result columns and retrieval node's result columns.
"""
if not os.path.exists(node_line_dir):
os.makedirs(node_line_dir)
project_dir = pathlib.PurePath(node_line_dir).parent.parent
qa_df = pd.read_parquet(
os.path.join(project_dir, "data", "qa.parquet"), engine="pyarrow"
)
retrieval_gt = qa_df["retrieval_gt"].tolist()
retrieval_gt = apply_recursive(lambda x: str(x), to_list(retrieval_gt))
# make rows to metric_inputs
metric_inputs = [
MetricInput(retrieval_gt=ret_gt, query=query, generation_gt=gen_gt)
for ret_gt, query, gen_gt in zip(
retrieval_gt, qa_df["query"].tolist(), qa_df["generation_gt"].tolist()
)
]
save_dir = os.path.join(node_line_dir, "retrieval") # node name
if not os.path.exists(save_dir):
os.makedirs(save_dir)
def run(input_modules, input_module_params) -> Tuple[List[pd.DataFrame], List]:
"""
Run input modules and parameters.
:param input_modules: Input modules
:param input_module_params: Input module parameters
:return: First, it returns list of result dataframe.
Second, it returns list of execution times.
"""
result, execution_times = zip(
*map(
lambda task: measure_speed(
task[0].run_evaluator,
project_dir=project_dir,
previous_result=previous_result,
**task[1],
),
zip(input_modules, input_module_params),
)
)
average_times = list(map(lambda x: x / len(result[0]), execution_times))
# run metrics before filtering
if strategies.get("metrics") is None:
raise ValueError("You must at least one metrics for retrieval evaluation.")
result = list(
map(
lambda x: evaluate_retrieval_node(
x,
metric_inputs,
strategies.get("metrics"),
),
result,
)
)
return result, average_times
def save_and_summary(
input_modules,
input_module_params,
result_list,
execution_time_list,
filename_start: int,
):
"""
Save the results and make a summary file.
:param input_modules: Input modules
:param input_module_params: Input module parameters
:param result_list: Result list
:param execution_time_list: Execution times
:param filename_start: The first filename to use
:return: The summary dataframe for the saved results.
"""
# save results to folder
filepaths = list(
map(
lambda x: os.path.join(save_dir, f"{x}.parquet"),
range(filename_start, filename_start + len(input_modules)),
)
)
list(
map(
lambda x: x[0].to_parquet(x[1], index=False),
zip(result_list, filepaths),
)
) # execute save to parquet
filename_list = list(map(lambda x: os.path.basename(x), filepaths))
summary_df = pd.DataFrame(
{
"filename": filename_list,
"module_name": list(map(lambda module: module.__name__, input_modules)),
"module_params": input_module_params,
"execution_time": execution_time_list,
**{
metric: list(map(lambda result: result[metric].mean(), result_list))
for metric in strategies.get("metrics")
},
}
)
summary_df.to_csv(os.path.join(save_dir, "summary.csv"), index=False)
return summary_df
def find_best(results, average_times, filenames):
# filter by strategies
if strategies.get("speed_threshold") is not None:
results, filenames = filter_by_threshold(
results, average_times, strategies["speed_threshold"], filenames
)
selected_result, selected_filename = select_best(
results,
strategies.get("metrics"),
filenames,
strategies.get("strategy", "mean"),
)
return selected_result, selected_filename
filename_first = 0
# run semantic modules
logger.info("Running retrieval node - semantic retrieval module...")
if any([module.__name__ in semantic_module_names for module in modules]):
semantic_modules, semantic_module_params = zip(
*filter(
lambda x: x[0].__name__ in semantic_module_names,
zip(modules, module_params),
)
)
semantic_results, semantic_times = run(semantic_modules, semantic_module_params)
semantic_summary_df = save_and_summary(
semantic_modules,
semantic_module_params,
semantic_results,
semantic_times,
filename_first,
)
semantic_selected_result, semantic_selected_filename = find_best(
semantic_results, semantic_times, semantic_summary_df["filename"].tolist()
)
semantic_summary_df["is_best"] = (
semantic_summary_df["filename"] == semantic_selected_filename
)
filename_first += len(semantic_modules)
else:
(
semantic_selected_filename,
semantic_summary_df,
semantic_results,
semantic_times,
) = None, pd.DataFrame(), [], []
# run lexical modules
logger.info("Running retrieval node - lexical retrieval module...")
if any([module.__name__ in lexical_module_names for module in modules]):
lexical_modules, lexical_module_params = zip(
*filter(
lambda x: x[0].__name__ in lexical_module_names,
zip(modules, module_params),
)
)
lexical_results, lexical_times = run(lexical_modules, lexical_module_params)
lexical_summary_df = save_and_summary(
lexical_modules,
lexical_module_params,
lexical_results,
lexical_times,
filename_first,
)
lexical_selected_result, lexical_selected_filename = find_best(
lexical_results, lexical_times, lexical_summary_df["filename"].tolist()
)
lexical_summary_df["is_best"] = (
lexical_summary_df["filename"] == lexical_selected_filename
)
filename_first += len(lexical_modules)
else:
(
lexical_selected_filename,
lexical_summary_df,
lexical_results,
lexical_times,
) = None, pd.DataFrame(), [], []
logger.info("Running retrieval node - hybrid retrieval module...")
# Next, run hybrid retrieval
if any([module.__name__ in hybrid_module_names for module in modules]):
hybrid_modules, hybrid_module_params = zip(
*filter(
lambda x: x[0].__name__ in hybrid_module_names,
zip(modules, module_params),
)
)
if all(
["target_module_params" in x for x in hybrid_module_params]
): # for Runner.run
# If target_module_params are already given, run hybrid retrieval directly
hybrid_results, hybrid_times = run(hybrid_modules, hybrid_module_params)
hybrid_summary_df = save_and_summary(
hybrid_modules,
hybrid_module_params,
hybrid_results,
hybrid_times,
filename_first,
)
filename_first += len(hybrid_modules)
else: # for Evaluator
# get id and score
ids_scores = get_ids_and_scores(
save_dir,
[semantic_selected_filename, lexical_selected_filename],
semantic_summary_df,
lexical_summary_df,
previous_result,
)
hybrid_module_params = list(
map(lambda x: {**x, **ids_scores}, hybrid_module_params)
)
# optimize each module
real_hybrid_times = [
get_hybrid_execution_times(semantic_summary_df, lexical_summary_df)
] * len(hybrid_module_params)
hybrid_times = real_hybrid_times.copy()
hybrid_results = []
for module, module_param in zip(hybrid_modules, hybrid_module_params):
module_result_df, module_best_weight = optimize_hybrid(
module,
module_param,
strategies,
metric_inputs,
project_dir,
previous_result,
)
module_param["weight"] = module_best_weight
hybrid_results.append(module_result_df)
hybrid_summary_df = save_and_summary(
hybrid_modules,
hybrid_module_params,
hybrid_results,
hybrid_times,
filename_first,
)
filename_first += len(hybrid_modules)
hybrid_summary_df["execution_time"] = hybrid_times
best_semantic_summary_row = semantic_summary_df.loc[
semantic_summary_df["is_best"]
].iloc[0]
best_lexical_summary_row = lexical_summary_df.loc[
lexical_summary_df["is_best"]
].iloc[0]
target_modules = (
best_semantic_summary_row["module_name"],
best_lexical_summary_row["module_name"],
)
target_module_params = (
best_semantic_summary_row["module_params"],
best_lexical_summary_row["module_params"],
)
hybrid_summary_df = edit_summary_df_params(
hybrid_summary_df, target_modules, target_module_params
)
else:
if any([module.__name__ in hybrid_module_names for module in modules]):
logger.warning(
"You must at least one semantic module and lexical module for hybrid evaluation."
"Passing hybrid module."
)
_, hybrid_summary_df, hybrid_results, hybrid_times = (
None,
pd.DataFrame(),
[],
[],
)
summary = pd.concat(
[semantic_summary_df, lexical_summary_df, hybrid_summary_df], ignore_index=True
)
results = semantic_results + lexical_results + hybrid_results
average_times = semantic_times + lexical_times + hybrid_times
filenames = summary["filename"].tolist()
# filter by strategies
selected_result, selected_filename = find_best(results, average_times, filenames)
best_result = pd.concat([previous_result, selected_result], axis=1)
# add summary.csv 'is_best' column
summary["is_best"] = summary["filename"] == selected_filename
# save the result files
best_result.to_parquet(
os.path.join(
save_dir, f"best_{os.path.splitext(selected_filename)[0]}.parquet"
),
index=False,
)
summary.to_csv(os.path.join(save_dir, "summary.csv"), index=False)
return best_result
def evaluate_retrieval_node(
result_df: pd.DataFrame,
metric_inputs: List[MetricInput],
metrics: Union[List[str], List[Dict]],
) -> pd.DataFrame:
"""
Evaluate retrieval node from retrieval node result dataframe.
:param result_df: The result dataframe from a retrieval node.
:param metric_inputs: List of metric input schema for AutoRAG.
:param metrics: Metric list from input strategies.
:return: Return result_df with metrics columns.
The columns will be 'retrieved_contents', 'retrieved_ids', 'retrieve_scores', and metric names.
"""
@evaluate_retrieval(
metric_inputs=metric_inputs,
metrics=metrics,
)
def evaluate_this_module(df: pd.DataFrame):
return (
df["retrieved_contents"].tolist(),
df["retrieved_ids"].tolist(),
df["retrieve_scores"].tolist(),
)
return evaluate_this_module(result_df)
def edit_summary_df_params(
summary_df: pd.DataFrame, target_modules, target_module_params
) -> pd.DataFrame:
def delete_ids_scores(x):
del x["ids"]
del x["scores"]
return x
summary_df["module_params"] = summary_df["module_params"].apply(delete_ids_scores)
summary_df["new_params"] = [
{"target_modules": target_modules, "target_module_params": target_module_params}
] * len(summary_df)
summary_df["module_params"] = summary_df.apply(
lambda row: {**row["module_params"], **row["new_params"]}, axis=1
)
summary_df = summary_df.drop(columns=["new_params"])
return summary_df
def get_ids_and_scores(
node_dir: str,
filenames: List[str],
semantic_summary_df: pd.DataFrame,
lexical_summary_df: pd.DataFrame,
previous_result,
) -> Dict[str, Tuple[List[List[str]], List[List[float]]]]:
project_dir = pathlib.PurePath(node_dir).parent.parent.parent
best_results_df = list(
map(
lambda filename: pd.read_parquet(
os.path.join(node_dir, filename), engine="pyarrow"
),
filenames,
)
)
ids = tuple(
map(lambda df: df["retrieved_ids"].apply(list).tolist(), best_results_df)
)
scores = tuple(
map(lambda df: df["retrieve_scores"].apply(list).tolist(), best_results_df)
)
# search non-duplicate ids
semantic_ids = deepcopy(ids[0])
lexical_ids = deepcopy(ids[1])
def get_non_duplicate_ids(target_ids, compare_ids) -> List[List[str]]:
"""
Return, for each row, the ids that appear in compare_ids but not in target_ids.
For example, to get the ids that the semantic run is missing, pass semantic_ids as target_ids and lexical_ids as compare_ids.
"""
result_ids = []
assert len(target_ids) == len(compare_ids)
for target_id_list, compare_id_list in zip(target_ids, compare_ids):
non_duplicate_list = list(set(compare_id_list) - set(target_id_list))
result_ids.append(non_duplicate_list)
return result_ids
lexical_target_ids = get_non_duplicate_ids(lexical_ids, semantic_ids)
semantic_target_ids = get_non_duplicate_ids(semantic_ids, lexical_ids)
new_id_tuple = (
[a + b for a, b in zip(semantic_ids, semantic_target_ids)],
[a + b for a, b in zip(lexical_ids, lexical_target_ids)],
)
# search non-duplicate ids' scores
new_semantic_scores = get_scores_by_ids(
semantic_target_ids, semantic_summary_df, project_dir, previous_result
)
new_lexical_scores = get_scores_by_ids(
lexical_target_ids, lexical_summary_df, project_dir, previous_result
)
new_score_tuple = (
[a + b for a, b in zip(scores[0], new_semantic_scores)],
[a + b for a, b in zip(scores[1], new_lexical_scores)],
)
return {
"ids": new_id_tuple,
"scores": new_score_tuple,
}
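To make the id merging above concrete, a standalone hedged illustration that mirrors the nested get_non_duplicate_ids logic without calling it:
semantic_ids = [["doc1", "doc2"]]
lexical_ids = [["doc2", "doc3"]]
# ids returned by the lexical run but missing from the semantic run
semantic_missing = [list(set(lex) - set(sem)) for sem, lex in zip(semantic_ids, lexical_ids)]  # [["doc3"]]
# each semantic id list is extended with those ids, so both runs cover the same documents
merged_semantic_ids = [a + b for a, b in zip(semantic_ids, semantic_missing)]  # [["doc1", "doc2", "doc3"]]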
def get_scores_by_ids(
ids: List[List[str]], module_summary_df: pd.DataFrame, project_dir, previous_result
) -> List[List[float]]:
module_name = get_best_row(module_summary_df)["module_name"]
module_params = get_best_row(module_summary_df)["module_params"]
module = get_support_modules(module_name)
result_df = module.run_evaluator(
project_dir=project_dir,
previous_result=previous_result,
ids=ids,
**module_params,
)
return to_list(result_df["retrieve_scores"].tolist())
def find_unique_elems(list1: List[str], list2: List[str]) -> List[str]:
return list(set(list1).symmetric_difference(set(list2)))
def get_hybrid_execution_times(lexical_summary, semantic_summary) -> float:
lexical_execution_time = lexical_summary.loc[lexical_summary["is_best"]].iloc[0][
"execution_time"
]
semantic_execution_time = semantic_summary.loc[semantic_summary["is_best"]].iloc[0][
"execution_time"
]
return lexical_execution_time + semantic_execution_time
def optimize_hybrid(
hybrid_module_func: Callable,
hybrid_module_param: Dict,
strategy: Dict,
input_metrics: List[MetricInput],
project_dir,
previous_result,
):
if (
hybrid_module_func.__name__ == "HybridRRF"
or hybrid_module_func.__name__ == "hybrid_rrf"
):
weight_range = hybrid_module_param.pop("weight_range", (4, 80))
test_weight_size = weight_range[1] - weight_range[0] + 1
elif (
hybrid_module_func.__name__ == "HybridCC"
or hybrid_module_func.__name__ == "hybrid_cc"
):
weight_range = hybrid_module_param.pop("weight_range", (0.0, 1.0))
test_weight_size = hybrid_module_param.pop("test_weight_size", 101)
else:
raise ValueError("You must input hybrid module function at hybrid_module_func.")
weight_candidates = np.linspace(
weight_range[0], weight_range[1], test_weight_size
).tolist()
result_list = []
for weight_value in weight_candidates:
result_df = hybrid_module_func.run_evaluator(
project_dir=project_dir,
previous_result=previous_result,
weight=weight_value,
**hybrid_module_param,
)
result_list.append(result_df)
# evaluate here
if strategy.get("metrics") is None:
raise ValueError("You must at least one metrics for retrieval evaluation.")
result_list = list(
map(
lambda x: evaluate_retrieval_node(
x,
input_metrics,
strategy.get("metrics"),
),
result_list,
)
)
# select best result
best_result_df, best_weight = select_best(
result_list,
strategy.get("metrics"),
metadatas=weight_candidates,
strategy_name=strategy.get("strategy", "normalize_mean"),
)
return best_result_df, best_weight
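For intuition, a hedged sketch of the weight grids produced above, using only the default ranges shown in the code:
import numpy as np

# hybrid_rrf: sweep rrf_k over the default (4, 80) range with a step of 1
rrf_candidates = np.linspace(4, 80, 80 - 4 + 1).tolist()  # [4.0, 5.0, ..., 80.0], 77 values
# hybrid_cc: 101 evenly spaced weights over the default (0.0, 1.0) range
cc_candidates = np.linspace(0.0, 1.0, 101).tolist()  # [0.0, 0.01, ..., 1.0]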

View File

@@ -0,0 +1,303 @@
import itertools
import logging
import os
from typing import List, Tuple, Optional
import numpy as np
import pandas as pd
from llama_index.core.embeddings import BaseEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from autorag.evaluation.metric.util import (
calculate_l2_distance,
calculate_inner_product,
calculate_cosine_similarity,
)
from autorag.nodes.retrieval.base import evenly_distribute_passages, BaseRetrieval
from autorag.utils import (
validate_corpus_dataset,
cast_corpus_dataset,
cast_qa_dataset,
validate_qa_dataset,
)
from autorag.utils.util import (
get_event_loop,
process_batch,
openai_truncate_by_token,
flatten_apply,
result_to_dataframe,
pop_params,
fetch_contents,
empty_cuda_cache,
convert_inputs_to_list,
make_batch,
)
from autorag.vectordb import load_vectordb_from_yaml
from autorag.vectordb.base import BaseVectorStore
logger = logging.getLogger("AutoRAG")
class VectorDB(BaseRetrieval):
def __init__(self, project_dir: str, vectordb: str = "default", **kwargs):
"""
Initialize VectorDB retrieval node.
:param project_dir: The project directory path.
:param vectordb: The vectordb name.
The vectordb name must be configured in the config.yaml file.
If it is not configured, the default vectordb is used.
:param kwargs: Optional extra arguments.
They are not used in the init method.
"""
super().__init__(project_dir)
vectordb_config_path = os.path.join(self.resources_dir, "vectordb.yaml")
self.vector_store = load_vectordb_from_yaml(
vectordb_config_path, vectordb, project_dir
)
self.embedding_model = self.vector_store.embedding
def __del__(self):
del self.vector_store
del self.embedding_model
empty_cuda_cache()
super().__del__()
@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
queries = self.cast_to_run(previous_result)
pure_params = pop_params(self._pure, kwargs)
ids, scores = self._pure(queries, **pure_params)
contents = fetch_contents(self.corpus_df, ids)
return contents, ids, scores
def _pure(
self,
queries: List[List[str]],
top_k: int,
embedding_batch: int = 128,
ids: Optional[List[List[str]]] = None,
) -> Tuple[List[List[str]], List[List[float]]]:
"""
VectorDB retrieval function.
You need a vector store collection that has already been ingested,
together with the embedding model that was used for that ingestion.
:param queries: 2-d list of query strings.
Each element of the list is the list of query strings for one row.
:param top_k: The number of passages to be retrieved.
:param embedding_batch: The number of queries to be processed in parallel.
This is used to prevent API errors during query embedding.
Default is 128.
:param ids: The optional list of ids that you want to retrieve.
You don't need to specify this in the general use cases.
Default is None.
:return: A 2-d list of passage ids retrieved from the vectordb and a 2-d list of their scores.
Both have the same length as queries, and each element has a length of top_k.
"""
# if ids are specified, fetch their scores from the vector store
if ids is not None:
return self.__get_ids_scores(queries, ids, embedding_batch)
# run async vector_db_pure function
tasks = [
vectordb_pure(query_list, top_k, self.vector_store)
for query_list in queries
]
loop = get_event_loop()
results = loop.run_until_complete(
process_batch(tasks, batch_size=embedding_batch)
)
id_result = list(map(lambda x: x[0], results))
score_result = list(map(lambda x: x[1], results))
return id_result, score_result
def __get_ids_scores(self, queries, ids, embedding_batch: int):
# truncate queries and embedding execution here.
openai_embedding_limit = 8000
if isinstance(self.embedding_model, OpenAIEmbedding):
queries = list(
map(
lambda query_list: openai_truncate_by_token(
query_list,
openai_embedding_limit,
self.embedding_model.model_name,
),
queries,
)
)
query_embeddings = flatten_apply(
run_query_embedding_batch,
queries,
embedding_model=self.embedding_model,
batch_size=embedding_batch,
)
loop = get_event_loop()
async def run_fetch(ids):
final_result = []
for id_list in ids:
if len(id_list) == 0:
final_result.append([])
else:
result = await self.vector_store.fetch(id_list)
final_result.append(result)
return final_result
content_embeddings = loop.run_until_complete(run_fetch(ids))
score_result = list(
map(
lambda query_embedding_list, content_embedding_list: get_id_scores(
query_embedding_list,
content_embedding_list,
similarity_metric=self.vector_store.similarity_metric,
),
query_embeddings,
content_embeddings,
)
)
return ids, score_result
async def vectordb_pure(
queries: List[str], top_k: int, vectordb: BaseVectorStore
) -> Tuple[List[str], List[float]]:
"""
Async VectorDB retrieval function.
It retrieves from the vector store asynchronously, one row at a time.
:param queries: A list of query strings for one row.
:param top_k: The number of passages to be retrieved.
:param vectordb: The vector store instance.
:return: A tuple containing a list of passage ids retrieved from the vectordb and a list of their scores.
"""
id_result, score_result = await vectordb.query(queries=queries, top_k=top_k)
# Distribute passages evenly
id_result, score_result = evenly_distribute_passages(id_result, score_result, top_k)
# sort id_result and score_result by score
result = [
(_id, score)
for score, _id in sorted(
zip(score_result, id_result), key=lambda pair: pair[0], reverse=True
)
]
id_result, score_result = zip(*result)
return list(id_result), list(score_result)
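The final sort above pairs each id with its score and orders the pairs by score, descending; a tiny standalone illustration with made-up values:
id_result = ["doc1", "doc2", "doc3"]
score_result = [0.2, 0.9, 0.5]
pairs = sorted(zip(score_result, id_result), key=lambda pair: pair[0], reverse=True)
sorted_scores, sorted_ids = zip(*pairs)
# sorted_ids == ("doc2", "doc3", "doc1"), sorted_scores == (0.9, 0.5, 0.2)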
async def filter_exist_ids(
vectordb: BaseVectorStore,
corpus_data: pd.DataFrame,
) -> pd.DataFrame:
corpus_data = cast_corpus_dataset(corpus_data)
validate_corpus_dataset(corpus_data)
ids = corpus_data["doc_id"].tolist()
# Query the collection to check if IDs already exist
existed_bool_list = await vectordb.is_exist(ids=ids)
# Keep only the passages whose ids do not exist in the vectordb yet
new_passage = corpus_data[~pd.Series(existed_bool_list)]
return new_passage
async def filter_exist_ids_from_retrieval_gt(
vectordb: BaseVectorStore,
qa_data: pd.DataFrame,
corpus_data: pd.DataFrame,
) -> pd.DataFrame:
qa_data = cast_qa_dataset(qa_data)
validate_qa_dataset(qa_data)
corpus_data = cast_corpus_dataset(corpus_data)
validate_corpus_dataset(corpus_data)
retrieval_gt = (
qa_data["retrieval_gt"]
.apply(lambda x: list(itertools.chain.from_iterable(x)))
.tolist()
)
retrieval_gt = list(itertools.chain.from_iterable(retrieval_gt))
retrieval_gt = list(set(retrieval_gt))
existed_bool_list = await vectordb.is_exist(ids=retrieval_gt)
add_ids = []
for ret_gt, is_exist in zip(retrieval_gt, existed_bool_list):
if not is_exist:
add_ids.append(ret_gt)
new_passage = corpus_data[corpus_data["doc_id"].isin(add_ids)]
return new_passage
async def vectordb_ingest(
vectordb: BaseVectorStore,
corpus_data: pd.DataFrame,
):
"""
Ingest given corpus data to the vectordb.
It truncates corpus content to 8,000 tokens when the embedding model is OpenAIEmbedding.
Plus, when the corpus content is empty (whitespace), it will be ignored.
And if there is a document id that already exists in the collection, it will be ignored.
:param vectordb: A vector store instance that you want to ingest into.
:param corpus_data: The corpus data that contains doc_id and contents columns.
"""
embedding_batch = vectordb.embedding_batch
if not corpus_data.empty:
new_contents = corpus_data["contents"].tolist()
new_ids = corpus_data["doc_id"].tolist()
content_batches = make_batch(new_contents, embedding_batch)
id_batches = make_batch(new_ids, embedding_batch)
for content_batch, id_batch in zip(content_batches, id_batches):
await vectordb.add(ids=id_batch, texts=content_batch)
def run_query_embedding_batch(
queries: List[str], embedding_model: BaseEmbedding, batch_size: int
) -> List[List[float]]:
result = []
for i in range(0, len(queries), batch_size):
batch = queries[i : i + batch_size]
embeddings = embedding_model.get_text_embedding_batch(batch)
result.extend(embeddings)
return result
@convert_inputs_to_list
def get_id_scores(  # Find the not-yet-calculated scores when fusing scores for hybrid retrieval
query_embeddings: List[List[float]],  # The embeddings of the queries from one user input row
content_embeddings: List[List[float]],
similarity_metric: str,
) -> List[float]:  # The highest score among the queries for each content; the result has the same length as the contents
"""
Calculate the highest similarity scores between query embeddings and content embeddings.
:param query_embeddings: A list of lists containing query embeddings.
:param content_embeddings: A list of lists containing content embeddings.
:param similarity_metric: The similarity metric to use ('l2', 'ip', or 'cosine').
:return: A list of the highest similarity scores for each content embedding.
"""
metric_func_dict = {
"l2": lambda x, y: 1 - calculate_l2_distance(x, y),
"ip": calculate_inner_product,
"cosine": calculate_cosine_similarity,
}
metric_func = metric_func_dict[similarity_metric]
result = []
for content_embedding in content_embeddings:
scores = []
for query_embedding in query_embeddings:
scores.append(
metric_func(np.array(query_embedding), np.array(content_embedding))
)
result.append(max(scores))
return result
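A hedged usage sketch for get_id_scores with made-up 2-d embeddings, assuming calculate_cosine_similarity implements the standard cosine similarity:
query_embeddings = [[1.0, 0.0], [0.0, 1.0]]    # two query embeddings from one row
content_embeddings = [[0.6, 0.8], [1.0, 0.0]]  # two retrieved passage embeddings
scores = get_id_scores(query_embeddings, content_embeddings, similarity_metric="cosine")
# first content: max(cosine([1, 0], [0.6, 0.8]), cosine([0, 1], [0.6, 0.8])) = max(0.6, 0.8) = 0.8
# second content: max(1.0, 0.0) = 1.0, so scores is approximately [0.8, 1.0]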

16
autorag/nodes/util.py Normal file
View File

@@ -0,0 +1,16 @@
from typing import Optional, Dict
from autorag.support import get_support_modules
def make_generator_callable_param(generator_dict: Optional[Dict]):
if "generator_module_type" not in generator_dict.keys():
generator_dict = {
"generator_module_type": "llama_index_llm",
"llm": "openai",
"model": "gpt-4o-mini",
}
module_str = generator_dict.pop("generator_module_type")
module_class = get_support_modules(module_str)
module_param = generator_dict
return module_class, module_param
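A hedged usage sketch; the parameter values below are illustrative only:
module_class, module_param = make_generator_callable_param(
    {"generator_module_type": "llama_index_llm", "llm": "openai", "model": "gpt-4o-mini", "batch": 8}
)
# module_class is resolved via get_support_modules("llama_index_llm"),
# and module_param keeps the remaining keys:
# {"llm": "openai", "model": "gpt-4o-mini", "batch": 8}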