autorag_evaluation/autorag/evaluation/metric/retrieval_contents.py

"""
This file contains the retrieval contents metric,
which means calculate the metric based on the contents of the retrieved items.
"""

import itertools
from collections import Counter

import numpy as np

from autorag.evaluation.metric.util import autorag_metric
from autorag.schema.metricinput import MetricInput
from autorag.utils.util import normalize_string


def single_token_f1(ground_truth: str, prediction: str):
	prediction_tokens = normalize_string(prediction).split()
	ground_truth_tokens = normalize_string(ground_truth).split()
	common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
	num_same = sum(common.values())
	if num_same == 0:
		return 0, 0, 0
	precision = 1.0 * num_same / len(prediction_tokens)
	recall = 1.0 * num_same / len(ground_truth_tokens)
	f1 = (2 * precision * recall) / (precision + recall)
	return precision, recall, f1


@autorag_metric(fields_to_check=["retrieved_contents", "retrieval_gt_contents"])
def retrieval_token_f1(metric_input: MetricInput):
	pred = metric_input.retrieved_contents
	gt = itertools.chain.from_iterable(metric_input.retrieval_gt_contents)

	calculated_results = list(
		map(lambda x: single_token_f1(x[1], x[0]), list(itertools.product(pred, gt)))
	)
	_, _, result = zip(*calculated_results)
	result_np = np.array(list(result)).reshape(len(pred), -1)
	return result_np.max(axis=1).mean()


@autorag_metric(fields_to_check=["retrieved_contents", "retrieval_gt_contents"])
def retrieval_token_precision(metric_input: MetricInput):
	pred = metric_input.retrieved_contents
	gt = itertools.chain.from_iterable(metric_input.retrieval_gt_contents)

	calculated_results = list(
		map(lambda x: single_token_f1(x[1], x[0]), list(itertools.product(pred, gt)))
	)
	result, _, _ = zip(*calculated_results)
	result_np = np.array(list(result)).reshape(len(pred), -1)
	return result_np.max(axis=1).mean()


@autorag_metric(fields_to_check=["retrieved_contents", "retrieval_gt_contents"])
def retrieval_token_recall(metric_input: MetricInput):
	pred = metric_input.retrieved_contents
	gt = itertools.chain.from_iterable(metric_input.retrieval_gt_contents)

	calculated_results = list(
		map(lambda x: single_token_f1(x[1], x[0]), list(itertools.product(pred, gt)))
	)
	_, result, _ = zip(*calculated_results)
	result_np = np.array(list(result)).reshape(len(pred), -1)
	return result_np.max(axis=1).mean()