diff --git a/.env.sample b/.env.sample index a76c5e9..f45ac6e 100644 --- a/.env.sample +++ b/.env.sample @@ -1,2 +1 @@ OPENAI_API_KEY=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 -LLAMA_CLOUD_API_KEY=llx-MkHkuDxnSxXEHvsIPAtjEZl4iSB8pHS1mgYDVZQlA690LUub \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 4e61a58..5f91811 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,8 +7,9 @@ ENV PYTHONUNBUFFERED=1 \ PYTHONDONTWRITEBYTECODE=1 \ PIP_NO_CACHE_DIR=1 -# Copy only requirements files first to leverage Docker cache -COPY pyproject.toml ./ +# Copy only necessary files +COPY pyproject.toml ./ +COPY requirements.txt ./ # Install system and Python dependencies in a single layer RUN apt-get update && \ @@ -19,10 +20,20 @@ RUN apt-get update && \ pip install --no-cache-dir --upgrade pip setuptools setuptools-scm && \ rm -rf /var/lib/apt/lists/* -# Copy project files -COPY . . - # Install base project -RUN pip install --no-cache-dir -e . +RUN pip install -r requirements.txt -CMD ["bash"] +# Copy project files +COPY autorag /usr/src/app/autorag +COPY main.py /usr/src/app/main.py +COPY making.sh /usr/src/app/making.sh +COPY entrypoint.sh /usr/src/app/entrypoint.sh + +# Set permissions for entrypoint +RUN chmod +x /usr/src/app/entrypoint.sh + +# Use entrypoint.sh as the container entrypoint +ENTRYPOINT ["/usr/src/app/entrypoint.sh"] + +# Set a default command (optional, can be overridden) +CMD ["bash"] \ No newline at end of file diff --git a/README.md b/README.md index ca3068a..036f3b3 100644 --- a/README.md +++ b/README.md @@ -1,51 +1,68 @@ -# AutoRAG Evaluation +# ๐Ÿ“Š AutoRAG Evaluation -์ด ๋ฌธ์„œ๋Š” AutoRAG์„ ํ™œ์šฉํ•˜์—ฌ RAG ํŒŒ์ดํ”„๋ผ์ธ์„ ์„ค์ •ํ•˜๊ณ  ํ‰๊ฐ€ํ•˜๋Š” ๊ณผ์ •์— ๋Œ€ํ•œ ์•ˆ๋‚ด์ž…๋‹ˆ๋‹ค. +AutoRAG Evaluation์€ RAG(Relevance-Augmented Generation) ํŒŒ์ดํ”„๋ผ์ธ์„ ํ‰๊ฐ€ํ•˜๊ธฐ ์œ„ํ•œ ๋„๊ตฌ์ž…๋‹ˆ๋‹ค. +์ด ๋ฌธ์„œ๋Š” ๋ฐ์ดํ„ฐ ์ƒ์„ฑ๋ถ€ํ„ฐ RAG ํ‰๊ฐ€ ๋ฐ ๋Œ€์‹œ๋ณด๋“œ ํ™œ์šฉ๊นŒ์ง€์˜ ์ „์ฒด ๊ณผ์ •์— ๋Œ€ํ•œ ์•ˆ๋‚ด๋ฅผ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค. --- -## ๐Ÿ“Œ ํ™˜๊ฒฝ ์„ธํŒ… +## **๐Ÿš€ 1. ํ™˜๊ฒฝ ์„ค์ •** -1. `.env` ํŒŒ์ผ์„ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค. (`.env.sample` ์ฐธ๊ณ ) -2. Docker ์ด๋ฏธ์ง€๋ฅผ ๋นŒ๋“œํ•ฉ๋‹ˆ๋‹ค. +### **1๏ธโƒฃ `.env` ํŒŒ์ผ ์„ค์ •** + +`.env.sample` ํŒŒ์ผ์„ ์ฐธ๊ณ ํ•˜์—ฌ `.env` ํŒŒ์ผ์„ ์ƒ์„ฑํ•˜๊ณ  ํ•„์š”ํ•œ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋ฅผ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค. + +### **2๏ธโƒฃ Docker ์ด๋ฏธ์ง€ ๋นŒ๋“œ** ```bash - docker build -t autorag-base . +docker build -t autorag-base . ``` -3. Docker Compose๋ฅผ ์‹คํ–‰ํ•˜์—ฌ ์„œ๋น„์Šค๋ฅผ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค. +### **3๏ธโƒฃ Docker Compose ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์‹คํ–‰** ```bash - docker compose up -d +docker compose up -d ``` -4. Hugginface embedding, Ollama LLM์„ ์œ„ํ•œ ์ถ”๊ฐ€ ๋ชจ๋“ˆ ์„ค์น˜๋ฅผ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค. +### **4๏ธโƒฃ ์‹คํ–‰ ๋ชจ๋“œ (Docker)** + +AutoRAG Evaluation์€ ์‹คํ–‰ ๋ชจ๋“œ์— ๋”ฐ๋ผ ๋‹ค๋ฅด๊ฒŒ ๋™์ž‘ํ•ฉ๋‹ˆ๋‹ค. +Docker ์‹คํ–‰ ์‹œ ์•„๋ž˜ ๋ช…๋ น์–ด๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๋ชจ๋“œ๋ฅผ ์„ ํƒํ•˜์„ธ์š”. ```bash - pip install -r requirements_custom.txt +docker run autorag-base +``` + +| ์‹คํ–‰ ๋ชจ๋“œ | ์„ค๋ช… | +| ---------- | -------------------------------- | +| `data-gen` | ๋ฐ์ดํ„ฐ ์ƒ์„ฑ (`making.sh` ์‹คํ–‰) | +| `evaluate` | RAG ํ‰๊ฐ€ (`main.py` ์‹คํ–‰) | +| `bash` | ์ปจํ…Œ์ด๋„ˆ ๋‚ด๋ถ€ ์ ‘๊ทผ (`/bin/bash`) | + +์˜ˆ์‹œ: + +```bash +docker run autorag-base data-gen +docker run autorag-base evaluate +docker run autorag-base bash ``` --- -## ๐Ÿ“‚ ๋ฐ์ดํ„ฐ ์ƒ์„ฑ +## **๐Ÿ“‚ 2. ๋ฐ์ดํ„ฐ ์ƒ์„ฑ** RAG ํ‰๊ฐ€๋ฅผ ์œ„ํ•ด **QA ๋ฐ์ดํ„ฐ ์„ธํŠธ**์™€ **์ฝ”ํผ์Šค ๋ฐ์ดํ„ฐ ์„ธํŠธ**๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. -### 1๏ธโƒฃ ํ”„๋กœ์ ํŠธ ํด๋” ์ƒ์„ฑ +### **1๏ธโƒฃ ํ”„๋กœ์ ํŠธ ํด๋” ์ƒ์„ฑ** ```bash cd projects -mkdir -p "project_name" -cd "project_name" -mkdir -p raw_data config +mkdir -p example_project/{raw_data,config} ``` -- **`raw_data/`**: ๋ถ„์„ํ•  ์›๋ณธ ๋ฐ์ดํ„ฐ๋ฅผ ์ €์žฅ (`.pdf` ๋“ฑ) -- **`config/`**: ํŒŒ์‹ฑ(`parse.yaml`), ์ฒญํ‚น(`chunk.yaml`) ์„ค์ • ํŒŒ์ผ์„ ์ €์žฅ +- **`raw_data/`**: ์›๋ณธ ๋ฐ์ดํ„ฐ ์ €์žฅ (`.pdf`, `.txt` ๋“ฑ) +- **`config/`**: ์„ค์ • ํŒŒ์ผ ์ €์žฅ (`parse.yaml`, `chunk.yaml` ๋“ฑ) -### 2๏ธโƒฃ ํŒŒ์‹ฑ ์„ค์ • (`parse.yaml`) - -ํŒŒ์‹ฑ ๋ชจ๋“ˆ์„ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค. +### **2๏ธโƒฃ ํŒŒ์‹ฑ ์„ค์ • (`parse.yaml`)** ```yaml modules: @@ -53,11 +70,7 @@ modules: parse_method: pdfminer ``` -์—ฌ๋Ÿฌ ๊ฐœ์˜ ํŒŒ์‹ฑ ๋ชจ๋“ˆ์„ ๋™์‹œ์— ์‚ฌ์šฉํ•  ์ˆ˜๋„ ์žˆ์Šต๋‹ˆ๋‹ค. - -### 3๏ธโƒฃ ์ฒญํ‚น ์„ค์ • (`chunk.yaml`) - -์ฒญํ‚น ๋ชจ๋“ˆ์„ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค. +### **3๏ธโƒฃ ์ฒญํ‚น ์„ค์ • (`chunk.yaml`)** ```yaml modules: @@ -68,25 +81,21 @@ modules: add_file_name: en ``` -์—ฌ๋Ÿฌ ๊ฐœ์˜ ์ฒญํ‚น ๋ชจ๋“ˆ์„ ์‚ฌ์šฉํ•  ๊ฒฝ์šฐ, QA ๋ฐ์ดํ„ฐ์™€ ๋งคํ•‘ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. - -### 4๏ธโƒฃ QA ๋ฐ์ดํ„ฐ ์ƒ์„ฑ - -`raw_data/`์— ์ €์žฅ๋œ ํŒŒ์ผ์„ ๋ฐ”ํƒ•์œผ๋กœ **ํŒŒ์‹ฑ โ†’ ์ฒญํ‚น โ†’ QA ๋ฐ์ดํ„ฐ ์ƒ์„ฑ**์„ ์ง„ํ–‰ํ•ฉ๋‹ˆ๋‹ค. -QA ๋ฐ์ดํ„ฐ๋Š” `GPT-4o-mini` ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•˜์—ฌ **20๊ฑด**์„ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค. +### **4๏ธโƒฃ QA ๋ฐ์ดํ„ฐ ์ƒ์„ฑ** ```bash -cd autorag-workspace -sh making.sh +bash making.sh ``` +> **์ฐธ๊ณ :** `GPT-4o-mini` ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•˜์—ฌ ์ž๋™์œผ๋กœ QA ๋ฐ์ดํ„ฐ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค. (๊ธฐ๋ณธ 20๊ฑด) + --- -## ๐Ÿ” RAG Pipeline ํ‰๊ฐ€ +## **๐Ÿ” 3. RAG Pipeline ํ‰๊ฐ€** -### 1๏ธโƒฃ Ollama ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ +### **1๏ธโƒฃ Ollama ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ** -WSL(Windows Subsystem for Linux)์—์„œ ์‹คํ–‰ํ•ฉ๋‹ˆ๋‹ค. +WSL(Windows Subsystem for Linux)์—์„œ ์‹คํ–‰ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. ```bash docker exec -it autorag-ollama bash @@ -94,61 +103,32 @@ ollama pull phi4 ollama list ``` -### 2๏ธโƒฃ AutoRAG ํ‰๊ฐ€ ์‹คํ–‰ +### **2๏ธโƒฃ ํ‰๊ฐ€ ์‹คํ–‰** ```bash -cd Autorag-workspace python main.py ``` -### 3๏ธโƒฃ ํ‰๊ฐ€ ๊ฒฐ๊ณผ ํ™•์ธ +### **3๏ธโƒฃ ํ‰๊ฐ€ ๊ฒฐ๊ณผ ํ™•์ธ** ํ‰๊ฐ€ ๊ฒฐ๊ณผ๋Š” ํ”„๋กœ์ ํŠธ ํด๋” ๋‚ด `benchmark_*` ๊ฒฝ๋กœ์— ์ €์žฅ๋ฉ๋‹ˆ๋‹ค. -#### โœ… ์ „์ฒด ํŒŒ์ดํ”„๋ผ์ธ ํ‰๊ฐ€ ๊ฒฐ๊ณผ +#### **โœ… ์ „์ฒด ํ‰๊ฐ€ ๊ฒฐ๊ณผ** ```bash -cd projects/ํ”„๋กœ์ ํŠธ์ด๋ฆ„/benchmark_{*}/*/ -summary.csv +cd projects/example_project/benchmark_*/summary.csv ``` -#### โœ… ์„ธ๋ถ€ ํ‰๊ฐ€ ๊ฒฐ๊ณผ +#### **โœ… ์„ธ๋ถ€ ํ‰๊ฐ€ ๊ฒฐ๊ณผ** -- **๊ฒ€์ƒ‰๊ธฐ ๋…ธ๋“œ ๋ผ์ธ ํ‰๊ฐ€ ๊ฒฐ๊ณผ** - ```bash - cd ./retrieve_node_line - summary.csv - ``` -- **๊ฒ€์ƒ‰ ๋…ธ๋“œ ํ‰๊ฐ€ ๊ฒฐ๊ณผ** - ```bash - cd ./retrieve_node_line/retrival - summary.csv - ``` -- **๋ฆฌ๋žญ์ปค ๋…ธ๋“œ ํ‰๊ฐ€ ๊ฒฐ๊ณผ** - ```bash - cd ./retrieve_node_line/passage_reranker - summary.csv - ``` -- **์ƒ์„ฑ๊ธฐ ๋…ธ๋“œ ๋ผ์ธ ํ‰๊ฐ€ ๊ฒฐ๊ณผ** - ```bash - cd ./post_retrieve_node_line - summary.csv - ``` -- **์ƒ์„ฑ ๋…ธ๋“œ ํ‰๊ฐ€ ๊ฒฐ๊ณผ** - ```bash - cd ./post_retrieve_node_line/generator - summary.csv - ``` +| ํ‰๊ฐ€ ํ•ญ๋ชฉ | ํŒŒ์ผ ๊ฒฝ๋กœ | +| ------------------------- | --------------------------------------------------- | +| **๊ฒ€์ƒ‰๊ธฐ ๋…ธ๋“œ ๋ผ์ธ ํ‰๊ฐ€** | `./retrieve_node_line/summary.csv` | +| **๊ฒ€์ƒ‰ ๋…ธ๋“œ ํ‰๊ฐ€** | `./retrieve_node_line/retrieval/summary.csv` | +| **๋ฆฌ๋žญ์ปค ๋…ธ๋“œ ํ‰๊ฐ€** | `./retrieve_node_line/passage_reranker/summary.csv` | +| **์ƒ์„ฑ๊ธฐ ๋…ธ๋“œ ๋ผ์ธ ํ‰๊ฐ€** | `./post_retrieve_node_line/summary.csv` | +| **์ƒ์„ฑ ๋…ธ๋“œ ํ‰๊ฐ€** | `./post_retrieve_node_line/generator/summary.csv` | -> ๐Ÿ“Œ **์ฐธ๊ณ :** `./projects/example_01` ํด๋”๋Š” ๋ฐ์ดํ„ฐ ์ƒ์„ฑ๋ถ€ํ„ฐ ํ‰๊ฐ€๊นŒ์ง€ ์ง„ํ–‰๋œ ์˜ˆ์ œ์ž…๋‹ˆ๋‹ค. +> ๐Ÿ“Œ `./projects/example_01` ํด๋”์—๋Š” ๋ฐ์ดํ„ฐ ์ƒ์„ฑ๋ถ€ํ„ฐ ํ‰๊ฐ€๊นŒ์ง€ ์ง„ํ–‰๋œ ์˜ˆ์ œ ํŒŒ์ผ์ด ํฌํ•จ๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค. --- - -## ๐Ÿ“Š ํ‰๊ฐ€ ๋Œ€์‹œ๋ณด๋“œ ์‹คํ–‰ - -```bash -cd Autorag-workspace -sh dashboard.sh -``` - -AutoRAG ํ‰๊ฐ€ ๊ฒฐ๊ณผ๋ฅผ ์ž์„ธํžˆ ํ™•์ธํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. diff --git a/autorag-workspace/autorag/cli.py b/autorag-workspace/autorag/cli.py deleted file mode 100644 index d114b83..0000000 --- a/autorag-workspace/autorag/cli.py +++ /dev/null @@ -1,209 +0,0 @@ -import importlib.resources -import logging -import os -import pathlib -import subprocess -from pathlib import Path -from typing import Optional - -import click -import nest_asyncio - -from autorag import dashboard -from autorag.deploy import extract_best_config as original_extract_best_config -from autorag.deploy.api import ApiRunner -from autorag.evaluator import Evaluator -from autorag.validator import Validator - -logger = logging.getLogger("AutoRAG") - -autorag_dir = os.path.dirname(os.path.realpath(__file__)) -version_file = os.path.join(autorag_dir, "VERSION") -with open(version_file, "r") as f: - __version__ = f.read().strip() - - -@click.group() -@click.version_option(__version__) -def cli(): - pass - - -@click.command() -@click.option( - "--config", - "-c", - help="Path to config yaml file. Must be yaml or yml file.", - type=str, -) -@click.option( - "--qa_data_path", help="Path to QA dataset. Must be parquet file.", type=str -) -@click.option( - "--corpus_data_path", help="Path to corpus dataset. Must be parquet file.", type=str -) -@click.option( - "--project_dir", help="Path to project directory.", type=str, default=None -) -@click.option( - "--skip_validation", - help="Skip validation or not. Default is False.", - type=bool, - default=False, -) -def evaluate(config, qa_data_path, corpus_data_path, project_dir, skip_validation): - if not config.endswith(".yaml") and not config.endswith(".yml"): - raise ValueError(f"Config file {config} is not a yaml or yml file.") - if not os.path.exists(config): - raise ValueError(f"Config file {config} does not exist.") - evaluator = Evaluator(qa_data_path, corpus_data_path, project_dir=project_dir) - evaluator.start_trial(config, skip_validation=skip_validation) - - -@click.command() -@click.option( - "--config_path", type=str, help="Path to extracted config yaml file.", default=None -) -@click.option("--host", type=str, default="0.0.0.0", help="Host address") -@click.option("--port", type=int, default=8000, help="Port number") -@click.option( - "--trial_dir", - type=click.Path(file_okay=False, dir_okay=True, exists=True), - default=None, - help="Path to trial directory.", -) -@click.option( - "--project_dir", help="Path to project directory.", type=str, default=None -) -@click.option( - "--remote", help="Run the API server in remote mode.", type=bool, default=False -) -def run_api(config_path, host, port, trial_dir, project_dir, remote: bool): - if trial_dir is None: - runner = ApiRunner.from_yaml(config_path, project_dir=project_dir) - else: - runner = ApiRunner.from_trial_folder(trial_dir) - logger.info(f"Running API server at {host}:{port}...") - nest_asyncio.apply() - runner.run_api_server(host, port, remote=remote) - - -@click.command() -@click.option( - "--yaml_path", type=click.Path(path_type=Path), help="Path to the YAML file." -) -@click.option( - "--project_dir", - type=click.Path(path_type=Path), - help="Path to the project directory.", -) -@click.option( - "--trial_path", type=click.Path(path_type=Path), help="Path to the trial directory." -) -def run_web( - yaml_path: Optional[str], project_dir: Optional[str], trial_path: Optional[str] -): - try: - with importlib.resources.path("autorag", "web.py") as web_path: - web_py_path = str(web_path) - except ImportError: - raise ImportError( - "Could not locate the web.py file within the autorag package." - " Please ensure that autorag is correctly installed." - ) - - if not yaml_path and not trial_path: - raise ValueError("yaml_path or trial_path must be given.") - elif yaml_path and trial_path: - raise ValueError("yaml_path and trial_path cannot be given at the same time.") - elif yaml_path and not project_dir: - subprocess.run( - ["streamlit", "run", web_py_path, "--", "--yaml_path", yaml_path] - ) - elif yaml_path and project_dir: - subprocess.run( - [ - "streamlit", - "run", - web_py_path, - "--", - "--yaml_path", - yaml_path, - "--project_dir", - project_dir, - ] - ) - elif trial_path: - subprocess.run( - ["streamlit", "run", web_py_path, "--", "--trial_path", trial_path] - ) - - -@click.command() -@click.option( - "--trial_dir", - type=click.Path(dir_okay=True, file_okay=False, exists=True), - required=True, -) -@click.option( - "--port", type=int, default=7690, help="Port number. The default is 7690." -) -def run_dashboard(trial_dir: str, port: int): - dashboard.run(trial_dir, port=port) - - -@click.command() -@click.option("--trial_path", type=click.Path(), help="Path to the trial directory.") -@click.option( - "--output_path", - type=click.Path(), - help="Path to the output directory." " Must be .yaml or .yml file.", -) -def extract_best_config(trial_path: str, output_path: str): - original_extract_best_config(trial_path, output_path) - - -@click.command() -@click.option("--trial_path", help="Path to trial directory.", type=str) -def restart_evaluate(trial_path): - if not os.path.exists(trial_path): - raise ValueError(f"trial_path {trial_path} does not exist.") - project_dir = str(pathlib.PurePath(trial_path).parent) - qa_data_path = os.path.join(project_dir, "data", "qa.parquet") - corpus_data_path = os.path.join(project_dir, "data", "corpus.parquet") - evaluator = Evaluator(qa_data_path, corpus_data_path, project_dir) - evaluator.restart_trial(trial_path) - - -@click.command() -@click.option( - "--config", - "-c", - help="Path to config yaml file. Must be yaml or yml file.", - type=str, -) -@click.option( - "--qa_data_path", help="Path to QA dataset. Must be parquet file.", type=str -) -@click.option( - "--corpus_data_path", help="Path to corpus dataset. Must be parquet file.", type=str -) -def validate(config, qa_data_path, corpus_data_path): - if not config.endswith(".yaml") and not config.endswith(".yml"): - raise ValueError(f"Config file {config} is not a parquet file.") - if not os.path.exists(config): - raise ValueError(f"Config file {config} does not exist.") - validator = Validator(qa_data_path=qa_data_path, corpus_data_path=corpus_data_path) - validator.validate(config) - - -cli.add_command(evaluate, "evaluate") -cli.add_command(run_api, "run_api") -cli.add_command(run_web, "run_web") -cli.add_command(run_dashboard, "dashboard") -cli.add_command(extract_best_config, "extract_best_config") -cli.add_command(restart_evaluate, "restart_evaluate") -cli.add_command(validate, "validate") - -if __name__ == "__main__": - cli() diff --git a/autorag-workspace/autorag/dashboard.py b/autorag-workspace/autorag/dashboard.py deleted file mode 100644 index b55d2e9..0000000 --- a/autorag-workspace/autorag/dashboard.py +++ /dev/null @@ -1,199 +0,0 @@ -import ast -import logging -import os -from typing import Dict, List - -import matplotlib.pyplot as plt -import pandas as pd -import panel as pn -import seaborn as sns -import yaml -from bokeh.models import NumberFormatter, BooleanFormatter - -from autorag.utils.util import dict_to_markdown, dict_to_markdown_table - -pn.extension( - "terminal", - "tabulator", - "mathjax", - "ipywidgets", - console_output="disable", - sizing_mode="stretch_width", - css_files=[ - "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css" - ], -) -logger = logging.getLogger("AutoRAG") - - -def find_node_dir(trial_dir: str) -> List[str]: - trial_summary_df = pd.read_csv(os.path.join(trial_dir, "summary.csv")) - result_paths = [] - for idx, row in trial_summary_df.iterrows(): - node_line_name = row["node_line_name"] - node_type = row["node_type"] - result_paths.append(os.path.join(trial_dir, node_line_name, node_type)) - return result_paths - - -def get_metric_values(node_summary_df: pd.DataFrame) -> Dict: - non_metric_column_names = [ - "filename", - "module_name", - "module_params", - "execution_time", - "average_output_token", - "is_best", - ] - best_row = node_summary_df.loc[node_summary_df["is_best"]].drop( - columns=non_metric_column_names, errors="ignore" - ) - assert len(best_row) == 1, "The best module must be only one." - return best_row.iloc[0].to_dict() - - -def make_trial_summary_md(trial_dir): - markdown_text = f"""# Trial Result Summary -- Trial Directory : {trial_dir} - -""" - node_dirs = find_node_dir(trial_dir) - for node_dir in node_dirs: - node_summary_filepath = os.path.join(node_dir, "summary.csv") - node_type = os.path.basename(node_dir) - node_summary_df = pd.read_csv(node_summary_filepath) - best_row = node_summary_df.loc[node_summary_df["is_best"]].iloc[0] - metric_dict = get_metric_values(node_summary_df) - markdown_text += f"""--- - -## {node_type} best module - -### Module Name - -{best_row['module_name']} - -### Module Params - -{dict_to_markdown(ast.literal_eval(best_row['module_params']), level=3)} - -### Metric Values - -{dict_to_markdown_table(metric_dict, key_column_name='metric_name', value_column_name='metric_value')} - -""" - - return markdown_text - - -def node_view(node_dir: str): - non_metric_column_names = [ - "filename", - "module_name", - "module_params", - "execution_time", - "average_output_token", - "is_best", - ] - summary_df = pd.read_csv(os.path.join(node_dir, "summary.csv")) - bokeh_formatters = { - "float": NumberFormatter(format="0.000"), - "bool": BooleanFormatter(), - } - first_df = pd.read_parquet(os.path.join(node_dir, "0.parquet"), engine="pyarrow") - - each_module_df_widget = pn.widgets.Tabulator( - pd.DataFrame(columns=first_df.columns), - name="Module DataFrame", - formatters=bokeh_formatters, - pagination="local", - page_size=20, - widths=150, - ) - - def change_module_widget(event): - if event.column == "detail": - filename = summary_df["filename"].iloc[event.row] - filepath = os.path.join(node_dir, filename) - each_module_df = pd.read_parquet(filepath, engine="pyarrow") - each_module_df_widget.value = each_module_df - - df_widget = pn.widgets.Tabulator( - summary_df, - name="Summary DataFrame", - formatters=bokeh_formatters, - buttons={"detail": ''}, - widths=150, - ) - df_widget.on_click(change_module_widget) - - try: - fig, ax = plt.subplots(figsize=(10, 5)) - metric_df = summary_df.drop(columns=non_metric_column_names, errors="ignore") - sns.stripplot(data=metric_df, ax=ax) - strip_plot_pane = pn.pane.Matplotlib(fig, tight=True) - - fig2, ax2 = plt.subplots(figsize=(10, 5)) - sns.boxplot(data=metric_df, ax=ax2) - box_plot_pane = pn.pane.Matplotlib(fig2, tight=True) - plot_pane = pn.Row(strip_plot_pane, box_plot_pane) - - layout = pn.Column( - "## Summary distribution plot", - plot_pane, - "## Summary DataFrame", - df_widget, - "## Module Result DataFrame", - each_module_df_widget, - ) - except Exception as e: - logger.error(f"Skipping make boxplot and stripplot with error {e}") - layout = pn.Column("## Summary DataFrame", df_widget) - layout.servable() - return layout - - -CSS = """ -div.card-margin:nth-child(1) { - max-height: 300px; -} -div.card-margin:nth-child(2) { - max-height: 400px; -} -""" - - -def yaml_to_markdown(yaml_filepath): - markdown_content = "" - with open(yaml_filepath, "r", encoding="utf-8") as file: - try: - content = yaml.safe_load(file) - markdown_content += f"## {os.path.basename(yaml_filepath)}\n```yaml\n{yaml.safe_dump(content, allow_unicode=True)}\n```\n\n" - except yaml.YAMLError as exc: - print(f"Error in {yaml_filepath}: {exc}") - return markdown_content - - -def run(trial_dir: str, port: int = 7690): - trial_summary_md = make_trial_summary_md(trial_dir=trial_dir) - trial_summary_tab = pn.pane.Markdown(trial_summary_md, sizing_mode="stretch_width") - - node_views = [ - (str(os.path.basename(node_dir)), node_view(node_dir)) - for node_dir in find_node_dir(trial_dir) - ] - - yaml_file_markdown = yaml_to_markdown(os.path.join(trial_dir, "config.yaml")) - - yaml_file = pn.pane.Markdown(yaml_file_markdown, sizing_mode="stretch_width") - - tabs = pn.Tabs( - ("Summary", trial_summary_tab), - *node_views, - ("Used YAML file", yaml_file), - dynamic=True, - ) - - template = pn.template.FastListTemplate( - site="AutoRAG", title="Dashboard", main=[tabs], raw_css=[CSS] - ).servable() - template.show(port=port) diff --git a/autorag-workspace/dashboard.sh b/autorag-workspace/dashboard.sh deleted file mode 100644 index cbc2e58..0000000 --- a/autorag-workspace/dashboard.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -export BOKEH_ALLOW_WS_ORIGIN="localhost:7690,172.16.9.208:7690" - -python -m autorag.cli dashboard \ - --trial_dir ../projects/daesan-dangjin_01/benchmark/1 - -echo "๐Ÿ“Š AutoRAG ๋Œ€์‹œ๋ณด๋“œ http://localhost:7690/..." diff --git a/autorag-workspace/example/sample_config/chunk/chunk_full.yaml b/autorag-workspace/example/sample_config/chunk/chunk_full.yaml deleted file mode 100644 index 429daef..0000000 --- a/autorag-workspace/example/sample_config/chunk/chunk_full.yaml +++ /dev/null @@ -1,32 +0,0 @@ -modules: - - module_type: llama_index_chunk - chunk_method: [ Token, Sentence ] - chunk_size: [ 1024, 512 ] - chunk_overlap: 24 - add_file_name: en - - module_type: llama_index_chunk - chunk_method: [ SentenceWindow ] - window_size: 3 - add_file_name: en - - module_type: llama_index_chunk - chunk_method: [ Semantic_llama_index ] - embed_model: openai - buffer_size: 1 - breakpoint_percentile_threshold: 95 - add_file_name: en - - module_type: llama_index_chunk - chunk_method: [ SemanticDoubleMerging ] - add_file_name: en - - module_type: llama_index_chunk - chunk_method: [ SimpleFile ] - add_file_name: en - - module_type: langchain_chunk - chunk_method: sentencetransformerstoken - - module_type: langchain_chunk - chunk_method: recursivecharacter - separators: [ " ", "\n" ] - - module_type: langchain_chunk - chunk_method: character - separator: ". " - - module_type: langchain_chunk - chunk_method: Konlpy diff --git a/autorag-workspace/example/sample_config/chunk/chunk_ko.yaml b/autorag-workspace/example/sample_config/chunk/chunk_ko.yaml deleted file mode 100644 index 322a971..0000000 --- a/autorag-workspace/example/sample_config/chunk/chunk_ko.yaml +++ /dev/null @@ -1,19 +0,0 @@ -modules: - - module_type: llama_index_chunk - chunk_method: [ Token, Sentence ] - chunk_size: [ 1024, 512 ] - chunk_overlap: 24 - add_file_name: ko - - module_type: llama_index_chunk - chunk_method: [ SentenceWindow ] - sentence_splitter: kiwi - add_file_name: ko - - module_type: llama_index_chunk - chunk_method: [ Semantic_llama_index ] - embed_model: openai - add_file_name: ko - - module_type: llama_index_chunk - chunk_method: [ SimpleFile ] - add_file_name: ko - - module_type: langchain_chunk - chunk_method: KonlpyTextSplitter diff --git a/autorag-workspace/example/sample_config/chunk/simple_chunk.yaml b/autorag-workspace/example/sample_config/chunk/simple_chunk.yaml deleted file mode 100644 index 3f65aeb..0000000 --- a/autorag-workspace/example/sample_config/chunk/simple_chunk.yaml +++ /dev/null @@ -1,3 +0,0 @@ -modules: - - module_type: llama_index_chunk - chunk_method: Token diff --git a/autorag-workspace/example/sample_config/parse/all_files_full.yaml b/autorag-workspace/example/sample_config/parse/all_files_full.yaml deleted file mode 100644 index 9cdbf7e..0000000 --- a/autorag-workspace/example/sample_config/parse/all_files_full.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# You can use only one of the following modules at a time. -modules: - # Use Directory Parse - - module_type: langchain_parse - file_type: all_files - parse_method: directory - # Use Unstructured - - module_type: langchain_parse - file_type: all_files - parse_method: unstructured - # Use Upsatge Document Parse - - module_type: langchain_parse - file_type: all_files - parse_method: upstagedocumentparse - # Use Naver Clova OCR - - module_type: clova - file_type: all_files - table_detection: true - # Use Llama Parse - - module_type: llamaparse - file_type: all_files - result_type: markdown - language: ko - use_vendor_multimodal_model: true - vendor_multimodal_model_name: openai-gpt-4o-mini diff --git a/autorag-workspace/example/sample_config/parse/file_types_full.yaml b/autorag-workspace/example/sample_config/parse/file_types_full.yaml deleted file mode 100644 index 180842b..0000000 --- a/autorag-workspace/example/sample_config/parse/file_types_full.yaml +++ /dev/null @@ -1,26 +0,0 @@ -modules: - # PDF - - module_type: langchain_parse - file_type: pdf - parse_method: pdfminer - # CSV - - module_type: langchain_parse - file_type: csv - parse_method: csv - # JSON - - module_type: langchain_parse - file_type: json - parse_method: json - jq_schema: .content - # Markdown - - module_type: langchain_parse - file_type: md - parse_method: unstructuredmarkdown - # HTML - - module_type: langchain_parse - file_type: html - parse_method: bshtml - # XML - - module_type: langchain_parse - file_type: xml - parse_method: unstructuredxml diff --git a/autorag-workspace/example/sample_config/parse/parse_hybird.yaml b/autorag-workspace/example/sample_config/parse/parse_hybird.yaml deleted file mode 100644 index 1ea4c7f..0000000 --- a/autorag-workspace/example/sample_config/parse/parse_hybird.yaml +++ /dev/null @@ -1,12 +0,0 @@ -modules: - - module_type: table_hybrid_parse - file_type: pdf - text_parse_module: langchain_parse - text_params: - parse_method: pdfplumber - table_parse_module: llamaparse - table_params: - result_type: markdown - language: ko - use_vendor_multimodal_model: true - vendor_multimodal_model_name: openai-gpt-4o-mini diff --git a/autorag-workspace/example/sample_config/parse/parse_ko.yaml b/autorag-workspace/example/sample_config/parse/parse_ko.yaml deleted file mode 100644 index b216046..0000000 --- a/autorag-workspace/example/sample_config/parse/parse_ko.yaml +++ /dev/null @@ -1,11 +0,0 @@ -modules: - - module_type: llama_parse - file_type: all_files - result_type: markdown - language: ko - - module_type: clova - file_type: all_files - table_detection: true - - module_type: langchain_parse - file_type: all_files - parse_method: upstagedocumentparse diff --git a/autorag-workspace/example/sample_config/parse/parse_multimodal.yaml b/autorag-workspace/example/sample_config/parse/parse_multimodal.yaml deleted file mode 100644 index 9ced86e..0000000 --- a/autorag-workspace/example/sample_config/parse/parse_multimodal.yaml +++ /dev/null @@ -1,8 +0,0 @@ -modules: - - module_type: llamaparse - file_type: all_files - result_type: markdown - language: ko - use_vendor_multimodal_model: true - vendor_multimodal_model_name: openai-gpt-4o-mini - use_own_key: true diff --git a/autorag-workspace/example/sample_config/parse/parse_ocr.yaml b/autorag-workspace/example/sample_config/parse/parse_ocr.yaml deleted file mode 100644 index 4c0b757..0000000 --- a/autorag-workspace/example/sample_config/parse/parse_ocr.yaml +++ /dev/null @@ -1,10 +0,0 @@ -modules: - - module_type: langchain_parse - file_type: all_files - parse_method: upstagedocumentparse - - module_type: llama_parse - file_type: all_files - result_type: markdown - - module_type: clova - file_type: all_files - table_detection: true diff --git a/autorag-workspace/example/sample_config/parse/simple_parse.yaml b/autorag-workspace/example/sample_config/parse/simple_parse.yaml deleted file mode 100644 index 73ae58a..0000000 --- a/autorag-workspace/example/sample_config/parse/simple_parse.yaml +++ /dev/null @@ -1,4 +0,0 @@ -modules: - - module_type: langchain_parse - file_type: pdf - parse_method: pdfminer diff --git a/autorag-workspace/example/sample_config/rag/english/gpu/compact_local.yaml b/autorag-workspace/example/sample_config/rag/english/gpu/compact_local.yaml deleted file mode 100644 index 6f66932..0000000 --- a/autorag-workspace/example/sample_config/rag/english/gpu/compact_local.yaml +++ /dev/null @@ -1,50 +0,0 @@ -vectordb: - - name: mpnet_base_chroma - db_type: chroma - client_type: persistent - embedding_model: huggingface_all_mpnet_base_v2 - collection_name: huggingface_all_mpnet_base_v2 - path: ${PROJECT_DIR}/data/chroma -node_lines: -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [retrieval_f1, retrieval_recall, retrieval_precision] - top_k: 20 - modules: - - module_type: bm25 - - module_type: vectordb - vectordb: mpnet_base_chroma - - module_type: hybrid_rrf - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - - node_type: passage_reranker - strategy: - metrics: [retrieval_f1, retrieval_recall, retrieval_precision] - top_k: 3 - modules: - - module_type: pass_reranker - - module_type: tart - - module_type: upr -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: [bleu, meteor, rouge, sem_score] - generator_modules: - - module_type: vllm - llm: mistralai/Mistral-7B-Instruct-v0.2 - modules: - - module_type: fstring - prompt: - - "Answer to given questions with the following passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:" - - "There is a passages related to user question. Please response carefully to the following question. \n\n Passage: {retrieved_contents} \n\n Question: {query} \n\n Answer the question. Think step by step." # Zero-shot CoT prompt - - "{retrieved_contents} \n\n Read the passage carefully, and answer this question. \n\n Question: {query} \n\n Answer the question. Be concise." # concise prompt - - node_type: generator - strategy: - metrics: [bleu, meteor, rouge, sem_score] - modules: - - module_type: vllm - llm: mistralai/Mistral-7B-Instruct-v0.2 - temperature: [0.1, 0.5, 1.1] diff --git a/autorag-workspace/example/sample_config/rag/english/gpu/compact_openai.yaml b/autorag-workspace/example/sample_config/rag/english/gpu/compact_openai.yaml deleted file mode 100644 index 26f2309..0000000 --- a/autorag-workspace/example/sample_config/rag/english/gpu/compact_openai.yaml +++ /dev/null @@ -1,101 +0,0 @@ -node_lines: -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - bm25_tokenizer: [ porter_stemmer, space, gpt2 ] - - module_type: vectordb - vectordb: default - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_reranker - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 10 - top_k: 5 - modules: - - module_type: pass_reranker - - module_type: tart - - module_type: monot5 - - module_type: upr - - module_type: rankgpt - - module_type: colbert_reranker - - module_type: sentence_transformer_reranker - - module_type: flag_embedding_reranker - - module_type: flag_embedding_llm_reranker - - module_type: time_reranker - - module_type: openvino_reranker - - module_type: flashrank_reranker - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: - - "Answer to given questions with the following passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:" - - "There is a passages related to user question. Please response carefully to the following question. \n\n Passage: {retrieved_contents} \n\n Question: {query} \n\n Answer the question. Think step by step." # Zero-shot CoT prompt - - "{retrieved_contents} \n\n Read the passage carefully, and answer this question. \n\n Question: {query} \n\n Answer the question. Be concise." # concise prompt - - module_type: long_context_reorder - prompt: - - "Answer to given questions with the following passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:" - - "There is a passages related to user question. Please response carefully to the following question. \n\n Passage: {retrieved_contents} \n\n Question: {query} \n\n Answer the question. Think step by step." # Zero-shot CoT prompt - - "{retrieved_contents} \n\n Read the passage carefully, and answer this question. \n\n Question: {query} \n\n Answer the question. Be concise." # concise prompt - - node_type: generator - strategy: - metrics: - - metric_name: rouge - - embedding_model: openai - metric_name: sem_score - - metric_name: bert_score - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0] diff --git a/autorag-workspace/example/sample_config/rag/english/gpu/full.yaml b/autorag-workspace/example/sample_config/rag/english/gpu/full.yaml deleted file mode 100644 index dd1da67..0000000 --- a/autorag-workspace/example/sample_config/rag/english/gpu/full.yaml +++ /dev/null @@ -1,154 +0,0 @@ -vectordb: - - name: chroma_bge_m3 - db_type: chroma - client_type: persistent - embedding_model: huggingface_bge_m3 - collection_name: openai - path: ${PROJECT_DIR}/resources/chroma -node_lines: -- node_line_name: pre_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: query_expansion - strategy: - metrics: [retrieval_f1, retrieval_recall, retrieval_precision] - speed_threshold: 10 - top_k: 10 - retrieval_modules: - - module_type: bm25 - bm25_tokenizer: [ porter_stemmer, space, gpt2 ] - - module_type: vectordb - vectordb: chroma_bge_m3 - modules: - - module_type: pass_query_expansion - - module_type: query_decompose - generator_module_type: llama_index_llm - llm: openai - model: [ gpt-4o-mini ] - - module_type: hyde - generator_module_type: llama_index_llm - llm: openai - model: [ gpt-4o-mini ] - max_token: 64 - - module_type: multi_query_expansion - generator_module_type: llama_index_llm - llm: openai - temperature: [ 0.2, 1.0 ] -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - - module_type: vectordb - vectordb: chroma_bge_m3 - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_reranker - strategy: - metrics: [retrieval_f1, retrieval_recall, retrieval_precision] - speed_threshold: 10 - top_k: 5 - modules: - - module_type: pass_reranker - - module_type: tart - - module_type: monot5 - - module_type: upr - - module_type: rankgpt - - module_type: colbert_reranker - - module_type: sentence_transformer_reranker - - module_type: flag_embedding_reranker - - module_type: flag_embedding_llm_reranker - - module_type: time_reranker - - module_type: openvino_reranker - - module_type: flashrank_reranker - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: recency_filter - threshold_datetime: 2015-01-01 3:45:07 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 - - node_type: passage_compressor - strategy: - metrics: [retrieval_token_f1, retrieval_token_recall, retrieval_token_precision] - speed_threshold: 10 - modules: - - module_type: pass_compressor - - module_type: tree_summarize - llm: openai - model: gpt-4o-mini - - module_type: refine - llm: openai - model: gpt-4o-mini - - module_type: longllmlingua -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - - metric_name: g_eval - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: ["Tell me something about the question: {query} \n\n {retrieved_contents}", - "Question: {query} \n Something to read: {retrieved_contents} \n What's your answer?"] - - module_type: long_context_reorder - prompt: [ "Tell me something about the question: {query} \n\n {retrieved_contents}", - "Question: {query} \n Something to read: {retrieved_contents} \n What's your answer?" ] - - module_type: window_replacement - prompt: [ "Tell me something about the question: {query} \n\n {retrieved_contents}", - "Question: {query} \n Something to read: {retrieved_contents} \n What's your answer?" ] - - node_type: generator - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - - metric_name: g_eval # LLM Judge Metric. Default Model: gpt-4-turbo - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0, 1.5] - - module_type: openai_llm - llm: gpt-4o-mini - temperature: 0.8 diff --git a/autorag-workspace/example/sample_config/rag/english/gpu/half.yaml b/autorag-workspace/example/sample_config/rag/english/gpu/half.yaml deleted file mode 100644 index 9f25628..0000000 --- a/autorag-workspace/example/sample_config/rag/english/gpu/half.yaml +++ /dev/null @@ -1,121 +0,0 @@ -vectordb: - - name: chroma_bge_m3 - db_type: chroma - client_type: persistent - embedding_model: huggingface_bge_m3 - collection_name: openai - path: ${PROJECT_DIR}/resources/chroma -node_lines: -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - bm25_tokenizer: [ porter_stemmer, space, gpt2 ] - - module_type: vectordb - vectordb: chroma_bge_m3 - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_reranker - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 10 - top_k: 5 - modules: - - module_type: pass_reranker - - module_type: tart - - module_type: monot5 - - module_type: upr - - module_type: rankgpt - - module_type: colbert_reranker - - module_type: sentence_transformer_reranker - - module_type: flag_embedding_reranker - - module_type: flag_embedding_llm_reranker - - module_type: time_reranker - - module_type: openvino_reranker - - module_type: flashrank_reranker - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 - - node_type: passage_compressor - strategy: - metrics: [retrieval_token_f1, retrieval_token_recall, retrieval_token_precision] - speed_threshold: 10 - modules: - - module_type: pass_compressor - - module_type: tree_summarize - llm: openai - model: gpt-4o-mini - - module_type: refine - llm: openai - model: gpt-4o-mini - - module_type: longllmlingua -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: - - "Answer to given questions with the following passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:" - - "There is a passages related to user question. Please response carefully to the following question. \n\n Passage: {retrieved_contents} \n\n Question: {query} \n\n Answer the question. Think step by step." # Zero-shot CoT prompt - - "{retrieved_contents} \n\n Read the passage carefully, and answer this question. \n\n Question: {query} \n\n Answer the question. Be concise." # concise prompt - - module_type: long_context_reorder - prompt: - - "Answer to given questions with the following passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:" - - "There is a passages related to user question. Please response carefully to the following question. \n\n Passage: {retrieved_contents} \n\n Question: {query} \n\n Answer the question. Think step by step." # Zero-shot CoT prompt - - "{retrieved_contents} \n\n Read the passage carefully, and answer this question. \n\n Question: {query} \n\n Answer the question. Be concise." # concise prompt - - node_type: generator - strategy: - metrics: - - metric_name: rouge - - embedding_model: openai - metric_name: sem_score - - metric_name: bert_score - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0] diff --git a/autorag-workspace/example/sample_config/rag/english/gpu_api/compact.yaml b/autorag-workspace/example/sample_config/rag/english/gpu_api/compact.yaml deleted file mode 100644 index fc286b9..0000000 --- a/autorag-workspace/example/sample_config/rag/english/gpu_api/compact.yaml +++ /dev/null @@ -1,105 +0,0 @@ -node_lines: -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - bm25_tokenizer: [ porter_stemmer, space, gpt2 ] - - module_type: vectordb - vectordb: default - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_reranker - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 10 - top_k: 5 - modules: - - module_type: pass_reranker - - module_type: tart - - module_type: monot5 - - module_type: upr - - module_type: cohere_reranker - - module_type: rankgpt - - module_type: jina_reranker - - module_type: colbert_reranker - - module_type: sentence_transformer_reranker - - module_type: flag_embedding_reranker - - module_type: flag_embedding_llm_reranker - - module_type: time_reranker - - module_type: openvino_reranker - - module_type: voyageai_reranker - - module_type: mixedbreadai_reranker - - module_type: flashrank_reranker - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: - - "Answer to given questions with the following passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:" - - "There is a passages related to user question. Please response carefully to the following question. \n\n Passage: {retrieved_contents} \n\n Question: {query} \n\n Answer the question. Think step by step." # Zero-shot CoT prompt - - "{retrieved_contents} \n\n Read the passage carefully, and answer this question. \n\n Question: {query} \n\n Answer the question. Be concise." # concise prompt - - module_type: long_context_reorder - prompt: - - "Answer to given questions with the following passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:" - - "There is a passages related to user question. Please response carefully to the following question. \n\n Passage: {retrieved_contents} \n\n Question: {query} \n\n Answer the question. Think step by step." # Zero-shot CoT prompt - - "{retrieved_contents} \n\n Read the passage carefully, and answer this question. \n\n Question: {query} \n\n Answer the question. Be concise." # concise prompt - - node_type: generator - strategy: - metrics: - - metric_name: rouge - - embedding_model: openai - metric_name: sem_score - - metric_name: bert_score - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0] diff --git a/autorag-workspace/example/sample_config/rag/english/gpu_api/full.yaml b/autorag-workspace/example/sample_config/rag/english/gpu_api/full.yaml deleted file mode 100644 index a7435af..0000000 --- a/autorag-workspace/example/sample_config/rag/english/gpu_api/full.yaml +++ /dev/null @@ -1,151 +0,0 @@ -node_lines: -- node_line_name: pre_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: query_expansion - strategy: - metrics: [retrieval_f1, retrieval_recall, retrieval_precision] - speed_threshold: 10 - top_k: 10 - retrieval_modules: - - module_type: bm25 - bm25_tokenizer: [ porter_stemmer, space, gpt2 ] - - module_type: vectordb - vectordb: default - modules: - - module_type: pass_query_expansion - - module_type: query_decompose - generator_module_type: llama_index_llm - llm: openai - model: [ gpt-4o-mini ] - - module_type: hyde - generator_module_type: llama_index_llm - llm: openai - model: [ gpt-4o-mini ] - max_token: 64 - - module_type: multi_query_expansion - generator_module_type: llama_index_llm - llm: openai - temperature: [ 0.2, 1.0 ] -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - - module_type: vectordb - vectordb: default - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_reranker - strategy: - metrics: [retrieval_f1, retrieval_recall, retrieval_precision] - speed_threshold: 10 - top_k: 5 - modules: - - module_type: pass_reranker - - module_type: tart - - module_type: monot5 - - module_type: upr - - module_type: cohere_reranker - - module_type: rankgpt - - module_type: jina_reranker - - module_type: colbert_reranker - - module_type: sentence_transformer_reranker - - module_type: flag_embedding_reranker - - module_type: flag_embedding_llm_reranker - - module_type: time_reranker - - module_type: openvino_reranker - - module_type: voyageai_reranker - - module_type: mixedbreadai_reranker - - module_type: flashrank_reranker - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: recency_filter - threshold_datetime: 2015-01-01 3:45:07 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 - - node_type: passage_compressor - strategy: - metrics: [retrieval_token_f1, retrieval_token_recall, retrieval_token_precision] - speed_threshold: 10 - modules: - - module_type: pass_compressor - - module_type: tree_summarize - llm: openai - model: gpt-4o-mini - - module_type: refine - llm: openai - model: gpt-4o-mini - - module_type: longllmlingua -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - - metric_name: g_eval - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: ["Tell me something about the question: {query} \n\n {retrieved_contents}", - "Question: {query} \n Something to read: {retrieved_contents} \n What's your answer?"] - - module_type: long_context_reorder - prompt: [ "Tell me something about the question: {query} \n\n {retrieved_contents}", - "Question: {query} \n Something to read: {retrieved_contents} \n What's your answer?" ] - - module_type: window_replacement - prompt: [ "Tell me something about the question: {query} \n\n {retrieved_contents}", - "Question: {query} \n Something to read: {retrieved_contents} \n What's your answer?" ] - - node_type: generator - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - - metric_name: g_eval # LLM Judge Metric. Default Model: gpt-4-turbo - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0, 1.5] - - module_type: openai_llm - llm: gpt-4o-mini - temperature: 0.8 diff --git a/autorag-workspace/example/sample_config/rag/english/gpu_api/half.yaml b/autorag-workspace/example/sample_config/rag/english/gpu_api/half.yaml deleted file mode 100644 index 75e1a0c..0000000 --- a/autorag-workspace/example/sample_config/rag/english/gpu_api/half.yaml +++ /dev/null @@ -1,118 +0,0 @@ -node_lines: -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - bm25_tokenizer: [ porter_stemmer, space, gpt2 ] - - module_type: vectordb - vectordb: default - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_reranker - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 10 - top_k: 5 - modules: - - module_type: pass_reranker - - module_type: tart - - module_type: monot5 - - module_type: upr - - module_type: cohere_reranker - - module_type: rankgpt - - module_type: jina_reranker - - module_type: colbert_reranker - - module_type: sentence_transformer_reranker - - module_type: flag_embedding_reranker - - module_type: flag_embedding_llm_reranker - - module_type: time_reranker - - module_type: openvino_reranker - - module_type: voyageai_reranker - - module_type: mixedbreadai_reranker - - module_type: flashrank_reranker - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 - - node_type: passage_compressor - strategy: - metrics: [retrieval_token_f1, retrieval_token_recall, retrieval_token_precision] - speed_threshold: 10 - modules: - - module_type: pass_compressor - - module_type: tree_summarize - llm: openai - model: gpt-4o-mini - - module_type: refine - llm: openai - model: gpt-4o-mini - - module_type: longllmlingua -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: - - "Answer to given questions with the following passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:" - - "There is a passages related to user question. Please response carefully to the following question. \n\n Passage: {retrieved_contents} \n\n Question: {query} \n\n Answer the question. Think step by step." # Zero-shot CoT prompt - - "{retrieved_contents} \n\n Read the passage carefully, and answer this question. \n\n Question: {query} \n\n Answer the question. Be concise." # concise prompt - - module_type: long_context_reorder - prompt: - - "Answer to given questions with the following passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:" - - "There is a passages related to user question. Please response carefully to the following question. \n\n Passage: {retrieved_contents} \n\n Question: {query} \n\n Answer the question. Think step by step." # Zero-shot CoT prompt - - "{retrieved_contents} \n\n Read the passage carefully, and answer this question. \n\n Question: {query} \n\n Answer the question. Be concise." # concise prompt - - node_type: generator - strategy: - metrics: - - metric_name: rouge - - embedding_model: openai - metric_name: sem_score - - metric_name: bert_score - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0] diff --git a/autorag-workspace/example/sample_config/rag/english/non_gpu/compact.yaml b/autorag-workspace/example/sample_config/rag/english/non_gpu/compact.yaml deleted file mode 100644 index 63ad754..0000000 --- a/autorag-workspace/example/sample_config/rag/english/non_gpu/compact.yaml +++ /dev/null @@ -1,83 +0,0 @@ -node_lines: -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - bm25_tokenizer: [ porter_stemmer, space, gpt2 ] - - module_type: vectordb - vectordb: default - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: - - "Answer to given questions with the following passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:" - - "There is a passages related to user question. Please response carefully to the following question. \n\n Passage: {retrieved_contents} \n\n Question: {query} \n\n Answer the question. Think step by step." # Zero-shot CoT prompt - - "{retrieved_contents} \n\n Read the passage carefully, and answer this question. \n\n Question: {query} \n\n Answer the question. Be concise." # concise prompt - - module_type: long_context_reorder - prompt: - - "Answer to given questions with the following passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:" - - "There is a passages related to user question. Please response carefully to the following question. \n\n Passage: {retrieved_contents} \n\n Question: {query} \n\n Answer the question. Think step by step." # Zero-shot CoT prompt - - "{retrieved_contents} \n\n Read the passage carefully, and answer this question. \n\n Question: {query} \n\n Answer the question. Be concise." # concise prompt - - node_type: generator - strategy: - metrics: - - metric_name: rouge - - embedding_model: openai - metric_name: sem_score - - metric_name: bert_score - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0] diff --git a/autorag-workspace/example/sample_config/rag/english/non_gpu/full.yaml b/autorag-workspace/example/sample_config/rag/english/non_gpu/full.yaml deleted file mode 100644 index ba18f58..0000000 --- a/autorag-workspace/example/sample_config/rag/english/non_gpu/full.yaml +++ /dev/null @@ -1,129 +0,0 @@ -node_lines: -- node_line_name: pre_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: query_expansion - strategy: - metrics: [retrieval_f1, retrieval_recall, retrieval_precision] - speed_threshold: 10 - top_k: 10 - retrieval_modules: - - module_type: bm25 - bm25_tokenizer: [ porter_stemmer, space, gpt2 ] - - module_type: vectordb - vectordb: default - modules: - - module_type: pass_query_expansion - - module_type: query_decompose - generator_module_type: llama_index_llm - llm: openai - model: [ gpt-4o-mini ] - - module_type: hyde - generator_module_type: llama_index_llm - llm: openai - model: [ gpt-4o-mini ] - max_token: 64 - - module_type: multi_query_expansion - generator_module_type: llama_index_llm - llm: openai - temperature: [ 0.2, 1.0 ] -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - - module_type: vectordb - embedding_model: openai - embedding_batch: 256 - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: recency_filter - threshold_datetime: 2015-01-01 3:45:07 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 - - node_type: passage_compressor - strategy: - metrics: [retrieval_token_f1, retrieval_token_recall, retrieval_token_precision] - speed_threshold: 10 - modules: - - module_type: pass_compressor - - module_type: tree_summarize - llm: openai - model: gpt-4o-mini - - module_type: refine - llm: openai - model: gpt-4o-mini -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - - metric_name: g_eval - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: ["Tell me something about the question: {query} \n\n {retrieved_contents}", - "Question: {query} \n Something to read: {retrieved_contents} \n What's your answer?"] - - module_type: long_context_reorder - prompt: [ "Tell me something about the question: {query} \n\n {retrieved_contents}", - "Question: {query} \n Something to read: {retrieved_contents} \n What's your answer?" ] - - module_type: window_replacement - prompt: [ "Tell me something about the question: {query} \n\n {retrieved_contents}", - "Question: {query} \n Something to read: {retrieved_contents} \n What's your answer?" ] - - node_type: generator - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - - metric_name: g_eval # LLM Judge Metric. Default Model: gpt-4-turbo - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0, 1.5] - - module_type: openai_llm - llm: gpt-4o-mini - temperature: 0.8 diff --git a/autorag-workspace/example/sample_config/rag/english/non_gpu/half.yaml b/autorag-workspace/example/sample_config/rag/english/non_gpu/half.yaml deleted file mode 100644 index 19aaf7a..0000000 --- a/autorag-workspace/example/sample_config/rag/english/non_gpu/half.yaml +++ /dev/null @@ -1,95 +0,0 @@ -node_lines: -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - bm25_tokenizer: [ porter_stemmer, space, gpt2 ] - - module_type: vectordb - vectordb: default - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 - - node_type: passage_compressor - strategy: - metrics: [retrieval_token_f1, retrieval_token_recall, retrieval_token_precision] - speed_threshold: 10 - modules: - - module_type: pass_compressor - - module_type: tree_summarize - llm: openai - model: gpt-4o-mini - - module_type: refine - llm: openai - model: gpt-4o-mini -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: - - "Answer to given questions with the following passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:" - - "There is a passages related to user question. Please response carefully to the following question. \n\n Passage: {retrieved_contents} \n\n Question: {query} \n\n Answer the question. Think step by step." # Zero-shot CoT prompt - - "{retrieved_contents} \n\n Read the passage carefully, and answer this question. \n\n Question: {query} \n\n Answer the question. Be concise." # concise prompt - - module_type: long_context_reorder - prompt: - - "Answer to given questions with the following passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:" - - "There is a passages related to user question. Please response carefully to the following question. \n\n Passage: {retrieved_contents} \n\n Question: {query} \n\n Answer the question. Think step by step." # Zero-shot CoT prompt - - "{retrieved_contents} \n\n Read the passage carefully, and answer this question. \n\n Question: {query} \n\n Answer the question. Be concise." # concise prompt - - node_type: generator - strategy: - metrics: - - metric_name: rouge - - embedding_model: openai - metric_name: sem_score - - metric_name: bert_score - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0] diff --git a/autorag-workspace/example/sample_config/rag/english/non_gpu/simple_bedrock.yaml b/autorag-workspace/example/sample_config/rag/english/non_gpu/simple_bedrock.yaml deleted file mode 100644 index e59fb63..0000000 --- a/autorag-workspace/example/sample_config/rag/english/non_gpu/simple_bedrock.yaml +++ /dev/null @@ -1,33 +0,0 @@ -vectordb: - - name: mpnet_base_chroma - db_type: chroma - client_type: persistent - embedding_model: huggingface_all_mpnet_base_v2 - collection_name: huggingface_all_mpnet_base_v2 - path: ${PROJECT_DIR}/data/chroma -node_lines: - - node_line_name: retrieve_node_line - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - top_k: 3 - modules: - - module_type: vectordb - vectordb: mpnet_base_chroma - - node_line_name: post_retrieve_node_line - nodes: - - node_type: prompt_maker - strategy: - metrics: [ meteor, rouge, bert_score ] - modules: - - module_type: fstring - prompt: "Read the passages and answer the given question. \n Question: {query} \n Passage: {retrieved_contents} \n Answer : " - - node_type: generator - strategy: - metrics: [ bleu, rouge, bert_score ] - modules: - - module_type: llama_index_llm - llm: bedrock - model: amazon.titan-text-express-v1 - profile_name: your_profile_name # Plz replace this with your profile name diff --git a/autorag-workspace/example/sample_config/rag/english/non_gpu/simple_local.yaml b/autorag-workspace/example/sample_config/rag/english/non_gpu/simple_local.yaml deleted file mode 100644 index f366b66..0000000 --- a/autorag-workspace/example/sample_config/rag/english/non_gpu/simple_local.yaml +++ /dev/null @@ -1,31 +0,0 @@ -vectordb: - - name: baai_chroma - db_type: chroma - client_type: persistent - embedding_model: huggingface_baai_bge_small - collection_name: huggingface_baai_bge_small - path: ${PROJECT_DIR}/data/chroma -node_lines: -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - top_k: 3 - modules: - - module_type: vectordb - vectordb: baai_chroma -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: [ meteor, rouge, bert_score ] - modules: - - module_type: fstring - prompt: "Read the passages and answer the given question. \n Question: {query} \n Passage: {retrieved_contents} \n Answer : " - - node_type: generator - strategy: - metrics: [ bleu, rouge, bert_score ] - modules: - - module_type: vllm - llm: mistralai/Mistral-7B-Instruct-v0.2 diff --git a/autorag-workspace/example/sample_config/rag/english/non_gpu/simple_ollama.yaml b/autorag-workspace/example/sample_config/rag/english/non_gpu/simple_ollama.yaml deleted file mode 100644 index 32eb8ca..0000000 --- a/autorag-workspace/example/sample_config/rag/english/non_gpu/simple_ollama.yaml +++ /dev/null @@ -1,34 +0,0 @@ -vectordb: - - name: mpnet_base_chroma - db_type: chroma - client_type: persistent - embedding_model: huggingface_all_mpnet_base_v2 - collection_name: huggingface_all_mpnet_base_v2 - path: ${PROJECT_DIR}/data/chroma -node_lines: - - node_line_name: retrieve_node_line - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - top_k: 3 - modules: - - module_type: vectordb - vectordb: mpnet_base_chroma - - node_line_name: post_retrieve_node_line - nodes: - - node_type: prompt_maker - strategy: - metrics: [ meteor, rouge, bert_score ] - modules: - - module_type: fstring - prompt: "Read the passages and answer the given question. \n Question: {query} \n Passage: {retrieved_contents} \n Answer : " - - node_type: generator - strategy: - metrics: [ bleu, rouge, bert_score ] - modules: - - module_type: llama_index_llm - llm: ollama - model: llama3 - batch: 1 - request_timeout: 100 # You can increase this value if your model is big (slow) diff --git a/autorag-workspace/example/sample_config/rag/english/non_gpu/simple_openai.yaml b/autorag-workspace/example/sample_config/rag/english/non_gpu/simple_openai.yaml deleted file mode 100644 index 923c490..0000000 --- a/autorag-workspace/example/sample_config/rag/english/non_gpu/simple_openai.yaml +++ /dev/null @@ -1,25 +0,0 @@ -node_lines: -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [retrieval_f1, retrieval_recall, retrieval_precision] - top_k: 3 - modules: - - module_type: vectordb - vectordb: default -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: [bleu, meteor, rouge] - modules: - - module_type: fstring - prompt: "Read the passages and answer the given question. \n Question: {query} \n Passage: {retrieved_contents} \n Answer : " - - node_type: generator - strategy: - metrics: [bleu, rouge] - modules: - - module_type: llama_index_llm - llm: openai - model: [ gpt-4o-mini ] diff --git a/autorag-workspace/example/sample_config/rag/extracted_sample.yaml b/autorag-workspace/example/sample_config/rag/extracted_sample.yaml deleted file mode 100644 index 01f48ed..0000000 --- a/autorag-workspace/example/sample_config/rag/extracted_sample.yaml +++ /dev/null @@ -1,47 +0,0 @@ -vectordb: - - name: default - db_type: chroma - client_type: persistent - embedding_model: openai - collection_name: openai - path: ${PROJECT_DIR}/data/chroma -node_lines: -- node_line_name: retrieve_node_line - nodes: - - node_type: retrieval - modules: - - module_type: vectordb - vectordb: default - top_k: 3 - strategy: - metrics: - - retrieval_f1 - - retrieval_recall - - retrieval_precision -- node_line_name: post_retrieve_node_line - nodes: - - node_type: prompt_maker - modules: - - module_type: fstring - prompt: "Read the passages and answer the given question. \n Question: {query} \n Passage: {retrieved_contents} \n Answer : " - strategy: - generator_modules: - - batch: 2 - llm: openai - module_type: llama_index_llm - metrics: - - bleu - - meteor - - rouge - - node_type: generator - modules: - - batch: 2 - llm: openai - model: gpt-3.5-turbo-16k - module_type: llama_index_llm - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - embedding_model: openai - metric_name: sem_score diff --git a/autorag-workspace/example/sample_config/rag/full.yaml b/autorag-workspace/example/sample_config/rag/full.yaml deleted file mode 100644 index 92fd748..0000000 --- a/autorag-workspace/example/sample_config/rag/full.yaml +++ /dev/null @@ -1,159 +0,0 @@ -vectordb: - - name: chroma_large - db_type: chroma - client_type: persistent - embedding_model: openai_embed_3_large - collection_name: openai_embed_3_large - path: ${PROJECT_DIR}/resources/chroma -node_lines: -- node_line_name: pre_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: query_expansion - strategy: - metrics: [retrieval_f1, retrieval_recall, retrieval_precision] - speed_threshold: 10 - top_k: 10 - retrieval_modules: - - module_type: bm25 - bm25_tokenizer: [ porter_stemmer, ko_kiwi, space, gpt2, ko_okt, ko_kkma, sudachipy ] - - module_type: vectordb - vectordb: chroma_large - modules: - - module_type: pass_query_expansion - - module_type: query_decompose - generator_module_type: llama_index_llm - llm: openai - model: [ gpt-3.5-turbo-16k, gpt-3.5-turbo-1106 ] - - module_type: hyde - generator_module_type: llama_index_llm - llm: openai - model: [ gpt-3.5-turbo-16k ] - max_token: 64 - - module_type: multi_query_expansion - generator_module_type: llama_index_llm - llm: openai - temperature: [ 0.2, 1.0 ] -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - - module_type: vectordb - vectordb: chroma_large - embedding_batch: 256 - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_reranker - strategy: - metrics: [retrieval_f1, retrieval_recall, retrieval_precision] - speed_threshold: 10 - top_k: 5 - modules: - - module_type: pass_reranker - - module_type: tart - - module_type: monot5 - - module_type: upr - - module_type: cohere_reranker - - module_type: rankgpt - - module_type: jina_reranker - - module_type: colbert_reranker - - module_type: sentence_transformer_reranker - - module_type: flag_embedding_reranker - - module_type: flag_embedding_llm_reranker - - module_type: time_reranker - - module_type: openvino_reranker - - module_type: voyageai_reranker - - module_type: mixedbreadai_reranker - - module_type: flashrank_reranker - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: recency_filter - threshold_datetime: 2015-01-01 3:45:07 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 - - node_type: passage_compressor - strategy: - metrics: [retrieval_token_f1, retrieval_token_recall, retrieval_token_precision] - speed_threshold: 10 - modules: - - module_type: pass_compressor - - module_type: tree_summarize - llm: openai - model: gpt-3.5-turbo-16k - - module_type: refine - llm: openai - model: gpt-3.5-turbo-16k - - module_type: longllmlingua -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - - metric_name: g_eval - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-3.5-turbo-16k, gpt-3.5-turbo-1106] - modules: - - module_type: fstring - prompt: ["Tell me something about the question: {query} \n\n {retrieved_contents}", - "Question: {query} \n Something to read: {retrieved_contents} \n What's your answer?"] - - module_type: long_context_reorder - prompt: [ "Tell me something about the question: {query} \n\n {retrieved_contents}", - "Question: {query} \n Something to read: {retrieved_contents} \n What's your answer?" ] - - module_type: window_replacement - prompt: [ "Tell me something about the question: {query} \n\n {retrieved_contents}", - "Question: {query} \n Something to read: {retrieved_contents} \n What's your answer?" ] - - node_type: generator - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - - metric_name: g_eval - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-3.5-turbo-16k, gpt-3.5-turbo-1106] - temperature: [0.5, 1.0, 1.5] - - module_type: openai_llm - llm: gpt-3.5-turbo - temperature: 0.8 diff --git a/autorag-workspace/example/sample_config/rag/korean/gpu/compact_korean.yaml b/autorag-workspace/example/sample_config/rag/korean/gpu/compact_korean.yaml deleted file mode 100644 index 3bcc84b..0000000 --- a/autorag-workspace/example/sample_config/rag/korean/gpu/compact_korean.yaml +++ /dev/null @@ -1,93 +0,0 @@ -vectordb: - - name: chroma_bge_m3 - db_type: chroma - client_type: persistent - embedding_model: huggingface_bge_m3 - collection_name: openai - path: ${PROJECT_DIR}/resources/chroma -node_lines: -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - bm25_tokenizer: [ ko_kiwi, ko_okt, ko_kkma ] - - module_type: vectordb - vectordb: chroma_bge_m3 - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_reranker - modules: - - module_type: koreranker - - module_type: flag_embedding_llm_reranker # Requires enough GPU resources - - module_type: pass_reranker - strategy: - metrics: [ retrieval_recall, retrieval_precision, retrieval_map ] - top_k: 3 - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - module_type: long_context_reorder - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - node_type: generator - strategy: - metrics: - - metric_name: rouge - - embedding_model: openai - metric_name: sem_score - - metric_name: bert_score - lang: ko - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0] diff --git a/autorag-workspace/example/sample_config/rag/korean/gpu/full_korean.yaml b/autorag-workspace/example/sample_config/rag/korean/gpu/full_korean.yaml deleted file mode 100644 index 70bc4b2..0000000 --- a/autorag-workspace/example/sample_config/rag/korean/gpu/full_korean.yaml +++ /dev/null @@ -1,157 +0,0 @@ -vectordb: - - name: chroma_bge_m3 - db_type: chroma - client_type: persistent - embedding_model: huggingface_bge_m3 - collection_name: openai - path: ${PROJECT_DIR}/resources/chroma -node_lines: -- node_line_name: pre_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: query_expansion - strategy: - metrics: [retrieval_f1, retrieval_recall, retrieval_precision] - speed_threshold: 10 - top_k: 10 - retrieval_modules: - - module_type: bm25 - bm25_tokenizer: [ porter_stemmer, space, gpt2 ] - - module_type: vectordb - vectordb: chroma_bge_m3 - modules: - - module_type: pass_query_expansion - - module_type: hyde - generator_module_type: llama_index_llm - llm: openai - model: [ gpt-4o-mini ] - max_token: 64 - prompt: "์งˆ๋ฌธ์— ๋‹ตํ•˜๊ธฐ ์œ„ํ•œ ๋‹จ๋ฝ์„ ์ž‘์„ฑํ•ด ์ฃผ์„ธ์š”." - - module_type: multi_query_expansion - generator_module_type: llama_index_llm - llm: openai - temperature: [ 0.2, 1.0 ] - prompt: | - ๋‹น์‹ ์€ ์ธ๊ณต์ง€๋Šฅ ์–ธ์–ด ๋ชจ๋ธ ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค. - ์ฃผ์–ด์ง„ ์‚ฌ์šฉ์ž ์งˆ๋ฌธ์„ ์ด์šฉํ•ด ์„ธ ๊ฐ€์ง€ ๋ฒ„์ „์˜ ์ƒˆ ์งˆ๋ฌธ์„ ์ƒ์„ฑํ•˜์—ฌ ๋ฒกํ„ฐ ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค์—์„œ ๊ด€๋ จ ๋ฌธ์„œ๋ฅผ ๊ฒ€์ƒ‰ํ•˜๋Š” ๊ฒƒ์ด ๊ณผ์ œ์ž…๋‹ˆ๋‹ค. - ์ฃผ์–ด์ง„ ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ค์–‘ํ•œ ๊ด€์ ์„ ์ƒ์„ฑํ•จ์œผ๋กœ์จ ์‚ฌ์šฉ์ž๊ฐ€ ๊ฑฐ๋ฆฌ ๊ธฐ๋ฐ˜ ์œ ์‚ฌ๋„ ๊ฒ€์ƒ‰์˜ ํ•œ๊ณ„๋ฅผ ๊ทน๋ณตํ•  ์ˆ˜ ์žˆ๋„๋ก ๋•๋Š” ๊ฒƒ์ด ๋ชฉํ‘œ์ž…๋‹ˆ๋‹ค. - ๋‹ค์Œ๊ณผ ๊ฐ™์€ ๋Œ€์ฒด ์งˆ๋ฌธ์„ ์ค„ ๋ฐ”๊ฟˆ์œผ๋กœ ๊ตฌ๋ถ„ํ•˜์—ฌ ์ œ๊ณตํ•˜์‹ญ์‹œ์˜ค. - ์›๋ž˜ ์งˆ๋ฌธ: {query} -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - bm25_tokenizer: [ ko_kiwi, ko_okt, ko_kkma ] - - module_type: vectordb - vectordb: chroma_bge_m3 - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_reranker - modules: - - module_type: koreranker - - module_type: flag_embedding_llm_reranker # Requires enough GPU resources - - module_type: pass_reranker - strategy: - metrics: [ retrieval_recall, retrieval_precision, retrieval_map ] - top_k: 3 - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 - - node_type: passage_compressor - strategy: - metrics: [retrieval_token_f1, retrieval_token_recall, retrieval_token_precision] - speed_threshold: 10 - modules: - - module_type: pass_compressor - - module_type: tree_summarize - llm: openai - model: gpt-4o-mini - prompt: | - ์—ฌ๋Ÿฌ ๋ฌธ๋งฅ ์ •๋ณด๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค.\n - ---------------------\n - {context_str}\n - ---------------------\n - ์‚ฌ์ „ ์ง€์‹์ด ์•„๋‹Œ ์—ฌ๋Ÿฌ ์ •๋ณด๊ฐ€ ์ฃผ์–ด์กŒ์Šต๋‹ˆ๋‹ค, - ์งˆ๋ฌธ์— ๋Œ€๋‹ตํ•˜์„ธ์š”.\n - ์งˆ๋ฌธ: {query_str}\n - ๋‹ต๋ณ€: - - module_type: refine - llm: openai - model: gpt-4o-mini - prompt: | - ์›๋ž˜ ์งˆ๋ฌธ์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค: {query_str} - ๊ธฐ์กด ๋‹ต๋ณ€์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค: {existing_answer} - ์•„๋ž˜์—์„œ ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ •์ œํ•  ์ˆ˜ ์žˆ๋Š” ๊ธฐํšŒ๊ฐ€ ์žˆ์Šต๋‹ˆ๋‹ค. - (ํ•„์š”ํ•œ ๊ฒฝ์šฐ์—๋งŒ) ์•„๋ž˜์— ๋ช‡ ๊ฐ€์ง€ ๋งฅ๋ฝ์„ ์ถ”๊ฐ€ํ•˜์—ฌ ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ •์ œํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. - ------------ - {context_msg} - ------------ - ์ƒˆ๋กœ์šด ๋ฌธ๋งฅ์ด ์ฃผ์–ด์ง€๋ฉด ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ˆ˜์ •ํ•˜์—ฌ ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ •์ œํ•ฉ๋‹ˆ๋‹ค. - ๋งฅ๋ฝ์ด ์“ธ๋ชจ ์—†๋‹ค๋ฉด, ๊ธฐ์กด ๋‹ต๋ณ€์„ ๊ทธ๋Œ€๋กœ ๋‹ต๋ณ€ํ•˜์„ธ์š”. - ์ •์ œ๋œ ๋‹ต๋ณ€: - - module_type: longllmlingua -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - module_type: long_context_reorder - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - node_type: generator - strategy: - metrics: - - metric_name: rouge - - embedding_model: openai - metric_name: sem_score - - metric_name: bert_score - lang: ko - - metric_name: g_eval # LLM Judge Metric. Default Model: gpt-4-turbo - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0] diff --git a/autorag-workspace/example/sample_config/rag/korean/gpu/half_korean.yaml b/autorag-workspace/example/sample_config/rag/korean/gpu/half_korean.yaml deleted file mode 100644 index a20c356..0000000 --- a/autorag-workspace/example/sample_config/rag/korean/gpu/half_korean.yaml +++ /dev/null @@ -1,126 +0,0 @@ -vectordb: - - name: chroma_bge_m3 - db_type: chroma - client_type: persistent - embedding_model: huggingface_bge_m3 - collection_name: openai - path: ${PROJECT_DIR}/resources/chroma -node_lines: -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - bm25_tokenizer: [ ko_kiwi, ko_okt, ko_kkma ] - - module_type: vectordb - vectordb: chroma_bge_m3 - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_reranker - modules: - - module_type: koreranker - - module_type: flag_embedding_llm_reranker # Requires enough GPU resources - - module_type: pass_reranker - strategy: - metrics: [ retrieval_recall, retrieval_precision, retrieval_map ] - top_k: 3 - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 - - node_type: passage_compressor - strategy: - metrics: [retrieval_token_f1, retrieval_token_recall, retrieval_token_precision] - speed_threshold: 10 - modules: - - module_type: pass_compressor - - module_type: tree_summarize - llm: openai - model: gpt-4o-mini - prompt: | - ์—ฌ๋Ÿฌ ๋ฌธ๋งฅ ์ •๋ณด๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค.\n - ---------------------\n - {context_str}\n - ---------------------\n - ์‚ฌ์ „ ์ง€์‹์ด ์•„๋‹Œ ์—ฌ๋Ÿฌ ์ •๋ณด๊ฐ€ ์ฃผ์–ด์กŒ์Šต๋‹ˆ๋‹ค, - ์งˆ๋ฌธ์— ๋Œ€๋‹ตํ•˜์„ธ์š”.\n - ์งˆ๋ฌธ: {query_str}\n - ๋‹ต๋ณ€: - - module_type: refine - llm: openai - model: gpt-4o-mini - prompt: | - ์›๋ž˜ ์งˆ๋ฌธ์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค: {query_str} - ๊ธฐ์กด ๋‹ต๋ณ€์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค: {existing_answer} - ์•„๋ž˜์—์„œ ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ •์ œํ•  ์ˆ˜ ์žˆ๋Š” ๊ธฐํšŒ๊ฐ€ ์žˆ์Šต๋‹ˆ๋‹ค. - (ํ•„์š”ํ•œ ๊ฒฝ์šฐ์—๋งŒ) ์•„๋ž˜์— ๋ช‡ ๊ฐ€์ง€ ๋งฅ๋ฝ์„ ์ถ”๊ฐ€ํ•˜์—ฌ ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ •์ œํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. - ------------ - {context_msg} - ------------ - ์ƒˆ๋กœ์šด ๋ฌธ๋งฅ์ด ์ฃผ์–ด์ง€๋ฉด ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ˆ˜์ •ํ•˜์—ฌ ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ •์ œํ•ฉ๋‹ˆ๋‹ค. - ๋งฅ๋ฝ์ด ์“ธ๋ชจ ์—†๋‹ค๋ฉด, ๊ธฐ์กด ๋‹ต๋ณ€์„ ๊ทธ๋Œ€๋กœ ๋‹ต๋ณ€ํ•˜์„ธ์š”. - ์ •์ œ๋œ ๋‹ต๋ณ€: - - module_type: longllmlingua -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - module_type: long_context_reorder - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - node_type: generator - strategy: - metrics: - - metric_name: rouge - - embedding_model: openai - metric_name: sem_score - - metric_name: bert_score - lang: ko - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0] diff --git a/autorag-workspace/example/sample_config/rag/korean/gpu_api/compact_korean.yaml b/autorag-workspace/example/sample_config/rag/korean/gpu_api/compact_korean.yaml deleted file mode 100644 index c60094d..0000000 --- a/autorag-workspace/example/sample_config/rag/korean/gpu_api/compact_korean.yaml +++ /dev/null @@ -1,87 +0,0 @@ -node_lines: -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - bm25_tokenizer: [ ko_kiwi, ko_okt, ko_kkma ] - - module_type: vectordb - vectordb: default - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_reranker - modules: - - module_type: koreranker - - module_type: flag_embedding_llm_reranker # Requires enough GPU resources - - module_type: cohere_reranker # Set Environment Variable: COHERE_API_KEY - - module_type: pass_reranker - strategy: - metrics: [ retrieval_recall, retrieval_precision, retrieval_map ] - top_k: 3 - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - module_type: long_context_reorder - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - node_type: generator - strategy: - metrics: - - metric_name: rouge - - embedding_model: openai - metric_name: sem_score - - metric_name: bert_score - lang: ko - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0] diff --git a/autorag-workspace/example/sample_config/rag/korean/gpu_api/full_korean.yaml b/autorag-workspace/example/sample_config/rag/korean/gpu_api/full_korean.yaml deleted file mode 100644 index 5fe9f68..0000000 --- a/autorag-workspace/example/sample_config/rag/korean/gpu_api/full_korean.yaml +++ /dev/null @@ -1,151 +0,0 @@ -node_lines: -- node_line_name: pre_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: query_expansion - strategy: - metrics: [retrieval_f1, retrieval_recall, retrieval_precision] - speed_threshold: 10 - top_k: 10 - retrieval_modules: - - module_type: bm25 - bm25_tokenizer: [ porter_stemmer, space, gpt2 ] - - module_type: vectordb - vectordb: default - modules: - - module_type: pass_query_expansion - - module_type: hyde - generator_module_type: llama_index_llm - llm: openai # - model: [ gpt-4o-mini ] # - max_token: 64 - prompt: "์งˆ๋ฌธ์— ๋‹ตํ•˜๊ธฐ ์œ„ํ•œ ๋‹จ๋ฝ์„ ์ž‘์„ฑํ•ด ์ฃผ์„ธ์š”." - - module_type: multi_query_expansion - generator_module_type: llama_index_llm - llm: openai - temperature: [ 0.2, 1.0 ] - prompt: | - ๋‹น์‹ ์€ ์ธ๊ณต์ง€๋Šฅ ์–ธ์–ด ๋ชจ๋ธ ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค. - ์ฃผ์–ด์ง„ ์‚ฌ์šฉ์ž ์งˆ๋ฌธ์„ ์ด์šฉํ•ด ์„ธ ๊ฐ€์ง€ ๋ฒ„์ „์˜ ์ƒˆ ์งˆ๋ฌธ์„ ์ƒ์„ฑํ•˜์—ฌ ๋ฒกํ„ฐ ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค์—์„œ ๊ด€๋ จ ๋ฌธ์„œ๋ฅผ ๊ฒ€์ƒ‰ํ•˜๋Š” ๊ฒƒ์ด ๊ณผ์ œ์ž…๋‹ˆ๋‹ค. - ์ฃผ์–ด์ง„ ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ค์–‘ํ•œ ๊ด€์ ์„ ์ƒ์„ฑํ•จ์œผ๋กœ์จ ์‚ฌ์šฉ์ž๊ฐ€ ๊ฑฐ๋ฆฌ ๊ธฐ๋ฐ˜ ์œ ์‚ฌ๋„ ๊ฒ€์ƒ‰์˜ ํ•œ๊ณ„๋ฅผ ๊ทน๋ณตํ•  ์ˆ˜ ์žˆ๋„๋ก ๋•๋Š” ๊ฒƒ์ด ๋ชฉํ‘œ์ž…๋‹ˆ๋‹ค. - ๋‹ค์Œ๊ณผ ๊ฐ™์€ ๋Œ€์ฒด ์งˆ๋ฌธ์„ ์ค„ ๋ฐ”๊ฟˆ์œผ๋กœ ๊ตฌ๋ถ„ํ•˜์—ฌ ์ œ๊ณตํ•˜์‹ญ์‹œ์˜ค. - ์›๋ž˜ ์งˆ๋ฌธ: {query} -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - bm25_tokenizer: [ ko_kiwi, ko_okt, ko_kkma ] # ko_kiwi, ko_okt - - module_type: vectordb - vectordb: default - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_reranker - modules: - - module_type: koreranker - - module_type: flag_embedding_llm_reranker # Requires enough GPU resources - - module_type: cohere_reranker # Set Environment Variable: COHERE_API_KEY - - module_type: pass_reranker - strategy: - metrics: [ retrieval_recall, retrieval_precision, retrieval_map ] - top_k: 3 - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 - - node_type: passage_compressor - strategy: - metrics: [retrieval_token_f1, retrieval_token_recall, retrieval_token_precision] - speed_threshold: 10 - modules: - - module_type: pass_compressor - - module_type: tree_summarize - llm: openai - model: gpt-4o-mini - prompt: | - ์—ฌ๋Ÿฌ ๋ฌธ๋งฅ ์ •๋ณด๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค.\n - ---------------------\n - {context_str}\n - ---------------------\n - ์‚ฌ์ „ ์ง€์‹์ด ์•„๋‹Œ ์—ฌ๋Ÿฌ ์ •๋ณด๊ฐ€ ์ฃผ์–ด์กŒ์Šต๋‹ˆ๋‹ค, - ์งˆ๋ฌธ์— ๋Œ€๋‹ตํ•˜์„ธ์š”.\n - ์งˆ๋ฌธ: {query_str}\n - ๋‹ต๋ณ€: - - module_type: refine - llm: openai - model: gpt-4o-mini - prompt: | - ์›๋ž˜ ์งˆ๋ฌธ์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค: {query_str} - ๊ธฐ์กด ๋‹ต๋ณ€์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค: {existing_answer} - ์•„๋ž˜์—์„œ ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ •์ œํ•  ์ˆ˜ ์žˆ๋Š” ๊ธฐํšŒ๊ฐ€ ์žˆ์Šต๋‹ˆ๋‹ค. - (ํ•„์š”ํ•œ ๊ฒฝ์šฐ์—๋งŒ) ์•„๋ž˜์— ๋ช‡ ๊ฐ€์ง€ ๋งฅ๋ฝ์„ ์ถ”๊ฐ€ํ•˜์—ฌ ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ •์ œํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. - ------------ - {context_msg} - ------------ - ์ƒˆ๋กœ์šด ๋ฌธ๋งฅ์ด ์ฃผ์–ด์ง€๋ฉด ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ˆ˜์ •ํ•˜์—ฌ ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ •์ œํ•ฉ๋‹ˆ๋‹ค. - ๋งฅ๋ฝ์ด ์“ธ๋ชจ ์—†๋‹ค๋ฉด, ๊ธฐ์กด ๋‹ต๋ณ€์„ ๊ทธ๋Œ€๋กœ ๋‹ต๋ณ€ํ•˜์„ธ์š”. - ์ •์ œ๋œ ๋‹ต๋ณ€: - - module_type: longllmlingua -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - module_type: long_context_reorder - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - node_type: generator - strategy: - metrics: - - metric_name: rouge - - embedding_model: openai - metric_name: sem_score - - metric_name: bert_score - lang: ko - - metric_name: g_eval # LLM Judge Metric. Default Model: gpt-4-turbo - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0] diff --git a/autorag-workspace/example/sample_config/rag/korean/gpu_api/half_korean.yaml b/autorag-workspace/example/sample_config/rag/korean/gpu_api/half_korean.yaml deleted file mode 100644 index 965e9eb..0000000 --- a/autorag-workspace/example/sample_config/rag/korean/gpu_api/half_korean.yaml +++ /dev/null @@ -1,120 +0,0 @@ -node_lines: -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - bm25_tokenizer: [ ko_kiwi, ko_okt, ko_kkma ] - - module_type: vectordb - vectordb: default - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_reranker - modules: - - module_type: koreranker - - module_type: flag_embedding_llm_reranker # Requires enough GPU resources - - module_type: cohere_reranker # Set Environment Variable: COHERE_API_KEY - - module_type: pass_reranker - strategy: - metrics: [ retrieval_recall, retrieval_precision, retrieval_map ] - top_k: 3 - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 - - node_type: passage_compressor - strategy: - metrics: [retrieval_token_f1, retrieval_token_recall, retrieval_token_precision] - speed_threshold: 10 - modules: - - module_type: pass_compressor - - module_type: tree_summarize - llm: openai - model: gpt-4o-mini - prompt: | - ์—ฌ๋Ÿฌ ๋ฌธ๋งฅ ์ •๋ณด๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค.\n - ---------------------\n - {context_str}\n - ---------------------\n - ์‚ฌ์ „ ์ง€์‹์ด ์•„๋‹Œ ์—ฌ๋Ÿฌ ์ •๋ณด๊ฐ€ ์ฃผ์–ด์กŒ์Šต๋‹ˆ๋‹ค, - ์งˆ๋ฌธ์— ๋Œ€๋‹ตํ•˜์„ธ์š”.\n - ์งˆ๋ฌธ: {query_str}\n - ๋‹ต๋ณ€: - - module_type: refine - llm: openai - model: gpt-4o-mini - prompt: | - ์›๋ž˜ ์งˆ๋ฌธ์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค: {query_str} - ๊ธฐ์กด ๋‹ต๋ณ€์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค: {existing_answer} - ์•„๋ž˜์—์„œ ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ •์ œํ•  ์ˆ˜ ์žˆ๋Š” ๊ธฐํšŒ๊ฐ€ ์žˆ์Šต๋‹ˆ๋‹ค. - (ํ•„์š”ํ•œ ๊ฒฝ์šฐ์—๋งŒ) ์•„๋ž˜์— ๋ช‡ ๊ฐ€์ง€ ๋งฅ๋ฝ์„ ์ถ”๊ฐ€ํ•˜์—ฌ ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ •์ œํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. - ------------ - {context_msg} - ------------ - ์ƒˆ๋กœ์šด ๋ฌธ๋งฅ์ด ์ฃผ์–ด์ง€๋ฉด ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ˆ˜์ •ํ•˜์—ฌ ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ •์ œํ•ฉ๋‹ˆ๋‹ค. - ๋งฅ๋ฝ์ด ์“ธ๋ชจ ์—†๋‹ค๋ฉด, ๊ธฐ์กด ๋‹ต๋ณ€์„ ๊ทธ๋Œ€๋กœ ๋‹ต๋ณ€ํ•˜์„ธ์š”. - ์ •์ œ๋œ ๋‹ต๋ณ€: - - module_type: longllmlingua -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - module_type: long_context_reorder - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - node_type: generator - strategy: - metrics: - - metric_name: rouge - - embedding_model: openai - metric_name: sem_score - - metric_name: bert_score - lang: ko - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0] diff --git a/autorag-workspace/example/sample_config/rag/korean/non_gpu/compact_korean.yaml b/autorag-workspace/example/sample_config/rag/korean/non_gpu/compact_korean.yaml deleted file mode 100644 index e10bba6..0000000 --- a/autorag-workspace/example/sample_config/rag/korean/non_gpu/compact_korean.yaml +++ /dev/null @@ -1,78 +0,0 @@ -node_lines: -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - bm25_tokenizer: [ ko_kiwi, ko_okt, ko_kkma ] - - module_type: vectordb - vectordb: default - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - module_type: long_context_reorder - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - node_type: generator - strategy: - metrics: - - metric_name: rouge - - embedding_model: openai - metric_name: sem_score - - metric_name: bert_score - lang: ko - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0] diff --git a/autorag-workspace/example/sample_config/rag/korean/non_gpu/full_korean.yaml b/autorag-workspace/example/sample_config/rag/korean/non_gpu/full_korean.yaml deleted file mode 100644 index e6b6a65..0000000 --- a/autorag-workspace/example/sample_config/rag/korean/non_gpu/full_korean.yaml +++ /dev/null @@ -1,142 +0,0 @@ -node_lines: -- node_line_name: pre_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: query_expansion - strategy: - metrics: [retrieval_f1, retrieval_recall, retrieval_precision] - speed_threshold: 10 - top_k: 10 - retrieval_modules: - - module_type: bm25 - bm25_tokenizer: [ porter_stemmer, space, gpt2 ] - - module_type: vectordb - vectordb: default - modules: - - module_type: pass_query_expansion - - module_type: hyde - generator_module_type: llama_index_llm - llm: openai - model: [ gpt-4o-mini ] - max_token: 64 - prompt: "์งˆ๋ฌธ์— ๋‹ตํ•˜๊ธฐ ์œ„ํ•œ ๋‹จ๋ฝ์„ ์ž‘์„ฑํ•ด ์ฃผ์„ธ์š”." - - module_type: multi_query_expansion - generator_module_type: llama_index_llm - llm: openai - temperature: [ 0.2, 1.0 ] - prompt: | - ๋‹น์‹ ์€ ์ธ๊ณต์ง€๋Šฅ ์–ธ์–ด ๋ชจ๋ธ ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค. - ์ฃผ์–ด์ง„ ์‚ฌ์šฉ์ž ์งˆ๋ฌธ์„ ์ด์šฉํ•ด ์„ธ ๊ฐ€์ง€ ๋ฒ„์ „์˜ ์ƒˆ ์งˆ๋ฌธ์„ ์ƒ์„ฑํ•˜์—ฌ ๋ฒกํ„ฐ ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค์—์„œ ๊ด€๋ จ ๋ฌธ์„œ๋ฅผ ๊ฒ€์ƒ‰ํ•˜๋Š” ๊ฒƒ์ด ๊ณผ์ œ์ž…๋‹ˆ๋‹ค. - ์ฃผ์–ด์ง„ ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ค์–‘ํ•œ ๊ด€์ ์„ ์ƒ์„ฑํ•จ์œผ๋กœ์จ ์‚ฌ์šฉ์ž๊ฐ€ ๊ฑฐ๋ฆฌ ๊ธฐ๋ฐ˜ ์œ ์‚ฌ๋„ ๊ฒ€์ƒ‰์˜ ํ•œ๊ณ„๋ฅผ ๊ทน๋ณตํ•  ์ˆ˜ ์žˆ๋„๋ก ๋•๋Š” ๊ฒƒ์ด ๋ชฉํ‘œ์ž…๋‹ˆ๋‹ค. - ๋‹ค์Œ๊ณผ ๊ฐ™์€ ๋Œ€์ฒด ์งˆ๋ฌธ์„ ์ค„ ๋ฐ”๊ฟˆ์œผ๋กœ ๊ตฌ๋ถ„ํ•˜์—ฌ ์ œ๊ณตํ•˜์‹ญ์‹œ์˜ค. - ์›๋ž˜ ์งˆ๋ฌธ: {query} -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - bm25_tokenizer: [ ko_kiwi, ko_okt, ko_kkma ] - - module_type: vectordb - vectordb: default - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 - - node_type: passage_compressor - strategy: - metrics: [retrieval_token_f1, retrieval_token_recall, retrieval_token_precision] - speed_threshold: 10 - modules: - - module_type: pass_compressor - - module_type: tree_summarize - llm: openai - model: gpt-4o-mini - prompt: | - ์—ฌ๋Ÿฌ ๋ฌธ๋งฅ ์ •๋ณด๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค.\n - ---------------------\n - {context_str}\n - ---------------------\n - ์‚ฌ์ „ ์ง€์‹์ด ์•„๋‹Œ ์—ฌ๋Ÿฌ ์ •๋ณด๊ฐ€ ์ฃผ์–ด์กŒ์Šต๋‹ˆ๋‹ค, - ์งˆ๋ฌธ์— ๋Œ€๋‹ตํ•˜์„ธ์š”.\n - ์งˆ๋ฌธ: {query_str}\n - ๋‹ต๋ณ€: - - module_type: refine - llm: openai - model: gpt-4o-mini - prompt: | - ์›๋ž˜ ์งˆ๋ฌธ์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค: {query_str} - ๊ธฐ์กด ๋‹ต๋ณ€์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค: {existing_answer} - ์•„๋ž˜์—์„œ ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ •์ œํ•  ์ˆ˜ ์žˆ๋Š” ๊ธฐํšŒ๊ฐ€ ์žˆ์Šต๋‹ˆ๋‹ค. - (ํ•„์š”ํ•œ ๊ฒฝ์šฐ์—๋งŒ) ์•„๋ž˜์— ๋ช‡ ๊ฐ€์ง€ ๋งฅ๋ฝ์„ ์ถ”๊ฐ€ํ•˜์—ฌ ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ •์ œํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. - ------------ - {context_msg} - ------------ - ์ƒˆ๋กœ์šด ๋ฌธ๋งฅ์ด ์ฃผ์–ด์ง€๋ฉด ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ˆ˜์ •ํ•˜์—ฌ ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ •์ œํ•ฉ๋‹ˆ๋‹ค. - ๋งฅ๋ฝ์ด ์“ธ๋ชจ ์—†๋‹ค๋ฉด, ๊ธฐ์กด ๋‹ต๋ณ€์„ ๊ทธ๋Œ€๋กœ ๋‹ต๋ณ€ํ•˜์„ธ์š”. - ์ •์ œ๋œ ๋‹ต๋ณ€: - - module_type: longllmlingua -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - module_type: long_context_reorder - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - node_type: generator - strategy: - metrics: - - metric_name: rouge - - embedding_model: openai - metric_name: sem_score - - metric_name: bert_score - lang: ko - - metric_name: g_eval # LLM Judge Metric. Default Model: gpt-4-turbo - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0] diff --git a/autorag-workspace/example/sample_config/rag/korean/non_gpu/half_korean.yaml b/autorag-workspace/example/sample_config/rag/korean/non_gpu/half_korean.yaml deleted file mode 100644 index 18d098f..0000000 --- a/autorag-workspace/example/sample_config/rag/korean/non_gpu/half_korean.yaml +++ /dev/null @@ -1,111 +0,0 @@ -node_lines: -- node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision, - retrieval_ndcg, retrieval_map, retrieval_mrr ] - speed_threshold: 10 - top_k: 10 - modules: - - module_type: bm25 - bm25_tokenizer: [ ko_kiwi, ko_okt, ko_kkma ] - - module_type: vectordb - vectordb: default - - module_type: hybrid_rrf - weight_range: (4,80) - - module_type: hybrid_cc - normalize_method: [ mm, tmm, z, dbsf ] - weight_range: (0.0, 1.0) - test_weight_size: 101 - - node_type: passage_augmenter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - top_k: 5 - embedding_model: openai - modules: - - module_type: pass_passage_augmenter - - module_type: prev_next_augmenter - mode: next - - node_type: passage_filter - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - speed_threshold: 5 - modules: - - module_type: pass_passage_filter - - module_type: similarity_threshold_cutoff - threshold: 0.85 - - module_type: similarity_percentile_cutoff - percentile: 0.6 - - module_type: threshold_cutoff - threshold: 0.85 - - module_type: percentile_cutoff - percentile: 0.6 - - node_type: passage_compressor - strategy: - metrics: [retrieval_token_f1, retrieval_token_recall, retrieval_token_precision] - speed_threshold: 10 - modules: - - module_type: pass_compressor - - module_type: tree_summarize - llm: openai - model: gpt-4o-mini - prompt: | - ์—ฌ๋Ÿฌ ๋ฌธ๋งฅ ์ •๋ณด๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค.\n - ---------------------\n - {context_str}\n - ---------------------\n - ์‚ฌ์ „ ์ง€์‹์ด ์•„๋‹Œ ์—ฌ๋Ÿฌ ์ •๋ณด๊ฐ€ ์ฃผ์–ด์กŒ์Šต๋‹ˆ๋‹ค, - ์งˆ๋ฌธ์— ๋Œ€๋‹ตํ•˜์„ธ์š”.\n - ์งˆ๋ฌธ: {query_str}\n - ๋‹ต๋ณ€: - - module_type: refine - llm: openai - model: gpt-4o-mini - prompt: | - ์›๋ž˜ ์งˆ๋ฌธ์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค: {query_str} - ๊ธฐ์กด ๋‹ต๋ณ€์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค: {existing_answer} - ์•„๋ž˜์—์„œ ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ •์ œํ•  ์ˆ˜ ์žˆ๋Š” ๊ธฐํšŒ๊ฐ€ ์žˆ์Šต๋‹ˆ๋‹ค. - (ํ•„์š”ํ•œ ๊ฒฝ์šฐ์—๋งŒ) ์•„๋ž˜์— ๋ช‡ ๊ฐ€์ง€ ๋งฅ๋ฝ์„ ์ถ”๊ฐ€ํ•˜์—ฌ ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ •์ œํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. - ------------ - {context_msg} - ------------ - ์ƒˆ๋กœ์šด ๋ฌธ๋งฅ์ด ์ฃผ์–ด์ง€๋ฉด ๊ธฐ์กด ๋‹ต๋ณ€์„ ์ˆ˜์ •ํ•˜์—ฌ ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ •์ œํ•ฉ๋‹ˆ๋‹ค. - ๋งฅ๋ฝ์ด ์“ธ๋ชจ ์—†๋‹ค๋ฉด, ๊ธฐ์กด ๋‹ต๋ณ€์„ ๊ทธ๋Œ€๋กœ ๋‹ต๋ณ€ํ•˜์„ธ์š”. - ์ •์ œ๋œ ๋‹ต๋ณ€: - - module_type: longllmlingua -- node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: - - metric_name: bleu - - metric_name: meteor - - metric_name: rouge - - metric_name: sem_score - embedding_model: openai - speed_threshold: 10 - generator_modules: - - module_type: llama_index_llm - llm: openai - model: [gpt-4o-mini] - modules: - - module_type: fstring - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - module_type: long_context_reorder - prompt: ["์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:"] - - node_type: generator - strategy: - metrics: - - metric_name: rouge - - embedding_model: openai - metric_name: sem_score - - metric_name: bert_score - lang: ko - speed_threshold: 10 - modules: - - module_type: llama_index_llm - llm: [openai] - model: [gpt-4o-mini] - temperature: [0.5, 1.0] diff --git a/autorag-workspace/example/sample_config/rag/korean/non_gpu/simple_korean.yaml b/autorag-workspace/example/sample_config/rag/korean/non_gpu/simple_korean.yaml deleted file mode 100644 index 465baf2..0000000 --- a/autorag-workspace/example/sample_config/rag/korean/non_gpu/simple_korean.yaml +++ /dev/null @@ -1,30 +0,0 @@ -node_lines: - - node_line_name: retrieve_node_line # Arbitrary node line name - nodes: - - node_type: retrieval - strategy: - metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ] - top_k: 3 - modules: - - module_type: vectordb - vectordb: default - - node_line_name: post_retrieve_node_line # Arbitrary node line name - nodes: - - node_type: prompt_maker - strategy: - metrics: [ bleu, meteor, rouge ] - modules: - - module_type: fstring - prompt: "์ฃผ์–ด์ง„ passage๋งŒ์„ ์ด์šฉํ•˜์—ฌ question์— ๋”ฐ๋ผ ๋‹ตํ•˜์‹œ์˜ค passage: {retrieved_contents} \n\n Question: {query} \n\n Answer:" - - node_type: generator - strategy: - metrics: - - metric_name: rouge - - embedding_model: openai - metric_name: sem_score - - metric_name: bert_score - lang: ko - modules: - - module_type: llama_index_llm - llm: openai - model: [ gpt-4o-mini ] diff --git a/autorag-workspace/example/sample_dataset/README.md b/autorag-workspace/example/sample_dataset/README.md deleted file mode 100644 index c249ad7..0000000 --- a/autorag-workspace/example/sample_dataset/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# sample_dataset handling - -The sample_dataset folder does not includes a `qa.parquet`, `corpus.parquet` file that is significantly large and cannot be uploaded directly to Git due to size limitations. - -To prepare and use datasets available in the sample_dataset folder, specifically `triviaqa`, `hotpotqa`, `msmarco` and `eli5`, you can follow the outlined methods below. - -## Usage - - The example provided uses `triviaqa`, but the same approach applies to `msmarco`, `eli5` and `hotpotqa`. - -### 1. Run with a specified save path -To execute the Python script from the terminal and save the dataset to a specified path, use the command: - -```bash -python ./sample_dataset/triviaqa/load_triviaqa_dataset.py --save_path /path/to/save/dataset -``` -This runs the `load_triviaqa_dataset.py` script located in the `./sample_dataset/triviaqa/` directory, -using the `--save_path` argument to specify the dataset's save location. - -### 2. Run without specifying a save path -If you run the script without the `--save_path` argument, the dataset will be saved to a default location, which is the directory containing the `load_triviaqa_dataset.py` file, essentially `./sample_dataset/triviaqa/`: -```bash -python ./sample_dataset/triviaqa/load_triviaqa_dataset.py -``` -This behavior allows for a straightforward execution without needing to specify a path, making it convenient for quick tests or when working directly within the target directory. diff --git a/autorag-workspace/example/sample_dataset/eli5/load_eli5_dataset.py b/autorag-workspace/example/sample_dataset/eli5/load_eli5_dataset.py deleted file mode 100644 index 69a07d0..0000000 --- a/autorag-workspace/example/sample_dataset/eli5/load_eli5_dataset.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -import pathlib - -import click -from datasets import load_dataset - - -@click.command() -@click.option( - "--save_path", - type=str, - default=pathlib.PurePath(__file__).parent, - help="Path to save sample eli5 dataset.", -) -def load_eli5_dataset(save_path): - # set file path - file_path = "MarkrAI/eli5_sample_autorag" - - # load dataset - corpus_dataset = load_dataset(file_path, "corpus")["train"].to_pandas() - qa_train_dataset = load_dataset(file_path, "qa")["train"].to_pandas() - qa_test_dataset = load_dataset(file_path, "qa")["test"].to_pandas() - - # save data - if os.path.exists(os.path.join(save_path, "corpus.parquet")) is True: - raise ValueError("corpus.parquet already exists") - if os.path.exists(os.path.join(save_path, "qa.parquet")) is True: - raise ValueError("qa.parquet already exists") - corpus_dataset.to_parquet(os.path.join(save_path, "corpus.parquet")) - qa_train_dataset.to_parquet(os.path.join(save_path, "qa_train.parquet")) - qa_test_dataset.to_parquet(os.path.join(save_path, "qa_test.parquet")) - - -if __name__ == "__main__": - load_eli5_dataset() diff --git a/autorag-workspace/example/sample_dataset/hotpotqa/load_hotpotqa_dataset.py b/autorag-workspace/example/sample_dataset/hotpotqa/load_hotpotqa_dataset.py deleted file mode 100644 index 973d61c..0000000 --- a/autorag-workspace/example/sample_dataset/hotpotqa/load_hotpotqa_dataset.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -import pathlib - -import click -from datasets import load_dataset - - -@click.command() -@click.option( - "--save_path", - type=str, - default=pathlib.PurePath(__file__).parent, - help="Path to save sample hotpotqa dataset.", -) -def load_hotpotqa_dataset(save_path): - # set file path - file_path = "gnekt/hotpotqa_small_sample_autorag" - - # load dataset - corpus_dataset = load_dataset(file_path, "corpus")["train"].to_pandas() - qa_validation_dataset = load_dataset(file_path, "qa")["validation"].to_pandas() - - # save corpus data - if os.path.exists(os.path.join(save_path, "corpus.parquet")) is True: - raise ValueError("corpus.parquet already exists") - if os.path.exists(os.path.join(save_path, "qa.parquet")) is True: - raise ValueError("qa.parquet already exists") - corpus_dataset.to_parquet(os.path.join(save_path, "corpus.parquet"), index=False) - qa_validation_dataset.to_parquet( - os.path.join(save_path, "qa_validation.parquet"), index=False - ) - - -if __name__ == "__main__": - load_hotpotqa_dataset() diff --git a/autorag-workspace/example/sample_dataset/msmarco/load_msmarco_dataset.py b/autorag-workspace/example/sample_dataset/msmarco/load_msmarco_dataset.py deleted file mode 100644 index 8a8abae..0000000 --- a/autorag-workspace/example/sample_dataset/msmarco/load_msmarco_dataset.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -import pathlib - -import click -from datasets import load_dataset - - -@click.command() -@click.option( - "--save_path", - type=str, - default=pathlib.PurePath(__file__).parent, - help="Path to save sample msmarco dataset.", -) -def load_msmarco_dataset(save_path): - # set file path - file_path = "MarkrAI/msmarco_sample_autorag" - - # load dataset - corpus_dataset = load_dataset(file_path, "corpus")["train"].to_pandas() - qa_train_dataset = load_dataset(file_path, "qa")["train"].to_pandas() - qa_test_dataset = load_dataset(file_path, "qa")["test"].to_pandas() - - # save corpus data - if os.path.exists(os.path.join(save_path, "corpus.parquet")) is True: - raise ValueError("corpus.parquet already exists") - if os.path.exists(os.path.join(save_path, "qa.parquet")) is True: - raise ValueError("qa.parquet already exists") - corpus_dataset.to_parquet(os.path.join(save_path, "corpus.parquet"), index=False) - qa_train_dataset.to_parquet( - os.path.join(save_path, "qa_train.parquet"), index=False - ) - qa_test_dataset.to_parquet(os.path.join(save_path, "qa_test.parquet"), index=False) - - -if __name__ == "__main__": - load_msmarco_dataset() diff --git a/autorag-workspace/example/sample_dataset/triviaqa/load_triviaqa_dataset.py b/autorag-workspace/example/sample_dataset/triviaqa/load_triviaqa_dataset.py deleted file mode 100644 index 1067c17..0000000 --- a/autorag-workspace/example/sample_dataset/triviaqa/load_triviaqa_dataset.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -import pathlib - -import click -from datasets import load_dataset - - -@click.command() -@click.option( - "--save_path", - type=str, - default=pathlib.PurePath(__file__).parent, - help="Path to save sample triviaqa dataset.", -) -def load_triviaqa_dataset(save_path): - # set file path - file_path = "MarkrAI/triviaqa_sample_autorag" - - # load dataset - corpus_dataset = load_dataset(file_path, "corpus")["train"].to_pandas() - qa_train_dataset = load_dataset(file_path, "qa")["train"].to_pandas() - qa_test_dataset = load_dataset(file_path, "qa")["test"].to_pandas() - - # save corpus data - if os.path.exists(os.path.join(save_path, "corpus.parquet")) is True: - raise ValueError("corpus.parquet already exists") - if os.path.exists(os.path.join(save_path, "qa.parquet")) is True: - raise ValueError("qa.parquet already exists") - corpus_dataset.to_parquet(os.path.join(save_path, "corpus.parquet"), index=False) - qa_train_dataset.to_parquet( - os.path.join(save_path, "qa_train.parquet"), index=False - ) - qa_test_dataset.to_parquet(os.path.join(save_path, "qa_test.parquet"), index=False) - - -if __name__ == "__main__": - load_triviaqa_dataset() diff --git a/autorag-workspace/autorag/VERSION b/autorag/VERSION similarity index 100% rename from autorag-workspace/autorag/VERSION rename to autorag/VERSION diff --git a/autorag-workspace/autorag/__init__.py b/autorag/__init__.py similarity index 100% rename from autorag-workspace/autorag/__init__.py rename to autorag/__init__.py diff --git a/autorag-workspace/autorag/chunker.py b/autorag/chunker.py similarity index 100% rename from autorag-workspace/autorag/chunker.py rename to autorag/chunker.py diff --git a/autorag-workspace/autorag/data/__init__.py b/autorag/data/__init__.py similarity index 100% rename from autorag-workspace/autorag/data/__init__.py rename to autorag/data/__init__.py diff --git a/autorag-workspace/autorag/data/chunk/__init__.py b/autorag/data/chunk/__init__.py similarity index 100% rename from autorag-workspace/autorag/data/chunk/__init__.py rename to autorag/data/chunk/__init__.py diff --git a/autorag-workspace/autorag/data/chunk/base.py b/autorag/data/chunk/base.py similarity index 100% rename from autorag-workspace/autorag/data/chunk/base.py rename to autorag/data/chunk/base.py diff --git a/autorag-workspace/autorag/data/chunk/langchain_chunk.py b/autorag/data/chunk/langchain_chunk.py similarity index 100% rename from autorag-workspace/autorag/data/chunk/langchain_chunk.py rename to autorag/data/chunk/langchain_chunk.py diff --git a/autorag-workspace/autorag/data/chunk/llama_index_chunk.py b/autorag/data/chunk/llama_index_chunk.py similarity index 100% rename from autorag-workspace/autorag/data/chunk/llama_index_chunk.py rename to autorag/data/chunk/llama_index_chunk.py diff --git a/autorag-workspace/autorag/data/chunk/run.py b/autorag/data/chunk/run.py similarity index 100% rename from autorag-workspace/autorag/data/chunk/run.py rename to autorag/data/chunk/run.py diff --git a/autorag-workspace/autorag/data/legacy/__init__.py b/autorag/data/legacy/__init__.py similarity index 100% rename from autorag-workspace/autorag/data/legacy/__init__.py rename to autorag/data/legacy/__init__.py diff --git a/autorag-workspace/autorag/data/legacy/corpus/__init__.py b/autorag/data/legacy/corpus/__init__.py similarity index 100% rename from autorag-workspace/autorag/data/legacy/corpus/__init__.py rename to autorag/data/legacy/corpus/__init__.py diff --git a/autorag-workspace/autorag/data/legacy/corpus/langchain.py b/autorag/data/legacy/corpus/langchain.py similarity index 100% rename from autorag-workspace/autorag/data/legacy/corpus/langchain.py rename to autorag/data/legacy/corpus/langchain.py diff --git a/autorag-workspace/autorag/data/legacy/corpus/llama_index.py b/autorag/data/legacy/corpus/llama_index.py similarity index 100% rename from autorag-workspace/autorag/data/legacy/corpus/llama_index.py rename to autorag/data/legacy/corpus/llama_index.py diff --git a/autorag-workspace/autorag/data/legacy/qacreation/__init__.py b/autorag/data/legacy/qacreation/__init__.py similarity index 100% rename from autorag-workspace/autorag/data/legacy/qacreation/__init__.py rename to autorag/data/legacy/qacreation/__init__.py diff --git a/autorag-workspace/autorag/data/legacy/qacreation/base.py b/autorag/data/legacy/qacreation/base.py similarity index 100% rename from autorag-workspace/autorag/data/legacy/qacreation/base.py rename to autorag/data/legacy/qacreation/base.py diff --git a/autorag-workspace/autorag/data/legacy/qacreation/llama_index.py b/autorag/data/legacy/qacreation/llama_index.py similarity index 100% rename from autorag-workspace/autorag/data/legacy/qacreation/llama_index.py rename to autorag/data/legacy/qacreation/llama_index.py diff --git a/autorag-workspace/autorag/data/legacy/qacreation/llama_index_default_prompt.txt b/autorag/data/legacy/qacreation/llama_index_default_prompt.txt similarity index 100% rename from autorag-workspace/autorag/data/legacy/qacreation/llama_index_default_prompt.txt rename to autorag/data/legacy/qacreation/llama_index_default_prompt.txt diff --git a/autorag-workspace/autorag/data/legacy/qacreation/ragas.py b/autorag/data/legacy/qacreation/ragas.py similarity index 100% rename from autorag-workspace/autorag/data/legacy/qacreation/ragas.py rename to autorag/data/legacy/qacreation/ragas.py diff --git a/autorag-workspace/autorag/data/legacy/qacreation/simple.py b/autorag/data/legacy/qacreation/simple.py similarity index 100% rename from autorag-workspace/autorag/data/legacy/qacreation/simple.py rename to autorag/data/legacy/qacreation/simple.py diff --git a/autorag-workspace/autorag/data/parse/__init__.py b/autorag/data/parse/__init__.py similarity index 100% rename from autorag-workspace/autorag/data/parse/__init__.py rename to autorag/data/parse/__init__.py diff --git a/autorag-workspace/autorag/data/parse/base.py b/autorag/data/parse/base.py similarity index 100% rename from autorag-workspace/autorag/data/parse/base.py rename to autorag/data/parse/base.py diff --git a/autorag-workspace/autorag/data/parse/clova.py b/autorag/data/parse/clova.py similarity index 100% rename from autorag-workspace/autorag/data/parse/clova.py rename to autorag/data/parse/clova.py diff --git a/autorag-workspace/autorag/data/parse/langchain_parse.py b/autorag/data/parse/langchain_parse.py similarity index 100% rename from autorag-workspace/autorag/data/parse/langchain_parse.py rename to autorag/data/parse/langchain_parse.py diff --git a/autorag-workspace/autorag/data/parse/llamaparse.py b/autorag/data/parse/llamaparse.py similarity index 100% rename from autorag-workspace/autorag/data/parse/llamaparse.py rename to autorag/data/parse/llamaparse.py diff --git a/autorag-workspace/autorag/data/parse/run.py b/autorag/data/parse/run.py similarity index 100% rename from autorag-workspace/autorag/data/parse/run.py rename to autorag/data/parse/run.py diff --git a/autorag-workspace/autorag/data/parse/table_hybrid_parse.py b/autorag/data/parse/table_hybrid_parse.py similarity index 100% rename from autorag-workspace/autorag/data/parse/table_hybrid_parse.py rename to autorag/data/parse/table_hybrid_parse.py diff --git a/autorag-workspace/autorag/data/qa/__init__.py b/autorag/data/qa/__init__.py similarity index 100% rename from autorag-workspace/autorag/data/qa/__init__.py rename to autorag/data/qa/__init__.py diff --git a/autorag-workspace/autorag/data/qa/evolve/__init__.py b/autorag/data/qa/evolve/__init__.py similarity index 100% rename from autorag-workspace/autorag/data/qa/evolve/__init__.py rename to autorag/data/qa/evolve/__init__.py diff --git a/autorag-workspace/autorag/data/qa/evolve/llama_index_query_evolve.py b/autorag/data/qa/evolve/llama_index_query_evolve.py similarity index 100% rename from autorag-workspace/autorag/data/qa/evolve/llama_index_query_evolve.py rename to autorag/data/qa/evolve/llama_index_query_evolve.py diff --git a/autorag-workspace/autorag/data/qa/evolve/openai_query_evolve.py b/autorag/data/qa/evolve/openai_query_evolve.py similarity index 100% rename from autorag-workspace/autorag/data/qa/evolve/openai_query_evolve.py rename to autorag/data/qa/evolve/openai_query_evolve.py diff --git a/autorag-workspace/autorag/data/qa/evolve/prompt.py b/autorag/data/qa/evolve/prompt.py similarity index 100% rename from autorag-workspace/autorag/data/qa/evolve/prompt.py rename to autorag/data/qa/evolve/prompt.py diff --git a/autorag-workspace/autorag/data/qa/extract_evidence.py b/autorag/data/qa/extract_evidence.py similarity index 100% rename from autorag-workspace/autorag/data/qa/extract_evidence.py rename to autorag/data/qa/extract_evidence.py diff --git a/autorag-workspace/autorag/data/qa/filter/__init__.py b/autorag/data/qa/filter/__init__.py similarity index 100% rename from autorag-workspace/autorag/data/qa/filter/__init__.py rename to autorag/data/qa/filter/__init__.py diff --git a/autorag-workspace/autorag/data/qa/filter/dontknow.py b/autorag/data/qa/filter/dontknow.py similarity index 100% rename from autorag-workspace/autorag/data/qa/filter/dontknow.py rename to autorag/data/qa/filter/dontknow.py diff --git a/autorag-workspace/autorag/data/qa/filter/passage_dependency.py b/autorag/data/qa/filter/passage_dependency.py similarity index 100% rename from autorag-workspace/autorag/data/qa/filter/passage_dependency.py rename to autorag/data/qa/filter/passage_dependency.py diff --git a/autorag-workspace/autorag/data/qa/filter/prompt.py b/autorag/data/qa/filter/prompt.py similarity index 100% rename from autorag-workspace/autorag/data/qa/filter/prompt.py rename to autorag/data/qa/filter/prompt.py diff --git a/autorag-workspace/autorag/data/qa/generation_gt/__init__.py b/autorag/data/qa/generation_gt/__init__.py similarity index 100% rename from autorag-workspace/autorag/data/qa/generation_gt/__init__.py rename to autorag/data/qa/generation_gt/__init__.py diff --git a/autorag-workspace/autorag/data/qa/generation_gt/base.py b/autorag/data/qa/generation_gt/base.py similarity index 100% rename from autorag-workspace/autorag/data/qa/generation_gt/base.py rename to autorag/data/qa/generation_gt/base.py diff --git a/autorag-workspace/autorag/data/qa/generation_gt/llama_index_gen_gt.py b/autorag/data/qa/generation_gt/llama_index_gen_gt.py similarity index 100% rename from autorag-workspace/autorag/data/qa/generation_gt/llama_index_gen_gt.py rename to autorag/data/qa/generation_gt/llama_index_gen_gt.py diff --git a/autorag-workspace/autorag/data/qa/generation_gt/openai_gen_gt.py b/autorag/data/qa/generation_gt/openai_gen_gt.py similarity index 100% rename from autorag-workspace/autorag/data/qa/generation_gt/openai_gen_gt.py rename to autorag/data/qa/generation_gt/openai_gen_gt.py diff --git a/autorag-workspace/autorag/data/qa/generation_gt/prompt.py b/autorag/data/qa/generation_gt/prompt.py similarity index 100% rename from autorag-workspace/autorag/data/qa/generation_gt/prompt.py rename to autorag/data/qa/generation_gt/prompt.py diff --git a/autorag-workspace/autorag/data/qa/query/__init__.py b/autorag/data/qa/query/__init__.py similarity index 100% rename from autorag-workspace/autorag/data/qa/query/__init__.py rename to autorag/data/qa/query/__init__.py diff --git a/autorag-workspace/autorag/data/qa/query/llama_gen_query.py b/autorag/data/qa/query/llama_gen_query.py similarity index 100% rename from autorag-workspace/autorag/data/qa/query/llama_gen_query.py rename to autorag/data/qa/query/llama_gen_query.py diff --git a/autorag-workspace/autorag/data/qa/query/openai_gen_query.py b/autorag/data/qa/query/openai_gen_query.py similarity index 100% rename from autorag-workspace/autorag/data/qa/query/openai_gen_query.py rename to autorag/data/qa/query/openai_gen_query.py diff --git a/autorag-workspace/autorag/data/qa/query/prompt.py b/autorag/data/qa/query/prompt.py similarity index 100% rename from autorag-workspace/autorag/data/qa/query/prompt.py rename to autorag/data/qa/query/prompt.py diff --git a/autorag-workspace/autorag/data/qa/sample.py b/autorag/data/qa/sample.py similarity index 100% rename from autorag-workspace/autorag/data/qa/sample.py rename to autorag/data/qa/sample.py diff --git a/autorag-workspace/autorag/data/qa/schema.py b/autorag/data/qa/schema.py similarity index 100% rename from autorag-workspace/autorag/data/qa/schema.py rename to autorag/data/qa/schema.py diff --git a/autorag-workspace/autorag/data/utils/__init__.py b/autorag/data/utils/__init__.py similarity index 100% rename from autorag-workspace/autorag/data/utils/__init__.py rename to autorag/data/utils/__init__.py diff --git a/autorag-workspace/autorag/data/utils/util.py b/autorag/data/utils/util.py similarity index 100% rename from autorag-workspace/autorag/data/utils/util.py rename to autorag/data/utils/util.py diff --git a/autorag-workspace/autorag/deploy/__init__.py b/autorag/deploy/__init__.py similarity index 100% rename from autorag-workspace/autorag/deploy/__init__.py rename to autorag/deploy/__init__.py diff --git a/autorag-workspace/autorag/deploy/api.py b/autorag/deploy/api.py similarity index 100% rename from autorag-workspace/autorag/deploy/api.py rename to autorag/deploy/api.py diff --git a/autorag-workspace/autorag/deploy/base.py b/autorag/deploy/base.py similarity index 100% rename from autorag-workspace/autorag/deploy/base.py rename to autorag/deploy/base.py diff --git a/autorag-workspace/autorag/deploy/gradio.py b/autorag/deploy/gradio.py similarity index 100% rename from autorag-workspace/autorag/deploy/gradio.py rename to autorag/deploy/gradio.py diff --git a/autorag-workspace/autorag/deploy/swagger.yml b/autorag/deploy/swagger.yml similarity index 100% rename from autorag-workspace/autorag/deploy/swagger.yml rename to autorag/deploy/swagger.yml diff --git a/autorag-workspace/autorag/embedding/__init__.py b/autorag/embedding/__init__.py similarity index 100% rename from autorag-workspace/autorag/embedding/__init__.py rename to autorag/embedding/__init__.py diff --git a/autorag-workspace/autorag/embedding/base.py b/autorag/embedding/base.py similarity index 100% rename from autorag-workspace/autorag/embedding/base.py rename to autorag/embedding/base.py diff --git a/autorag-workspace/autorag/evaluation/__init__.py b/autorag/evaluation/__init__.py similarity index 100% rename from autorag-workspace/autorag/evaluation/__init__.py rename to autorag/evaluation/__init__.py diff --git a/autorag-workspace/autorag/evaluation/generation.py b/autorag/evaluation/generation.py similarity index 100% rename from autorag-workspace/autorag/evaluation/generation.py rename to autorag/evaluation/generation.py diff --git a/autorag-workspace/autorag/evaluation/metric/__init__.py b/autorag/evaluation/metric/__init__.py similarity index 100% rename from autorag-workspace/autorag/evaluation/metric/__init__.py rename to autorag/evaluation/metric/__init__.py diff --git a/autorag-workspace/autorag/evaluation/metric/deepeval_prompt.py b/autorag/evaluation/metric/deepeval_prompt.py similarity index 100% rename from autorag-workspace/autorag/evaluation/metric/deepeval_prompt.py rename to autorag/evaluation/metric/deepeval_prompt.py diff --git a/autorag-workspace/autorag/evaluation/metric/g_eval_prompts/coh_detailed.txt b/autorag/evaluation/metric/g_eval_prompts/coh_detailed.txt similarity index 100% rename from autorag-workspace/autorag/evaluation/metric/g_eval_prompts/coh_detailed.txt rename to autorag/evaluation/metric/g_eval_prompts/coh_detailed.txt diff --git a/autorag-workspace/autorag/evaluation/metric/g_eval_prompts/con_detailed.txt b/autorag/evaluation/metric/g_eval_prompts/con_detailed.txt similarity index 100% rename from autorag-workspace/autorag/evaluation/metric/g_eval_prompts/con_detailed.txt rename to autorag/evaluation/metric/g_eval_prompts/con_detailed.txt diff --git a/autorag-workspace/autorag/evaluation/metric/g_eval_prompts/flu_detailed.txt b/autorag/evaluation/metric/g_eval_prompts/flu_detailed.txt similarity index 100% rename from autorag-workspace/autorag/evaluation/metric/g_eval_prompts/flu_detailed.txt rename to autorag/evaluation/metric/g_eval_prompts/flu_detailed.txt diff --git a/autorag-workspace/autorag/evaluation/metric/g_eval_prompts/rel_detailed.txt b/autorag/evaluation/metric/g_eval_prompts/rel_detailed.txt similarity index 100% rename from autorag-workspace/autorag/evaluation/metric/g_eval_prompts/rel_detailed.txt rename to autorag/evaluation/metric/g_eval_prompts/rel_detailed.txt diff --git a/autorag-workspace/autorag/evaluation/metric/generation.py b/autorag/evaluation/metric/generation.py similarity index 100% rename from autorag-workspace/autorag/evaluation/metric/generation.py rename to autorag/evaluation/metric/generation.py diff --git a/autorag-workspace/autorag/evaluation/metric/retrieval.py b/autorag/evaluation/metric/retrieval.py similarity index 100% rename from autorag-workspace/autorag/evaluation/metric/retrieval.py rename to autorag/evaluation/metric/retrieval.py diff --git a/autorag-workspace/autorag/evaluation/metric/retrieval_contents.py b/autorag/evaluation/metric/retrieval_contents.py similarity index 100% rename from autorag-workspace/autorag/evaluation/metric/retrieval_contents.py rename to autorag/evaluation/metric/retrieval_contents.py diff --git a/autorag-workspace/autorag/evaluation/metric/util.py b/autorag/evaluation/metric/util.py similarity index 100% rename from autorag-workspace/autorag/evaluation/metric/util.py rename to autorag/evaluation/metric/util.py diff --git a/autorag-workspace/autorag/evaluation/retrieval.py b/autorag/evaluation/retrieval.py similarity index 100% rename from autorag-workspace/autorag/evaluation/retrieval.py rename to autorag/evaluation/retrieval.py diff --git a/autorag-workspace/autorag/evaluation/retrieval_contents.py b/autorag/evaluation/retrieval_contents.py similarity index 100% rename from autorag-workspace/autorag/evaluation/retrieval_contents.py rename to autorag/evaluation/retrieval_contents.py diff --git a/autorag-workspace/autorag/evaluation/util.py b/autorag/evaluation/util.py similarity index 100% rename from autorag-workspace/autorag/evaluation/util.py rename to autorag/evaluation/util.py diff --git a/autorag-workspace/autorag/evaluator.py b/autorag/evaluator.py similarity index 100% rename from autorag-workspace/autorag/evaluator.py rename to autorag/evaluator.py diff --git a/autorag-workspace/autorag/generator_models.py b/autorag/generator_models.py similarity index 100% rename from autorag-workspace/autorag/generator_models.py rename to autorag/generator_models.py diff --git a/autorag-workspace/autorag/node_line.py b/autorag/node_line.py similarity index 100% rename from autorag-workspace/autorag/node_line.py rename to autorag/node_line.py diff --git a/autorag-workspace/autorag/nodes/__init__.py b/autorag/nodes/__init__.py similarity index 100% rename from autorag-workspace/autorag/nodes/__init__.py rename to autorag/nodes/__init__.py diff --git a/autorag-workspace/autorag/nodes/generator/__init__.py b/autorag/nodes/generator/__init__.py similarity index 100% rename from autorag-workspace/autorag/nodes/generator/__init__.py rename to autorag/nodes/generator/__init__.py diff --git a/autorag-workspace/autorag/nodes/generator/base.py b/autorag/nodes/generator/base.py similarity index 100% rename from autorag-workspace/autorag/nodes/generator/base.py rename to autorag/nodes/generator/base.py diff --git a/autorag-workspace/autorag/nodes/generator/llama_index_llm.py b/autorag/nodes/generator/llama_index_llm.py similarity index 100% rename from autorag-workspace/autorag/nodes/generator/llama_index_llm.py rename to autorag/nodes/generator/llama_index_llm.py diff --git a/autorag-workspace/autorag/nodes/generator/openai_llm.py b/autorag/nodes/generator/openai_llm.py similarity index 100% rename from autorag-workspace/autorag/nodes/generator/openai_llm.py rename to autorag/nodes/generator/openai_llm.py diff --git a/autorag-workspace/autorag/nodes/generator/run.py b/autorag/nodes/generator/run.py similarity index 100% rename from autorag-workspace/autorag/nodes/generator/run.py rename to autorag/nodes/generator/run.py diff --git a/autorag-workspace/autorag/nodes/generator/vllm.py b/autorag/nodes/generator/vllm.py similarity index 100% rename from autorag-workspace/autorag/nodes/generator/vllm.py rename to autorag/nodes/generator/vllm.py diff --git a/autorag-workspace/autorag/nodes/generator/vllm_api.py b/autorag/nodes/generator/vllm_api.py similarity index 100% rename from autorag-workspace/autorag/nodes/generator/vllm_api.py rename to autorag/nodes/generator/vllm_api.py diff --git a/autorag-workspace/autorag/nodes/passageaugmenter/__init__.py b/autorag/nodes/passageaugmenter/__init__.py similarity index 100% rename from autorag-workspace/autorag/nodes/passageaugmenter/__init__.py rename to autorag/nodes/passageaugmenter/__init__.py diff --git a/autorag-workspace/autorag/nodes/passageaugmenter/base.py b/autorag/nodes/passageaugmenter/base.py similarity index 100% rename from autorag-workspace/autorag/nodes/passageaugmenter/base.py rename to autorag/nodes/passageaugmenter/base.py diff --git a/autorag-workspace/autorag/nodes/passageaugmenter/pass_passage_augmenter.py b/autorag/nodes/passageaugmenter/pass_passage_augmenter.py similarity index 100% rename from autorag-workspace/autorag/nodes/passageaugmenter/pass_passage_augmenter.py rename to autorag/nodes/passageaugmenter/pass_passage_augmenter.py diff --git a/autorag-workspace/autorag/nodes/passageaugmenter/prev_next_augmenter.py b/autorag/nodes/passageaugmenter/prev_next_augmenter.py similarity index 100% rename from autorag-workspace/autorag/nodes/passageaugmenter/prev_next_augmenter.py rename to autorag/nodes/passageaugmenter/prev_next_augmenter.py diff --git a/autorag-workspace/autorag/nodes/passageaugmenter/run.py b/autorag/nodes/passageaugmenter/run.py similarity index 100% rename from autorag-workspace/autorag/nodes/passageaugmenter/run.py rename to autorag/nodes/passageaugmenter/run.py diff --git a/autorag-workspace/autorag/nodes/passagecompressor/__init__.py b/autorag/nodes/passagecompressor/__init__.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagecompressor/__init__.py rename to autorag/nodes/passagecompressor/__init__.py diff --git a/autorag-workspace/autorag/nodes/passagecompressor/base.py b/autorag/nodes/passagecompressor/base.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagecompressor/base.py rename to autorag/nodes/passagecompressor/base.py diff --git a/autorag-workspace/autorag/nodes/passagecompressor/longllmlingua.py b/autorag/nodes/passagecompressor/longllmlingua.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagecompressor/longllmlingua.py rename to autorag/nodes/passagecompressor/longllmlingua.py diff --git a/autorag-workspace/autorag/nodes/passagecompressor/pass_compressor.py b/autorag/nodes/passagecompressor/pass_compressor.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagecompressor/pass_compressor.py rename to autorag/nodes/passagecompressor/pass_compressor.py diff --git a/autorag-workspace/autorag/nodes/passagecompressor/refine.py b/autorag/nodes/passagecompressor/refine.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagecompressor/refine.py rename to autorag/nodes/passagecompressor/refine.py diff --git a/autorag-workspace/autorag/nodes/passagecompressor/run.py b/autorag/nodes/passagecompressor/run.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagecompressor/run.py rename to autorag/nodes/passagecompressor/run.py diff --git a/autorag-workspace/autorag/nodes/passagecompressor/tree_summarize.py b/autorag/nodes/passagecompressor/tree_summarize.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagecompressor/tree_summarize.py rename to autorag/nodes/passagecompressor/tree_summarize.py diff --git a/autorag-workspace/autorag/nodes/passagefilter/__init__.py b/autorag/nodes/passagefilter/__init__.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagefilter/__init__.py rename to autorag/nodes/passagefilter/__init__.py diff --git a/autorag-workspace/autorag/nodes/passagefilter/base.py b/autorag/nodes/passagefilter/base.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagefilter/base.py rename to autorag/nodes/passagefilter/base.py diff --git a/autorag-workspace/autorag/nodes/passagefilter/pass_passage_filter.py b/autorag/nodes/passagefilter/pass_passage_filter.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagefilter/pass_passage_filter.py rename to autorag/nodes/passagefilter/pass_passage_filter.py diff --git a/autorag-workspace/autorag/nodes/passagefilter/percentile_cutoff.py b/autorag/nodes/passagefilter/percentile_cutoff.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagefilter/percentile_cutoff.py rename to autorag/nodes/passagefilter/percentile_cutoff.py diff --git a/autorag-workspace/autorag/nodes/passagefilter/recency.py b/autorag/nodes/passagefilter/recency.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagefilter/recency.py rename to autorag/nodes/passagefilter/recency.py diff --git a/autorag-workspace/autorag/nodes/passagefilter/run.py b/autorag/nodes/passagefilter/run.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagefilter/run.py rename to autorag/nodes/passagefilter/run.py diff --git a/autorag-workspace/autorag/nodes/passagefilter/similarity_percentile_cutoff.py b/autorag/nodes/passagefilter/similarity_percentile_cutoff.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagefilter/similarity_percentile_cutoff.py rename to autorag/nodes/passagefilter/similarity_percentile_cutoff.py diff --git a/autorag-workspace/autorag/nodes/passagefilter/similarity_threshold_cutoff.py b/autorag/nodes/passagefilter/similarity_threshold_cutoff.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagefilter/similarity_threshold_cutoff.py rename to autorag/nodes/passagefilter/similarity_threshold_cutoff.py diff --git a/autorag-workspace/autorag/nodes/passagefilter/threshold_cutoff.py b/autorag/nodes/passagefilter/threshold_cutoff.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagefilter/threshold_cutoff.py rename to autorag/nodes/passagefilter/threshold_cutoff.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/__init__.py b/autorag/nodes/passagereranker/__init__.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/__init__.py rename to autorag/nodes/passagereranker/__init__.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/base.py b/autorag/nodes/passagereranker/base.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/base.py rename to autorag/nodes/passagereranker/base.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/cohere.py b/autorag/nodes/passagereranker/cohere.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/cohere.py rename to autorag/nodes/passagereranker/cohere.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/colbert.py b/autorag/nodes/passagereranker/colbert.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/colbert.py rename to autorag/nodes/passagereranker/colbert.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/dragonkue2.py b/autorag/nodes/passagereranker/dragonkue2.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/dragonkue2.py rename to autorag/nodes/passagereranker/dragonkue2.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/flag_embedding.py b/autorag/nodes/passagereranker/flag_embedding.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/flag_embedding.py rename to autorag/nodes/passagereranker/flag_embedding.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/flag_embedding_llm.py b/autorag/nodes/passagereranker/flag_embedding_llm.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/flag_embedding_llm.py rename to autorag/nodes/passagereranker/flag_embedding_llm.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/flashrank.py b/autorag/nodes/passagereranker/flashrank.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/flashrank.py rename to autorag/nodes/passagereranker/flashrank.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/jina.py b/autorag/nodes/passagereranker/jina.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/jina.py rename to autorag/nodes/passagereranker/jina.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/koreranker.py b/autorag/nodes/passagereranker/koreranker.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/koreranker.py rename to autorag/nodes/passagereranker/koreranker.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/mixedbreadai.py b/autorag/nodes/passagereranker/mixedbreadai.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/mixedbreadai.py rename to autorag/nodes/passagereranker/mixedbreadai.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/monot5.py b/autorag/nodes/passagereranker/monot5.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/monot5.py rename to autorag/nodes/passagereranker/monot5.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/openvino.py b/autorag/nodes/passagereranker/openvino.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/openvino.py rename to autorag/nodes/passagereranker/openvino.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/pass_reranker.py b/autorag/nodes/passagereranker/pass_reranker.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/pass_reranker.py rename to autorag/nodes/passagereranker/pass_reranker.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/rankgpt.py b/autorag/nodes/passagereranker/rankgpt.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/rankgpt.py rename to autorag/nodes/passagereranker/rankgpt.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/run.py b/autorag/nodes/passagereranker/run.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/run.py rename to autorag/nodes/passagereranker/run.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/sentence_transformer.py b/autorag/nodes/passagereranker/sentence_transformer.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/sentence_transformer.py rename to autorag/nodes/passagereranker/sentence_transformer.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/tart/__init__.py b/autorag/nodes/passagereranker/tart/__init__.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/tart/__init__.py rename to autorag/nodes/passagereranker/tart/__init__.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/tart/modeling_enc_t5.py b/autorag/nodes/passagereranker/tart/modeling_enc_t5.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/tart/modeling_enc_t5.py rename to autorag/nodes/passagereranker/tart/modeling_enc_t5.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/tart/tart.py b/autorag/nodes/passagereranker/tart/tart.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/tart/tart.py rename to autorag/nodes/passagereranker/tart/tart.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/tart/tokenization_enc_t5.py b/autorag/nodes/passagereranker/tart/tokenization_enc_t5.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/tart/tokenization_enc_t5.py rename to autorag/nodes/passagereranker/tart/tokenization_enc_t5.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/time_reranker.py b/autorag/nodes/passagereranker/time_reranker.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/time_reranker.py rename to autorag/nodes/passagereranker/time_reranker.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/upr.py b/autorag/nodes/passagereranker/upr.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/upr.py rename to autorag/nodes/passagereranker/upr.py diff --git a/autorag-workspace/autorag/nodes/passagereranker/voyageai.py b/autorag/nodes/passagereranker/voyageai.py similarity index 100% rename from autorag-workspace/autorag/nodes/passagereranker/voyageai.py rename to autorag/nodes/passagereranker/voyageai.py diff --git a/autorag-workspace/autorag/nodes/promptmaker/__init__.py b/autorag/nodes/promptmaker/__init__.py similarity index 100% rename from autorag-workspace/autorag/nodes/promptmaker/__init__.py rename to autorag/nodes/promptmaker/__init__.py diff --git a/autorag-workspace/autorag/nodes/promptmaker/base.py b/autorag/nodes/promptmaker/base.py similarity index 100% rename from autorag-workspace/autorag/nodes/promptmaker/base.py rename to autorag/nodes/promptmaker/base.py diff --git a/autorag-workspace/autorag/nodes/promptmaker/fstring.py b/autorag/nodes/promptmaker/fstring.py similarity index 100% rename from autorag-workspace/autorag/nodes/promptmaker/fstring.py rename to autorag/nodes/promptmaker/fstring.py diff --git a/autorag-workspace/autorag/nodes/promptmaker/long_context_reorder.py b/autorag/nodes/promptmaker/long_context_reorder.py similarity index 100% rename from autorag-workspace/autorag/nodes/promptmaker/long_context_reorder.py rename to autorag/nodes/promptmaker/long_context_reorder.py diff --git a/autorag-workspace/autorag/nodes/promptmaker/run.py b/autorag/nodes/promptmaker/run.py similarity index 100% rename from autorag-workspace/autorag/nodes/promptmaker/run.py rename to autorag/nodes/promptmaker/run.py diff --git a/autorag-workspace/autorag/nodes/promptmaker/window_replacement.py b/autorag/nodes/promptmaker/window_replacement.py similarity index 100% rename from autorag-workspace/autorag/nodes/promptmaker/window_replacement.py rename to autorag/nodes/promptmaker/window_replacement.py diff --git a/autorag-workspace/autorag/nodes/queryexpansion/__init__.py b/autorag/nodes/queryexpansion/__init__.py similarity index 100% rename from autorag-workspace/autorag/nodes/queryexpansion/__init__.py rename to autorag/nodes/queryexpansion/__init__.py diff --git a/autorag-workspace/autorag/nodes/queryexpansion/base.py b/autorag/nodes/queryexpansion/base.py similarity index 100% rename from autorag-workspace/autorag/nodes/queryexpansion/base.py rename to autorag/nodes/queryexpansion/base.py diff --git a/autorag-workspace/autorag/nodes/queryexpansion/hyde.py b/autorag/nodes/queryexpansion/hyde.py similarity index 100% rename from autorag-workspace/autorag/nodes/queryexpansion/hyde.py rename to autorag/nodes/queryexpansion/hyde.py diff --git a/autorag-workspace/autorag/nodes/queryexpansion/multi_query_expansion.py b/autorag/nodes/queryexpansion/multi_query_expansion.py similarity index 100% rename from autorag-workspace/autorag/nodes/queryexpansion/multi_query_expansion.py rename to autorag/nodes/queryexpansion/multi_query_expansion.py diff --git a/autorag-workspace/autorag/nodes/queryexpansion/pass_query_expansion.py b/autorag/nodes/queryexpansion/pass_query_expansion.py similarity index 100% rename from autorag-workspace/autorag/nodes/queryexpansion/pass_query_expansion.py rename to autorag/nodes/queryexpansion/pass_query_expansion.py diff --git a/autorag-workspace/autorag/nodes/queryexpansion/query_decompose.py b/autorag/nodes/queryexpansion/query_decompose.py similarity index 100% rename from autorag-workspace/autorag/nodes/queryexpansion/query_decompose.py rename to autorag/nodes/queryexpansion/query_decompose.py diff --git a/autorag-workspace/autorag/nodes/queryexpansion/run.py b/autorag/nodes/queryexpansion/run.py similarity index 100% rename from autorag-workspace/autorag/nodes/queryexpansion/run.py rename to autorag/nodes/queryexpansion/run.py diff --git a/autorag-workspace/autorag/nodes/retrieval/__init__.py b/autorag/nodes/retrieval/__init__.py similarity index 100% rename from autorag-workspace/autorag/nodes/retrieval/__init__.py rename to autorag/nodes/retrieval/__init__.py diff --git a/autorag-workspace/autorag/nodes/retrieval/base.py b/autorag/nodes/retrieval/base.py similarity index 100% rename from autorag-workspace/autorag/nodes/retrieval/base.py rename to autorag/nodes/retrieval/base.py diff --git a/autorag-workspace/autorag/nodes/retrieval/bm25.py b/autorag/nodes/retrieval/bm25.py similarity index 100% rename from autorag-workspace/autorag/nodes/retrieval/bm25.py rename to autorag/nodes/retrieval/bm25.py diff --git a/autorag-workspace/autorag/nodes/retrieval/hybrid_cc.py b/autorag/nodes/retrieval/hybrid_cc.py similarity index 100% rename from autorag-workspace/autorag/nodes/retrieval/hybrid_cc.py rename to autorag/nodes/retrieval/hybrid_cc.py diff --git a/autorag-workspace/autorag/nodes/retrieval/hybrid_rrf.py b/autorag/nodes/retrieval/hybrid_rrf.py similarity index 100% rename from autorag-workspace/autorag/nodes/retrieval/hybrid_rrf.py rename to autorag/nodes/retrieval/hybrid_rrf.py diff --git a/autorag-workspace/autorag/nodes/retrieval/run.py b/autorag/nodes/retrieval/run.py similarity index 100% rename from autorag-workspace/autorag/nodes/retrieval/run.py rename to autorag/nodes/retrieval/run.py diff --git a/autorag-workspace/autorag/nodes/retrieval/vectordb.py b/autorag/nodes/retrieval/vectordb.py similarity index 100% rename from autorag-workspace/autorag/nodes/retrieval/vectordb.py rename to autorag/nodes/retrieval/vectordb.py diff --git a/autorag-workspace/autorag/nodes/util.py b/autorag/nodes/util.py similarity index 100% rename from autorag-workspace/autorag/nodes/util.py rename to autorag/nodes/util.py diff --git a/autorag-workspace/autorag/parser.py b/autorag/parser.py similarity index 100% rename from autorag-workspace/autorag/parser.py rename to autorag/parser.py diff --git a/autorag-workspace/autorag/schema/__init__.py b/autorag/schema/__init__.py similarity index 100% rename from autorag-workspace/autorag/schema/__init__.py rename to autorag/schema/__init__.py diff --git a/autorag-workspace/autorag/schema/base.py b/autorag/schema/base.py similarity index 100% rename from autorag-workspace/autorag/schema/base.py rename to autorag/schema/base.py diff --git a/autorag-workspace/autorag/schema/metricinput.py b/autorag/schema/metricinput.py similarity index 100% rename from autorag-workspace/autorag/schema/metricinput.py rename to autorag/schema/metricinput.py diff --git a/autorag-workspace/autorag/schema/module.py b/autorag/schema/module.py similarity index 100% rename from autorag-workspace/autorag/schema/module.py rename to autorag/schema/module.py diff --git a/autorag-workspace/autorag/schema/node.py b/autorag/schema/node.py similarity index 100% rename from autorag-workspace/autorag/schema/node.py rename to autorag/schema/node.py diff --git a/autorag-workspace/autorag/strategy.py b/autorag/strategy.py similarity index 100% rename from autorag-workspace/autorag/strategy.py rename to autorag/strategy.py diff --git a/autorag-workspace/autorag/support.py b/autorag/support.py similarity index 100% rename from autorag-workspace/autorag/support.py rename to autorag/support.py diff --git a/autorag-workspace/autorag/utils/__init__.py b/autorag/utils/__init__.py similarity index 100% rename from autorag-workspace/autorag/utils/__init__.py rename to autorag/utils/__init__.py diff --git a/autorag-workspace/autorag/utils/preprocess.py b/autorag/utils/preprocess.py similarity index 100% rename from autorag-workspace/autorag/utils/preprocess.py rename to autorag/utils/preprocess.py diff --git a/autorag-workspace/autorag/utils/util.py b/autorag/utils/util.py similarity index 100% rename from autorag-workspace/autorag/utils/util.py rename to autorag/utils/util.py diff --git a/autorag-workspace/autorag/validator.py b/autorag/validator.py similarity index 100% rename from autorag-workspace/autorag/validator.py rename to autorag/validator.py diff --git a/autorag-workspace/autorag/vectordb/__init__.py b/autorag/vectordb/__init__.py similarity index 100% rename from autorag-workspace/autorag/vectordb/__init__.py rename to autorag/vectordb/__init__.py diff --git a/autorag-workspace/autorag/vectordb/base.py b/autorag/vectordb/base.py similarity index 100% rename from autorag-workspace/autorag/vectordb/base.py rename to autorag/vectordb/base.py diff --git a/autorag-workspace/autorag/vectordb/chroma.py b/autorag/vectordb/chroma.py similarity index 100% rename from autorag-workspace/autorag/vectordb/chroma.py rename to autorag/vectordb/chroma.py diff --git a/autorag-workspace/autorag/vectordb/couchbase.py b/autorag/vectordb/couchbase.py similarity index 100% rename from autorag-workspace/autorag/vectordb/couchbase.py rename to autorag/vectordb/couchbase.py diff --git a/autorag-workspace/autorag/vectordb/milvus.py b/autorag/vectordb/milvus.py similarity index 100% rename from autorag-workspace/autorag/vectordb/milvus.py rename to autorag/vectordb/milvus.py diff --git a/autorag-workspace/autorag/vectordb/pinecone.py b/autorag/vectordb/pinecone.py similarity index 100% rename from autorag-workspace/autorag/vectordb/pinecone.py rename to autorag/vectordb/pinecone.py diff --git a/autorag-workspace/autorag/vectordb/qdrant.py b/autorag/vectordb/qdrant.py similarity index 100% rename from autorag-workspace/autorag/vectordb/qdrant.py rename to autorag/vectordb/qdrant.py diff --git a/autorag-workspace/autorag/vectordb/weaviate.py b/autorag/vectordb/weaviate.py similarity index 100% rename from autorag-workspace/autorag/vectordb/weaviate.py rename to autorag/vectordb/weaviate.py diff --git a/autorag-workspace/autorag/web.py b/autorag/web.py similarity index 100% rename from autorag-workspace/autorag/web.py rename to autorag/web.py diff --git a/docker-compose.yml b/docker-compose.yml index dd74931..7b2488e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,6 +23,7 @@ services: - autorag-ollama # Ollama๊ฐ€ ๋จผ์ € ์‹คํ–‰๋˜๋„๋ก ์„ค์ • networks: - autorag_network + command: ["bash"] # ๊ธฐ๋ณธ ์‹คํ–‰ ๋ช…๋ น autorag-ollama: image: ollama/ollama diff --git a/entrypoint.sh b/entrypoint.sh new file mode 100644 index 0000000..f938248 --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +set -e # ์˜ค๋ฅ˜ ๋ฐœ์ƒ ์‹œ ์ฆ‰์‹œ ์ข…๋ฃŒ + +# ์‹คํ–‰ ๋ชจ๋“œ ์„ค์ • +case "$1" in + data-gen) + echo "๋ฐ์ดํ„ฐ ์ƒ์„ฑ ์ค‘..." + bash /usr/src/app/making.sh + ;; + evaluate) + echo "ํ‰๊ฐ€ ์‹คํ–‰ ์ค‘..." + python /usr/src/app/main.py + ;; + bash) + echo "์ปจํ…Œ์ด๋„ˆ ๋‚ด๋ถ€ ์ ‘๊ทผ" + exec /bin/bash + ;; + *) + echo "๊ธฐ๋ณธ default ์‹คํ–‰ ๋ชจ๋“œ: bash" + echo "์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ์‹คํ–‰ ๋ชจ๋“œ: data-gen | evaluate | bash" + exit 1 + ;; +esac diff --git a/autorag-workspace/main.py b/main.py similarity index 78% rename from autorag-workspace/main.py rename to main.py index 4ddea57..b9021c0 100644 --- a/autorag-workspace/main.py +++ b/main.py @@ -6,13 +6,13 @@ from dotenv import load_dotenv from llama_index.llms.ollama import Ollama -data_path = '../projects/daesan-dangjin_01' # ํด๋”๋ช… ํ™•์ธ +data_path = './projects/example_01' # ํด๋”๋ช… ํ™•์ธ OLLAMA_BASE_URL = "autorag-ollama:11434" autorag.generator_models["ollama"] = autorag.LazyInit(Ollama, base_url=OLLAMA_BASE_URL, model="phi4", request_timeout=300, num_gpus=1) autorag.generator_models["ollama"] = autorag.LazyInit(Ollama, base_url=OLLAMA_BASE_URL, model="gemma3:12b", request_timeout=300, num_gpus=1) -autorag.generator_models["ollama"] = autorag.LazyInit(Ollama, base_url=OLLAMA_BASE_URL, model="deepseek-r1:14b", request_timeout=300, num_gpus=1) -autorag.generator_models["ollama"] = autorag.LazyInit(Ollama, base_url=OLLAMA_BASE_URL, model="aya-expanse:8b", request_timeout=300, num_gpus=1) +# autorag.generator_models["ollama"] = autorag.LazyInit(Ollama, base_url=OLLAMA_BASE_URL, model="deepseek-r1:14b", request_timeout=300, num_gpus=1) +# autorag.generator_models["ollama"] = autorag.LazyInit(Ollama, base_url=OLLAMA_BASE_URL, model="aya-expanse:8b", request_timeout=300, num_gpus=1) # autorag/embedding/base.py ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ์ถ”๊ฐ€ํ•จ diff --git a/autorag-workspace/making.sh b/making.sh similarity index 98% rename from autorag-workspace/making.sh rename to making.sh index adf450f..c15bb0b 100644 --- a/autorag-workspace/making.sh +++ b/making.sh @@ -1,6 +1,6 @@ #!/bin/bash -PROJECT_DIR="../projects/daesan-dangjin_01" +PROJECT_DIR="../projects/example_01" CONFIG_DIR="$PROJECT_DIR/config" RAW_DATA_DIR="$PROJECT_DIR/raw_data" diff --git a/projects/example_01/benchmark/data/corpus.parquet b/projects/example_01/benchmark/data/corpus.parquet new file mode 100644 index 0000000..2b7726f Binary files /dev/null and b/projects/example_01/benchmark/data/corpus.parquet differ diff --git a/projects/example_01/benchmark/data/qa.parquet b/projects/example_01/benchmark/data/qa.parquet new file mode 100644 index 0000000..4a90b0b Binary files /dev/null and b/projects/example_01/benchmark/data/qa.parquet differ diff --git a/projects/src/check_corpus_ids.py b/projects/src/check_corpus_ids.py deleted file mode 100644 index c6e997e..0000000 --- a/projects/src/check_corpus_ids.py +++ /dev/null @@ -1,12 +0,0 @@ -import pandas as pd - -# corpus.parquet ํŒŒ์ผ ๋กœ๋“œ -corpus_path = "./original/corpus.parquet" -corpus_data = pd.read_parquet(corpus_path) - -# ํŠน์ • ๋ฌธ์„œ ID๊ฐ€ ์กด์žฌํ•˜๋Š”์ง€ ํ™•์ธ -doc_id = "bac7dea6-9477-4290-b57b-861548f7020d" -print(doc_id in corpus_data['doc_id'].values) # True๋ฉด ์กด์žฌ, False๋ฉด ์—†์Œ - -# corpus_data์˜ ์ฒซ 5๊ฐœ ๋ฐ์ดํ„ฐ ํ™•์ธ -print(corpus_data.head()) \ No newline at end of file diff --git a/projects/src/check_vectordb_corpus.py b/projects/src/check_vectordb_corpus.py deleted file mode 100644 index c9da3c0..0000000 --- a/projects/src/check_vectordb_corpus.py +++ /dev/null @@ -1,14 +0,0 @@ -from chromadb import PersistentClient - -# ChromaDB ํด๋ผ์ด์–ธํŠธ ์—ฐ๊ฒฐ -client = PersistentClient(path="./report_01/chroma") - -# ์ปฌ๋ ‰์…˜ ๋ชฉ๋ก ํ™•์ธ -print(client.list_collections()) - -# 'document_collection' ์ปฌ๋ ‰์…˜์—์„œ ๋ฐ์ดํ„ฐ ์กฐํšŒ -collection = client.get_collection("document_collection") - -# ์ €์žฅ๋œ ๋ชจ๋“  ๋ฌธ์„œ ID ์กฐํšŒ -stored_docs = collection.get(include=["ids"]) -print("Stored Document IDs:", stored_docs) diff --git a/projects/src/check_vectordb_ingestion.py b/projects/src/check_vectordb_ingestion.py deleted file mode 100644 index 78db362..0000000 --- a/projects/src/check_vectordb_ingestion.py +++ /dev/null @@ -1,11 +0,0 @@ -import chromadb - -# ChromaDB ์—ฐ๊ฒฐ -client = chromadb.PersistentClient(path="./report_01/chroma") -collection = client.get_collection("document_collection") - -# ์ €์žฅ๋œ ๋ฌธ์„œ ๊ฐœ์ˆ˜ ํ™•์ธ -print("Stored Document Count:", len(collection.get(include=['ids'])['ids'])) - -# ์ผ๋ถ€ ๋ฌธ์„œ ID ํ™•์ธ -print("Example Document IDs:", collection.get(include=['ids'], limit=5)['ids']) diff --git a/projects/src/convert_parquet_to_json.py b/projects/src/convert_parquet_to_json.py deleted file mode 100644 index 0059d9d..0000000 --- a/projects/src/convert_parquet_to_json.py +++ /dev/null @@ -1,21 +0,0 @@ -import os -import pandas as pd - -SOURCE_DIR = "/usr/src/app/projects/daesan-dangjin_01" -TARGET_DIR = "/usr/src/app/projects/daesan-dangjin_01/json" -os.makedirs(TARGET_DIR, exist_ok=True) - -parquet_files = [f for f in os.listdir(SOURCE_DIR) if f.endswith(".parquet")] - - -for file in parquet_files: - parquet_path = os.path.join(SOURCE_DIR, file) - json_filename = os.path.splitext(file)[0] + ".json" - json_path = os.path.join(TARGET_DIR, json_filename) - - df = pd.read_parquet(parquet_path, engine="pyarrow") - df.to_json(json_path, orient="records", force_ascii=False, indent=2) - - print(f"โœ… ๋ณ€ํ™˜ ์™„๋ฃŒ: {json_path}") - -print(f"๐Ÿ“ ๋ชจ๋“  Parquet ํŒŒ์ผ์ด JSON์œผ๋กœ ๋ณ€ํ™˜๋˜์–ด {TARGET_DIR}์— ์ €์žฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.") diff --git a/requirements.txt b/requirements.txt index 6378e06..080e917 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,65 +1,362 @@ -pydantic<2.10.0 # incompatible with llama index -numpy<2.0.0 # temporal not using numpy 2.0.0 -pandas>=2.1.0 -tqdm -tiktoken>=0.7.0 # for counting token -openai>=1.0.0 -rank_bm25 # for bm25 retrieval -pyyaml # for yaml file -pyarrow # for pandas with parquet -fastparquet # for pandas with parquet -sacrebleu # for bleu score -evaluate # for meteor and other scores -rouge_score # for rouge score -rich # for pretty logging -click # for cli -cohere>=5.8.0 # for cohere services -tokenlog>=0.0.2 # for token logging -aiohttp # for async http requests -voyageai # for voyageai reranker -mixedbread-ai # for mixedbread-ai reranker -llama-index-llms-bedrock -scikit-learn -emoji - -### Vector DB ### -pymilvus>=2.3.0 # for using milvus vectordb -chromadb>=0.5.0 # for chroma vectordb -weaviate-client # for weaviate vectordb -pinecone[grpc] # for pinecone vectordb -couchbase # for couchbase vectordb -qdrant-client # for qdrant vectordb - -### API server ### -quart -pyngrok -### LlamaIndex ### -llama-index>=0.11.0 -llama-index-core>=0.11.0 -# readers -llama-index-readers-file -# Embeddings -llama-index-embeddings-openai -llama-index-embeddings-ollama -# LLMs -llama-index-llms-openai>=0.2.7 -llama-index-llms-openai-like -# Retriever -llama-index-retrievers-bm25 - -# WebUI -streamlit -gradio - -### Langchain ### -langchain-core==0.3.0 -langchain-unstructured>=0.1.5 -langchain-upstage -langchain-community>=0.3.0 - -# autorag dashboard -panel -seaborn -ipykernel -ipywidgets -ipywidgets_bokeh \ No newline at end of file +about-time==4.2.1 +absl-py==2.1.0 +accelerate==1.5.2 +aiofiles==23.2.1 +aiohappyeyeballs==2.6.1 +aiohttp==3.11.14 +aiolimiter==1.2.1 +aiosignal==1.3.2 +alive-progress==3.2.0 +altair==5.5.0 +annotated-types==0.7.0 +anthropic==0.28.1 +anyio==4.9.0 +asgiref==3.8.1 +asttokens==3.0.0 +async-timeout==4.0.3 +attrs==25.3.0 +Authlib==1.3.1 +autograd==1.7.0 +AutoRAG==0.3.13 +backoff==2.2.1 +bcrypt==4.3.0 +beautifulsoup4==4.13.3 +bert-score==0.3.13 +bleach==6.2.0 +blinker==1.9.0 +bm25s==0.2.9 +bokeh==3.6.3 +boto3==1.37.14 +botocore==1.37.14 +build==1.2.2.post1 +cachetools==5.5.2 +cbor==1.0.0 +certifi==2025.1.31 +cffi==1.17.1 +charset-normalizer==3.4.1 +chroma-hnswlib==0.7.6 +chromadb==0.6.3 +click==8.1.8 +cma==3.2.2 +cohere==5.14.0 +colorama==0.4.6 +coloredlogs==15.0.1 +comm==0.2.2 +contourpy==1.3.1 +couchbase==4.3.5 +cramjam==2.9.1 +cryptography==44.0.2 +cycler==0.12.1 +dataclasses-json==0.6.7 +datasets==3.2.0 +debugpy==1.8.13 +decorator==5.2.1 +Deprecated==1.2.18 +dill==0.3.8 +dirtyjson==1.0.8 +distro==1.9.0 +durationpy==0.9 +emoji==2.14.1 +eval_type_backport==0.2.2 +evaluate==0.4.3 +exceptiongroup==1.2.2 +executing==2.2.0 +fastapi==0.115.11 +fastavro==1.10.0 +fastparquet==2024.11.0 +ffmpy==0.5.0 +filelock==3.18.0 +filetype==1.2.0 +FlagEmbedding==1.3.4 +Flask==3.1.0 +flatbuffers==25.2.10 +fonttools==4.56.0 +frozenlist==1.5.0 +fsspec==2024.9.0 +gitdb==4.0.12 +GitPython==3.1.44 +google-auth==2.38.0 +googleapis-common-protos==1.69.2 +gradio==4.44.1 +gradio_client==1.3.0 +grapheme==0.6.0 +greenlet==3.1.1 +groovy==0.1.2 +grpcio==1.67.1 +grpcio-health-checking==1.67.1 +grpcio-tools==1.67.1 +h11==0.14.0 +h2==4.2.0 +hpack==4.1.0 +httpcore==1.0.7 +httptools==0.6.4 +httpx==0.27.2 +httpx-sse==0.4.0 +huggingface-hub==0.23.5 +humanfriendly==10.0 +Hypercorn==0.17.3 +hyperframe==6.1.0 +idna==3.10 +ijson==3.3.0 +importlib_metadata==8.6.1 +importlib_resources==6.5.2 +inscriptis==2.5.3 +ipykernel==6.29.5 +ipython==8.34.0 +ipywidgets==8.1.5 +ipywidgets_bokeh==1.6.0 +ir_datasets==0.5.10 +itsdangerous==2.2.0 +jedi==0.19.2 +Jinja2==3.1.6 +jiter==0.9.0 +jmespath==1.0.1 +joblib==1.4.2 +jpype1==1.5.2 +jsonpatch==1.33 +jsonpath-python==1.0.6 +jsonpointer==3.0.0 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +jstyleson==0.0.2 +jupyter_client==8.6.3 +jupyter_core==5.7.2 +jupyterlab_widgets==3.0.13 +kiwipiepy==0.20.4 +kiwipiepy_model==0.20.0 +kiwisolver==1.4.8 +konlpy==0.6.0 +kubernetes==32.0.1 +langchain==0.3.20 +langchain-community==0.3.19 +langchain-core==0.3.45 +langchain-openai==0.3.9 +langchain-text-splitters==0.3.6 +langchain-unstructured==0.1.6 +langchain-upstage==0.6.0 +langsmith==0.3.15 +linkify-it-py==2.0.3 +llama-cloud==0.1.14 +llama-cloud-services==0.6.5 +llama-index==0.11.22 +llama-index-agent-openai==0.3.4 +llama-index-cli==0.3.1 +llama-index-core==0.11.23 +llama-index-embeddings-huggingface==0.3.1 +llama-index-embeddings-ollama==0.3.1 +llama-index-embeddings-openai==0.2.5 +llama-index-indices-managed-llama-cloud==0.6.0 +llama-index-legacy==0.9.48.post4 +llama-index-llms-anthropic==0.2.1 +llama-index-llms-bedrock==0.2.6 +llama-index-llms-huggingface==0.3.5 +llama-index-llms-ollama==0.3.6 +llama-index-llms-openai==0.2.16 +llama-index-llms-openai-like==0.2.0 +llama-index-multi-modal-llms-openai==0.2.3 +llama-index-program-openai==0.2.0 +llama-index-question-gen-openai==0.2.0 +llama-index-readers-file==0.2.2 +llama-index-readers-llama-parse==0.3.0 +llama-index-retrievers-bm25==0.4.0 +llama-parse==0.6.4.post1 +llmlingua==0.2.2 +lxml==5.3.1 +lz4==4.4.3 +Markdown==3.7 +markdown-it-py==3.0.0 +MarkupSafe==2.1.5 +marshmallow==3.26.1 +matplotlib==3.10.1 +matplotlib-inline==0.1.7 +mdit-py-plugins==0.4.2 +mdurl==0.1.2 +milvus-lite==2.4.11 +minijinja==2.8.0 +mixedbread-ai==2.2.6 +mmh3==5.1.0 +monotonic==1.6 +mpmath==1.3.0 +multidict==6.2.0 +multiprocess==0.70.16 +mypy-extensions==1.0.0 +narwhals==1.31.0 +natsort==8.4.0 +nest-asyncio==1.6.0 +networkx==3.3 +ninja==1.11.1.3 +nltk==3.9.1 +nncf==2.15.0 +numpy==1.26.4 +nvidia-cublas-cu12==12.4.5.8 +nvidia-cuda-cupti-cu12==12.4.127 +nvidia-cuda-nvrtc-cu12==12.4.127 +nvidia-cuda-runtime-cu12==12.4.127 +nvidia-cudnn-cu12==9.1.0.70 +nvidia-cufft-cu12==11.2.1.3 +nvidia-curand-cu12==10.3.5.147 +nvidia-cusolver-cu12==11.6.1.9 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cusparselt-cu12==0.6.2 +nvidia-nccl-cu12==2.21.5 +nvidia-nvjitlink-cu12==12.4.127 +nvidia-nvtx-cu12==12.4.127 +oauthlib==3.2.2 +ollama==0.3.3 +onnx==1.17.0 +onnxruntime==1.19.2 +openai==1.66.3 +opentelemetry-api==1.31.0 +opentelemetry-exporter-otlp-proto-common==1.31.0 +opentelemetry-exporter-otlp-proto-grpc==1.31.0 +opentelemetry-instrumentation==0.52b0 +opentelemetry-instrumentation-asgi==0.52b0 +opentelemetry-instrumentation-fastapi==0.52b0 +opentelemetry-proto==1.31.0 +opentelemetry-sdk==1.31.0 +opentelemetry-semantic-conventions==0.52b0 +opentelemetry-util-http==0.52b0 +openvino==2025.0.0 +openvino-telemetry==2025.1.0 +openvino-tokenizers==2025.0.0.0 +optimum==1.24.0 +optimum-intel==1.22.0 +orjson==3.10.15 +overrides==7.7.0 +packaging==24.2 +pandas==2.2.3 +panel==1.6.1 +param==2.2.0 +parso==0.8.4 +pdfminer.six==20231228 +pdfplumber==0.11.5 +peft==0.13.2 +pexpect==4.9.0 +pillow==10.4.0 +pinecone==6.0.2 +pinecone-plugin-interface==0.0.7 +platformdirs==4.3.6 +portalocker==2.10.1 +posthog==3.21.0 +priority==2.0.0 +prompt_toolkit==3.0.50 +propcache==0.3.0 +protobuf==5.29.3 +protoc-gen-openapiv2==0.0.1 +psutil==7.0.0 +ptyprocess==0.7.0 +pure_eval==0.2.3 +py-cpuinfo==9.0.0 +pyarrow==19.0.1 +pyasn1==0.6.1 +pyasn1_modules==0.4.1 +pycparser==2.22 +pydantic==2.9.2 +pydantic-settings==2.8.1 +pydantic_core==2.23.4 +pydeck==0.9.1 +pydot==2.0.0 +pydub==0.25.1 +Pygments==2.19.1 +pymilvus==2.5.5 +pymoo==0.6.1.3 +pyngrok==7.2.3 +pyparsing==3.2.1 +pypdf==4.3.1 +pypdfium2==4.30.1 +PyPika==0.48.9 +pyproject_hooks==1.2.0 +PyStemmer==2.2.0.3 +python-dateutil==2.8.2 +python-dotenv==1.0.1 +python-multipart==0.0.20 +pytz==2025.1 +pyviz_comms==3.0.4 +PyYAML==6.0.2 +pyzmq==26.3.0 +qdrant-client==1.13.3 +Quart==0.20.0 +rank-bm25==0.2.2 +referencing==0.36.2 +regex==2024.11.6 +requests==2.32.3 +requests-oauthlib==2.0.0 +requests-toolbelt==1.0.0 +rich==13.9.4 +rouge_score==0.1.2 +rpds-py==0.23.1 +rsa==4.9 +ruff==0.11.0 +s3transfer==0.11.4 +sacrebleu==2.5.1 +safehttpx==0.1.6 +safetensors==0.5.3 +scikit-learn==1.6.1 +scipy==1.15.2 +seaborn==0.13.2 +semantic-version==2.10.0 +sentence-transformers==3.4.1 +sentencepiece==0.2.0 +setuptools-scm==8.2.0 +shellingham==1.5.4 +six==1.17.0 +smmap==5.0.2 +sniffio==1.3.1 +soupsieve==2.6 +SQLAlchemy==2.0.39 +stack-data==0.6.3 +starlette==0.46.1 +streamlit==1.43.2 +striprtf==0.0.26 +sympy==1.13.1 +tabulate==0.9.0 +taskgroup==0.2.2 +tenacity==8.5.0 +text-generation==0.7.0 +threadpoolctl==3.6.0 +tiktoken==0.9.0 +tokenizers==0.19.1 +tokenlog==0.0.2 +toml==0.10.2 +tomli==2.2.1 +tomlkit==0.12.0 +torch==2.6.0 +tornado==6.4.2 +tqdm==4.67.1 +traitlets==5.14.3 +transformers==4.44.2 +trec-car-tools==2.6 +triton==3.2.0 +typer==0.15.2 +types-requests==2.32.0.20250306 +typing-inspect==0.9.0 +typing_extensions==4.12.2 +tzdata==2025.1 +uc-micro-py==1.0.3 +ujson==5.10.0 +unlzw3==0.2.3 +unstructured-client==0.27.0 +urllib3==2.3.0 +uvicorn==0.34.0 +uvloop==0.21.0 +validators==0.34.0 +voyageai==0.3.2 +warc3-wet==0.2.5 +warc3-wet-clueweb09==0.2.5 +watchdog==6.0.0 +watchfiles==1.0.4 +wcwidth==0.2.13 +weaviate-client==4.11.1 +webencodings==0.5.1 +websocket-client==1.8.0 +websockets==12.0 +Werkzeug==3.1.3 +widgetsnbextension==4.0.13 +wrapt==1.17.2 +wsproto==1.2.0 +xxhash==3.5.0 +xyzservices==2025.1.0 +yarl==1.18.3 +zipp==3.21.0 +zlib-state==0.1.9 +zstandard==0.23.0 \ No newline at end of file diff --git a/requirements_custom.txt b/requirements_custom.txt deleted file mode 100644 index 35bda56..0000000 --- a/requirements_custom.txt +++ /dev/null @@ -1,9 +0,0 @@ -# added library - ๊น€์šฉ์—ฐ -llama_index.llms.ollama -llama_index.embeddings.huggingface -pdfplumber -pypdfium2 -pymupdf -AutoRAG[gpu] -AutoRAG[ko] -konlpy \ No newline at end of file