Support DDP (DistributedDataParallel) in the web UI

This commit is contained in:
hiyouga
2024-05-28 19:24:22 +08:00
parent c8765349ba
commit 7c016b22aa
19 changed files with 78 additions and 166 deletions

View File

@@ -6,10 +6,9 @@ from typing import TYPE_CHECKING, Any, Dict, Generator, Optional
import psutil
from transformers.trainer import TRAINING_ARGS_NAME
from transformers.utils import is_torch_cuda_available
from ..extras.constants import TRAINING_STAGES
from ..extras.misc import get_device_count, torch_gc
from ..extras.misc import is_gpu_or_npu_available, torch_gc
from ..extras.packages import is_gradio_available
from .common import get_module, get_save_dir, load_args, load_config, save_args
from .locales import ALERTS
@@ -64,16 +63,13 @@ class Runner:
if not from_preview and self.demo_mode:
return ALERTS["err_demo"][lang]
if not from_preview and get_device_count() > 1:
return ALERTS["err_device_count"][lang]
if do_train:
stage = TRAINING_STAGES[get("train.training_stage")]
reward_model = get("train.reward_model")
if stage == "ppo" and not reward_model:
return ALERTS["err_no_reward_model"][lang]
if not from_preview and not is_torch_cuda_available():
if not from_preview and not is_gpu_or_npu_available():
gr.Warning(ALERTS["warn_no_cuda"][lang])
return ""
@@ -273,7 +269,6 @@ class Runner:
self.do_train, self.running_data = do_train, data
args = self._parse_train_args(data) if do_train else self._parse_eval_args(data)
env = deepcopy(os.environ)
env["CUDA_VISIBLE_DEVICES"] = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
env["LLAMABOARD_ENABLED"] = "1"
self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True)
yield from self.monitor()