change to right-padding, update reward score #803

This commit is contained in:
hiyouga
2023-09-08 20:04:31 +08:00
parent 8aaaa132d4
commit 8ea32e4046
15 changed files with 97 additions and 59 deletions

View File

@@ -1,5 +1,4 @@
# Inspired by:
# https://github.com/lvwerra/trl/blob/main/examples/summarization/scripts/reward_summarization.py
# https://github.com/CarperAI/trlx/blob/main/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py
from typing import TYPE_CHECKING, Optional, List