Files
_Geulbeot/02. Prompts/문서생성/codedomain/리스트_페이지_Python_v01.py

18 lines
540 B
Python

def fetch_articles(
base_url: str,
article_sel: str,
title_sel: str,
date_sel: str,
start_page: int,
end_page: int,
source: str,
url_prefix: str = '',
date_fmt_func=None
) -> list:
"""리스트 페이지 순회하며 메타데이터 및 본문 수집"""
results = []
for page in range(start_page, end_page + 1):
try:
page_url = f"{base_url}{page}"
resp = requests.get(page_url, verify=False, timeout=10)
soup = BeautifulSoup(resp.text, 'html.parser