18 lines
540 B
Python
18 lines
540 B
Python
def fetch_articles(
|
|
base_url: str,
|
|
article_sel: str,
|
|
title_sel: str,
|
|
date_sel: str,
|
|
start_page: int,
|
|
end_page: int,
|
|
source: str,
|
|
url_prefix: str = '',
|
|
date_fmt_func=None
|
|
) -> list:
|
|
"""리스트 페이지 순회하며 메타데이터 및 본문 수집"""
|
|
results = []
|
|
for page in range(start_page, end_page + 1):
|
|
try:
|
|
page_url = f"{base_url}{page}"
|
|
resp = requests.get(page_url, verify=False, timeout=10)
|
|
soup = BeautifulSoup(resp.text, 'html.parser |