Files
_Geulbeot/02. Prompts/문서생성/codedomain/단일_기사_Python_v01.py

12 lines
516 B
Python

def fetch_article_content(url: str, source: str) -> str:
"""단일 기사 본문 추출"""
try:
resp = requests.get(url, verify=False, timeout=10)
resp.encoding = 'utf-8'
resp.raise_for_status()
soup = BeautifulSoup(resp.text, 'html.parser')
paragraphs = soup.find_all('p')
content = ' '.join(clean_text(p.get_text()) for p in paragraphs)
content = content.replace('\n', ' ')
if not content.strip():
logging.warning(f'No content for