12 lines
516 B
Python
12 lines
516 B
Python
def fetch_article_content(url: str, source: str) -> str:
|
|
"""단일 기사 본문 추출"""
|
|
try:
|
|
resp = requests.get(url, verify=False, timeout=10)
|
|
resp.encoding = 'utf-8'
|
|
resp.raise_for_status()
|
|
soup = BeautifulSoup(resp.text, 'html.parser')
|
|
paragraphs = soup.find_all('p')
|
|
content = ' '.join(clean_text(p.get_text()) for p in paragraphs)
|
|
content = content.replace('\n', ' ')
|
|
if not content.strip():
|
|
logging.warning(f'No content for |