def clean_text(text): replacements = { ' ': ' ', '‘': "'", '’': "'", '“': '"', '”': '"', '&': '&', '<': '<', '>': '>', ''': "'", '"' : "'", '·': "'" } for entity, replacement in replacements.items(): text = text.replace(entity, replacement) text = re.sub(r'<[^>]+>', '', text) text = re.sub(r'\(엔지니어링데일리\).*?기자=', '', text) # (엔지니어링데일리) *** 기자= 패턴 삭제 text = re.sub(r'\[국토일보\s.*?