13 lines
556 B
Python
13 lines
556 B
Python
def clean_text(text):
|
|
replacements = {
|
|
' ': ' ', '‘': "'", '’': "'", '“': '"', '”': '"',
|
|
'&': '&', '<': '<', '>': '>', ''': "'",
|
|
'"' : "'", '·': "'"
|
|
}
|
|
|
|
for entity, replacement in replacements.items():
|
|
text = text.replace(entity, replacement)
|
|
|
|
text = re.sub(r'<[^>]+>', '', text)
|
|
text = re.sub(r'\(엔지니어링데일리\).*?기자=', '', text) # (엔지니어링데일리) *** 기자= 패턴 삭제
|
|
text = re.sub(r'\[국토일보\s.*? |