Files
_Geulbeot/02. Prompts/문서생성/codedomain/엔지니어링데일리_기자_Python_v01.py

13 lines
556 B
Python

def clean_text(text):
replacements = {
' ': ' ', '‘': "'", '’': "'", '“': '"', '”': '"',
'&amp;': '&', '&lt;': '<', '&gt;': '>', '&#39;': "'",
'&quot;' : "'", '&middot;': "'"
}
for entity, replacement in replacements.items():
text = text.replace(entity, replacement)
text = re.sub(r'<[^>]+>', '', text)
text = re.sub(r'\(엔지니어링데일리\).*?기자=', '', text) # (엔지니어링데일리) *** 기자= 패턴 삭제
text = re.sub(r'\[국토일보\s.*?