def clean_value(value): """ This function cleans a value to remove illegal characters """ if isinstance(value, str): # Define a regex pattern to match illegal characters illegal_chars_pattern = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]') # Replace illegal characters with an empty string cleaned_value = illegal_chars_pattern.sub('', value) if cleaned_value != value: print(f"Illegal characters found in '{value}'. \nCleaned value: '{cleaned_value}'") return cleaned_value return value
def clean_dataframe(df): """ This function cleans all values in a DataFrame to remove illegal characters """ return df.applymap(clean_value)
data = [] with open(file_path, 'r', encoding='utf-8') as file: for line in file: # Parse each JSON object data.append(json.loads(line))