# WebscrapperPython/gelbeseiten_Adresse_bereinigen.py
# 2025-01-25 00:08:28 +01:00
#
# 66 lines
# 2.0 KiB
# Python

import os
import re

import mysql.connector
# MySQL connection details.
# Each value can be overridden through an environment variable so that
# credentials do not have to live in source control. The literals are kept
# only as fallbacks so existing setups keep working unchanged.
# NOTE(review): the fallback password is committed in plain text -- rotate it
# and drop the default once GELBESEITEN_DB_PASSWORD is set everywhere.
DB_HOST = os.environ.get("GELBESEITEN_DB_HOST", "192.168.178.201")
DB_USER = os.environ.get("GELBESEITEN_DB_USER", "gelbeseiten")
DB_PASSWORD = os.environ.get("GELBESEITEN_DB_PASSWORD", "Gm4bBE62gXCSVVY2")
DB_NAME = os.environ.get("GELBESEITEN_DB_NAME", "domainchecker")
def connect_to_database():
    """Open and return a new MySQL connection.

    The connection parameters are taken from the module-level ``DB_HOST``,
    ``DB_USER``, ``DB_PASSWORD`` and ``DB_NAME`` constants.
    """
    settings = {
        "host": DB_HOST,
        "user": DB_USER,
        "password": DB_PASSWORD,
        "database": DB_NAME,
    }
    return mysql.connector.connect(**settings)
def get_data_from_database(cursor):
    """Fetch ``id`` and ``adresse`` of every row with a non-empty address.

    Args:
        cursor: Database cursor used to run the query.

    Returns:
        The result of ``cursor.fetchall()`` for that query.
    """
    query = "SELECT id, adresse FROM gelbeseiten WHERE adresse IS NOT NULL AND adresse != ''"
    cursor.execute(query)
    rows = cursor.fetchall()
    return rows
def update_address_in_database(cursor, record_id, street, plz, city):
    """Write the split address parts back to one ``gelbeseiten`` row.

    Args:
        cursor: Database cursor used to run the UPDATE.
        record_id: Value matched against the ``id`` column.
        street: New value for the ``adresse`` column.
        plz: New value for the ``plz`` column.
        city: New value for the ``ort`` column.
    """
    statement = "UPDATE `gelbeseiten` SET adresse = %s, plz = %s, ort = %s WHERE `id` = %s"
    values = (street, plz, city, record_id)
    cursor.execute(statement, values)
# Matches "<street>, <5-digit PLZ> <city> <digits>,<digits> km" (the trailing
# part is presumably the distance shown in the listing). Compiled once at
# import time instead of being looked up again for every row.
_ADDRESS_PATTERN = re.compile(r"^(.*?),\s+(\d{5})\s+([^0-9]+)\s+\d+,\d+\s+km$")


def _split_address(address_text):
    """Return ``(street, plz, city)`` parsed from *address_text*.

    Returns ``None`` when the text does not match ``_ADDRESS_PATTERN``.
    """
    match = _ADDRESS_PATTERN.match(address_text)
    if match is None:
        return None
    street = match.group(1).strip()
    plz = match.group(2)
    # The city group accepts any non-digit characters, including tabs and
    # newlines, so those are removed before trimming.
    city = match.group(3).replace("\t", "").replace("\n", "").strip()
    return street, plz, city


def process_addresses_and_update_db(conn):
    """Split the combined address of every row and store the parts.

    Reads all rows via ``get_data_from_database``, parses each address into
    street, postal code and city, and writes the parts back with
    ``update_address_in_database``. Rows whose address does not match the
    expected format are reported on stdout and left untouched.

    Args:
        conn: Open database connection providing ``cursor()`` and
            ``commit()``.

    Raises:
        Any exception raised by the database calls is propagated; the cursor
        is closed first.
    """
    cursor = conn.cursor()
    try:
        records = get_data_from_database(cursor)
        print(f"{len(records)} Einträge gefunden.")

        for record_id, address_text in records:
            parts = _split_address(address_text)
            if parts is None:
                print(f"ID {record_id} - Kein Match für Adresse: {address_text}")
                continue

            street, plz, city = parts
            update_address_in_database(cursor, record_id, street, plz, city)
            # Commit per row so rows already processed stay stored even if a
            # later row fails.
            conn.commit()

        # Final commit kept from the original flow; nothing is pending when
        # every update above was already committed.
        conn.commit()
    finally:
        # Release the cursor even when a query or update raises.
        cursor.close()
    print("Alle Adressen wurden verarbeitet.")
if __name__ == "__main__":
    # Open the database connection for this run.
    conn = connect_to_database()
    try:
        process_addresses_and_update_db(conn)
    except Exception as exc:
        print(f"Fehler: {exc}")
        # Report the failure through the exit status; previously the script
        # printed the error but still exited with status 0.
        raise SystemExit(1)
    finally:
        # Runs on success and on failure, including the SystemExit above.
        conn.close()