Initial commit: PDF zu ICS Konverter
This commit is contained in:
275
pdf_to_ics.py
Normal file
275
pdf_to_ics.py
Normal file
@@ -0,0 +1,275 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
PDF zu ICS Konverter für Dienstpläne
|
||||
Extrahiert Schichtinformationen aus PDFs und erstellt iCalendar-Dateien
|
||||
"""
|
||||
|
||||
import pdfplumber
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from icalendar import Calendar, Event
|
||||
from pathlib import Path
|
||||
import pytz
|
||||
|
||||
|
||||
def extract_dienstplan_data(pdf_path):
    """
    Extract duty-roster data from the first page of a PDF file.

    Args:
        pdf_path: Path of the PDF file, handed to ``pdfplumber.open``.

    Returns:
        A dict with the keys ``name``, ``vorname``, ``personalnummer``,
        ``betriebshof``, ``sollarbeitszeit``, ``monat_start`` and
        ``monat_end`` (each a string, or ``None`` when the corresponding
        pattern was not found in the page text) plus ``events``, the list
        returned by ``parse_dienstplan_table`` (empty when the page has
        fewer than two tables or the PDF has no pages).
    """
    dienstplan = {
        'name': None,
        'vorname': None,
        'personalnummer': None,
        'betriebshof': None,
        'sollarbeitszeit': None,
        'monat_start': None,
        'monat_end': None,
        'events': [],
    }

    with pdfplumber.open(pdf_path) as pdf:
        if not pdf.pages:
            return dienstplan

        page = pdf.pages[0]
        # Fall back to an empty string so the regex searches below never
        # receive None for a page without extractable text.
        text = page.extract_text() or ""

        # Metadata: surname and target working time share one line.
        match = re.search(r'Nachname\s+(\S+)\s+Sollarbeitszeit\s+([\d:]+)', text)
        if match:
            dienstplan['name'] = match.group(1)
            dienstplan['sollarbeitszeit'] = match.group(2)

        match = re.search(r'Vorname\s+(\S+)', text)
        if match:
            dienstplan['vorname'] = match.group(1)

        match = re.search(r'Personalnummer\s+(\d+)', text)
        if match:
            dienstplan['personalnummer'] = match.group(1)

        match = re.search(r'Betriebshof\s+(\S+)', text)
        if match:
            dienstplan['betriebshof'] = match.group(1)

        # Date range, e.g. "1. März 2026 - 31. März 2026"; both ends are
        # stored in the "D.Monthname.YYYY" form the table parser expects.
        match = re.search(
            r'(\d+)\.\s+(\w+)\s+(\d{4})\s+-\s+(\d+)\.\s+(\w+)\s+(\d{4})', text
        )
        if match:
            dienstplan['monat_start'] = (
                f"{match.group(1)}.{match.group(2)}.{match.group(3)}"
            )
            dienstplan['monat_end'] = (
                f"{match.group(4)}.{match.group(5)}.{match.group(6)}"
            )

        # The roster grid is taken from the second table on the page.
        # NOTE(review): presumably the first table holds the header/metadata
        # block - confirm against a sample PDF.
        tables = page.extract_tables()
        if len(tables) >= 2:
            dienstplan['events'] = parse_dienstplan_table(
                tables[1], dienstplan['monat_start']
            )

    return dienstplan
|
||||
|
||||
|
||||
def parse_dienstplan_table(table, month_start_str):
    """
    Parse the roster table and extract one event per day cell.

    Args:
        table: List of rows, each a list of cells (``str`` or ``None``).
            The first row is treated as the weekday header and skipped.
            A cell is expected to hold the day number on its first line,
            optionally followed by a service code and a time range.
        month_start_str: Start date in the form ``"D.Monthname.YYYY"`` with
            a German month name, e.g. ``"1.März.2026"``.

    Returns:
        A list of dicts with the keys ``date`` (``datetime``), ``service``
        (``str``, possibly empty), ``start_time`` and ``end_time``
        (zero-padded ``"HH:MM"`` strings, or ``None`` when the cell has no
        time range). The list is empty when the start date cannot be
        interpreted (missing, malformed, unknown month, impossible date).
    """
    events = []

    if not month_start_str or not table:
        return events

    # Expected shape: "<day>.<month name>.<year>"
    date_parts = month_start_str.split('.')
    if len(date_parts) != 3:
        return events

    # German month names, matched case-insensitively.
    months = {
        'januar': 1, 'februar': 2, 'märz': 3, 'maerz': 3, 'april': 4,
        'mai': 5, 'juni': 6, 'juli': 7, 'august': 8, 'september': 9,
        'oktober': 10, 'november': 11, 'dezember': 12,
    }

    try:
        day = int(date_parts[0])
        year = int(date_parts[2])
        # An unknown month must not silently become January: that would
        # place every shift in the wrong month.
        month = months[date_parts[1].strip().casefold()]
        base_date = datetime(year, month, day)
    except (ValueError, KeyError):
        return events

    # "HH:MM-HH:MM"; also tolerates single-digit hours, an en dash and
    # spaces around the separator. Anchored at line start via .match().
    time_range = re.compile(
        r'(\d{1,2}):(\d{2})\s*[-\u2013]\s*(\d{1,2}):(\d{2})'
    )

    # Skip the header row (Monday, Tuesday, ...).
    for row in table[1:]:
        if not row:
            continue

        for cell in row:
            if not cell:
                continue

            # A cell may span several lines: day / service / time.
            lines = [part.strip() for part in cell.strip().split('\n')]
            if not lines[0]:
                continue

            # First line is the day number; anything else is not a day cell.
            try:
                day_num = int(lines[0])
            except ValueError:
                continue
            if not 1 <= day_num <= 31:
                continue

            # NOTE(review): the offset is relative to the roster start date,
            # which assumes the roster begins on the 1st of the month -
            # confirm against real PDFs.
            event_date = base_date + timedelta(days=day_num - 1)

            service_code = ""
            start_time = None
            end_time = None

            for line in lines[1:]:
                time_match = time_range.match(line)
                if time_match:
                    # Normalise to zero-padded HH:MM for downstream slicing.
                    start_time = (
                        f"{int(time_match.group(1)):02d}:{time_match.group(2)}"
                    )
                    end_time = (
                        f"{int(time_match.group(3)):02d}:{time_match.group(4)}"
                    )
                elif not service_code:
                    # First non-time line is the service code.
                    service_code = line

            events.append({
                'date': event_date,
                'service': service_code,
                'start_time': start_time,
                'end_time': end_time,
            })

    return events
|
||||
|
||||
|
||||
def create_ics_from_dienstplan(dienstplan, output_path=None):
    """
    Write an ICS file built from the roster data.

    Args:
        dienstplan: Dict as produced by ``extract_dienstplan_data``. Read
            keys: ``events`` (list of dicts with ``date``, ``service``,
            ``start_time``, ``end_time``), ``betriebshof`` and
            ``personalnummer``.
        output_path: Target file path; defaults to ``'dienstplan.ics'``
            when falsy.

    Returns:
        The path the calendar was written to.

    Events without a service code are skipped. Events with a usable
    start/end time become timed events in the Europe/Berlin zone; all
    others become all-day events.
    """
    cal = Calendar()
    cal.add('prodid', '-//Dienstplan Importer//de')
    cal.add('version', '2.0')
    cal.add('calscale', 'GREGORIAN')
    cal.add('method', 'PUBLISH')

    tz = pytz.timezone('Europe/Berlin')
    # One timestamp for the whole export keeps CREATED/DTSTAMP consistent.
    generated_at = datetime.now(tz)

    betriebshof = dienstplan.get('betriebshof')
    # The key normally exists with value None, so a plain .get() default
    # would never apply; fall back explicitly.
    personal_id = dienstplan.get('personalnummer') or 'unknown'

    for event_data in dienstplan.get('events') or []:
        service_type = event_data.get('service')
        if not service_type:
            continue

        event = Event()

        # Title: service code only.
        event.add('summary', f"Dienst: {service_type}")

        description = f"Dienstart: {service_type}"
        if betriebshof:
            description += f"\nBetriebshof: {betriebshof}"
        event.add('description', description)

        event_date = event_data['date']
        bounds = _shift_bounds(
            event_date,
            event_data.get('start_time'),
            event_data.get('end_time'),
        )

        if bounds is not None:
            start_dt, end_dt = bounds
            event.add('dtstart', tz.localize(start_dt))
            event.add('dtend', tz.localize(end_dt))
        else:
            # No (valid) times: all-day event with an exclusive end date.
            event.add('dtstart', event_date.date())
            event.add('dtend', (event_date + timedelta(days=1)).date())

        event.add(
            'uid',
            f"{event_date.isoformat()}-{service_type}-{personal_id}@dienstplan",
        )
        event.add('created', generated_at)
        event.add('dtstamp', generated_at)

        cal.add_component(event)

    if not output_path:
        output_path = 'dienstplan.ics'

    with open(output_path, 'wb') as f:
        f.write(cal.to_ical())

    return output_path


def _shift_bounds(event_date, start_time, end_time):
    """Return naive ``(start, end)`` datetimes for a shift, or ``None``.

    ``start_time`` / ``end_time`` are ``"HH:MM"`` strings. ``None`` is
    returned when either is missing or cannot be turned into a valid time.
    An end that is earlier than the start means the shift runs past
    midnight, so the end is moved to the following day.
    """
    if not (start_time and end_time):
        return None

    try:
        start_hour, start_min = (int(part) for part in start_time.split(':'))
        end_hour, end_min = (int(part) for part in end_time.split(':'))
        start_dt = event_date.replace(hour=start_hour, minute=start_min, second=0)
        end_dt = event_date.replace(hour=end_hour, minute=end_min, second=0)
    except ValueError:
        # Covers malformed strings and out-of-range hours/minutes.
        return None

    # Compare full times (not just hours) to detect overnight shifts.
    if end_dt < start_dt:
        end_dt += timedelta(days=1)

    return start_dt, end_dt
|
||||
|
||||
|
||||
def main():
    """
    Convert every PDF in the current directory to an ICS file.

    For each PDF the extracted metadata is printed and a calendar file with
    the same base name and the suffix ``.ics`` is written next to it. A
    failure on one file is reported with its traceback and does not stop
    the remaining files from being processed.
    """
    import traceback

    # Sorted for a deterministic processing order; the suffix is compared
    # case-insensitively so "PLAN.PDF" is picked up on every platform.
    pdf_files = sorted(
        path for path in Path('.').iterdir()
        if path.is_file() and path.suffix.lower() == '.pdf'
    )

    if not pdf_files:
        print("Keine PDF-Dateien gefunden!")
        return

    for pdf_file in pdf_files:
        print(f"\nVerarbeite: {pdf_file}")

        try:
            dienstplan = extract_dienstplan_data(str(pdf_file))

            print(f"Name: {dienstplan['vorname']} {dienstplan['name']}")
            print(f"Personalnummer: {dienstplan['personalnummer']}")
            print(f"Betriebshof: {dienstplan['betriebshof']}")
            print(f"Anzahl der Events: {len(dienstplan['events'])}")

            ics_path = pdf_file.with_suffix('.ics')
            create_ics_from_dienstplan(dienstplan, str(ics_path))

            print(f"✓ ICS-Datei erstellt: {ics_path}")

        except Exception as e:
            # Top-level boundary: report and continue with the next file.
            print(f"✗ Fehler bei {pdf_file}: {e}")
            traceback.print_exc()
|
||||
|
||||
|
||||
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user