276 lines
8.5 KiB
Python
276 lines
8.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
PDF zu ICS Konverter für Dienstpläne
|
|
Extrahiert Schichtinformationen aus PDFs und erstellt iCalendar-Dateien
|
|
"""
|
|
|
|
import pdfplumber
|
|
import re
|
|
from datetime import datetime, timedelta
|
|
from icalendar import Calendar, Event
|
|
from pathlib import Path
|
|
import pytz
|
|
|
|
|
|
def extract_dienstplan_data(pdf_path):
    """Extract duty-roster data from the first page of a PDF file.

    Args:
        pdf_path: Path of the PDF; passed unchanged to ``pdfplumber.open``.

    Returns:
        A dict with the keys ``name``, ``vorname``, ``personalnummer``,
        ``betriebshof``, ``sollarbeitszeit``, ``monat_start`` and
        ``monat_end`` (each a string, or ``None`` when the corresponding
        pattern was not found in the page text) plus ``events``, the list
        produced by ``parse_dienstplan_table`` (empty when the page has
        fewer than two tables or no start date was found).
    """
    dienstplan = {
        'name': None,
        'vorname': None,
        'personalnummer': None,
        'betriebshof': None,
        'sollarbeitszeit': None,
        'monat_start': None,
        'monat_end': None,
        'events': [],
    }

    with pdfplumber.open(pdf_path) as pdf:
        if not pdf.pages:
            return dienstplan

        page = pdf.pages[0]
        # Guard against a missing result so the regex searches below never
        # receive a non-string (NOTE(review): pages without a text layer are
        # presumed to be the case where this matters -- confirm).
        text = page.extract_text() or ""
        tables = page.extract_tables() or []

    # Surname and target working time appear on the same line of the header.
    match = re.search(r'Nachname\s+(\S+)\s+Sollarbeitszeit\s+([\d:]+)', text)
    if match:
        dienstplan['name'] = match.group(1)
        dienstplan['sollarbeitszeit'] = match.group(2)

    # Remaining single-value header fields: (result key, pattern).
    single_fields = (
        ('vorname', r'Vorname\s+(\S+)'),
        ('personalnummer', r'Personalnummer\s+(\d+)'),
        ('betriebshof', r'Betriebshof\s+(\S+)'),
    )
    for key, pattern in single_fields:
        match = re.search(pattern, text)
        if match:
            dienstplan[key] = match.group(1)

    # Date range, e.g. "1. März 2026 - 31. März 2026"; both ends are stored
    # in the "day.monthname.year" form that parse_dienstplan_table expects.
    match = re.search(
        r'(\d+)\.\s+(\w+)\s+(\d{4})\s+-\s+(\d+)\.\s+(\w+)\s+(\d{4})', text
    )
    if match:
        dienstplan['monat_start'] = (
            f"{match.group(1)}.{match.group(2)}.{match.group(3)}"
        )
        dienstplan['monat_end'] = (
            f"{match.group(4)}.{match.group(5)}.{match.group(6)}"
        )

    # The shift calendar is taken from the second table on the page.
    if len(tables) >= 2:
        dienstplan['events'] = parse_dienstplan_table(
            tables[1], dienstplan['monat_start']
        )

    return dienstplan
|
|
|
|
|
|
def parse_dienstplan_table(table, month_start_str):
    """Parse the roster table and return one event dict per dated cell.

    Args:
        table: List of rows (each a list of cell strings or ``None``). The
            first row is treated as the weekday header and skipped.
        month_start_str: Start date as ``"day.month.year"`` where month is a
            German month name (case-insensitive) or a number, e.g.
            ``"1.März.2026"``.

    Returns:
        A list of dicts with the keys ``date`` (naive ``datetime``),
        ``service`` (str, possibly empty), ``start_time`` and ``end_time``
        (``"HH:MM"`` strings or ``None``). The list is empty when the table
        is empty or the start date cannot be interpreted.
    """
    events = []

    if not table or not month_start_str:
        return events

    # Expected form: "1.März.2026"
    date_parts = month_start_str.split('.')
    if len(date_parts) != 3:
        return events

    months = {
        'januar': 1, 'februar': 2, 'märz': 3, 'april': 4, 'mai': 5,
        'juni': 6, 'juli': 7, 'august': 8, 'september': 9, 'oktober': 10,
        'november': 11, 'dezember': 12,
    }

    month_token = date_parts[1].strip()
    try:
        day = int(date_parts[0])
        year = int(date_parts[2])
        if month_token.isdigit():
            month = int(month_token)
        else:
            # An unknown month name raises KeyError instead of silently
            # becoming January, which would yield wrong dates.
            month = months[month_token.casefold()]
        base_date = datetime(year, month, day)
    except (ValueError, KeyError):
        return events

    # "HH:MM-HH:MM"; also tolerates one-digit hours, spaces and an en dash.
    time_range = re.compile(r'(\d{1,2}):(\d{2})\s*[-–]\s*(\d{1,2}):(\d{2})')

    # Skip the header row (Montag, Dienstag, ...).
    for row in table[1:]:
        if not row:
            continue

        for cell in row:
            if not cell:
                continue

            # A cell may hold several lines: day number, service, time range.
            lines = [part.strip() for part in cell.strip().split('\n')]
            if not lines[0]:
                continue

            # The first line must be the day number; other cells are ignored.
            try:
                day_num = int(lines[0])
            except ValueError:
                continue
            if not 1 <= day_num <= 31:
                continue

            # NOTE(review): this offset equals the day of month only when
            # the start date is the 1st -- confirm for other start days.
            event_date = base_date + timedelta(days=day_num - 1)

            service_code = ""
            start_time = None
            end_time = None

            for line in lines[1:]:
                time_match = time_range.match(line)
                if time_match:
                    start_h, start_m, end_h, end_m = time_match.groups()
                    # Zero-pad so consumers can rely on the "HH:MM" layout.
                    start_time = f"{int(start_h):02d}:{start_m}"
                    end_time = f"{int(end_h):02d}:{end_m}"
                elif not service_code:
                    # First non-time line is the service code.
                    service_code = line

            events.append({
                'date': event_date,
                'service': service_code,
                'start_time': start_time,
                'end_time': end_time,
            })

    return events
|
|
|
|
|
|
def _shift_bounds(event_date, start_time, end_time, tz):
    """Return localized (start, end) datetimes for a shift, or None.

    ``start_time`` / ``end_time`` are ``"HH:MM"`` strings. ``None`` is
    returned when either is missing or cannot be turned into a valid time,
    so the caller can fall back to an all-day entry.
    """
    if not (start_time and end_time):
        return None
    try:
        start_hour, start_min = (int(part) for part in start_time.split(':'))
        end_hour, end_min = (int(part) for part in end_time.split(':'))

        # An end time earlier than the start means the shift runs into the
        # next day; minutes are compared too (e.g. 22:30-22:10).
        overnight = (end_hour, end_min) < (start_hour, start_min)
        end_date = event_date + timedelta(days=1) if overnight else event_date

        start_dt = event_date.replace(
            hour=start_hour, minute=start_min, second=0
        )
        end_dt = end_date.replace(hour=end_hour, minute=end_min, second=0)
        return tz.localize(start_dt), tz.localize(end_dt)
    except (AttributeError, TypeError, ValueError):
        return None


def create_ics_from_dienstplan(dienstplan, output_path=None):
    """Write the roster events to an iCalendar (.ics) file.

    Args:
        dienstplan: Dict as returned by ``extract_dienstplan_data``; the
            keys ``events``, ``betriebshof`` and ``personalnummer`` are read.
        output_path: Target file path; defaults to ``'dienstplan.ics'`` in
            the current directory when falsy.

    Returns:
        The path the calendar was written to.

    Events without a service code are skipped. Events with a usable start
    and end time become timed entries in Europe/Berlin; all others become
    all-day entries.
    """
    cal = Calendar()
    cal.add('prodid', '-//Dienstplan Importer//de')
    cal.add('version', '2.0')
    cal.add('calscale', 'GREGORIAN')
    cal.add('method', 'PUBLISH')

    tz = pytz.timezone('Europe/Berlin')
    # One timestamp for the whole export instead of two clock reads per event.
    now = datetime.now(tz)

    betriebshof = dienstplan.get('betriebshof')
    # The key normally exists with value None, so a .get() default would
    # never apply; "or" gives the intended placeholder.
    personal_id = dienstplan.get('personalnummer') or 'unknown'

    for event_data in dienstplan.get('events') or []:
        service_type = event_data['service']
        if not service_type:
            continue

        event = Event()

        # Title: just the service code.
        event.add('summary', f"Dienst: {service_type}")

        description = f"Dienstart: {service_type}"
        if betriebshof:
            description += f"\nBetriebshof: {betriebshof}"
        event.add('description', description)

        event_date = event_data['date']
        bounds = _shift_bounds(
            event_date, event_data['start_time'], event_data['end_time'], tz
        )
        if bounds is not None:
            # Both values are computed before anything is added, so a
            # failure can never leave a half-populated event behind.
            event.add('dtstart', bounds[0])
            event.add('dtend', bounds[1])
        else:
            # Date only (all-day event).
            event.add('dtstart', event_date.date())
            event.add('dtend', (event_date + timedelta(days=1)).date())

        event.add(
            'uid',
            f"{event_date.isoformat()}-{service_type}-{personal_id}@dienstplan",
        )
        event.add('created', now)
        event.add('dtstamp', now)

        cal.add_component(event)

    if not output_path:
        output_path = 'dienstplan.ics'

    with open(output_path, 'wb') as f:
        f.write(cal.to_ical())

    return output_path
|
|
|
|
|
|
def main():
    """Convert every PDF in the current directory to an .ics file.

    For each ``*.pdf`` a summary of the extracted data is printed and a
    calendar file with the same base name is written next to it. A failure
    on one file is reported with its traceback and does not stop the
    remaining files from being processed.
    """
    import traceback

    # Sorted so the processing order does not depend on the filesystem.
    pdf_files = sorted(Path('.').glob('*.pdf'))

    if not pdf_files:
        print("Keine PDF-Dateien gefunden!")
        return

    for pdf_file in pdf_files:
        print(f"\nVerarbeite: {pdf_file}")

        try:
            dienstplan = extract_dienstplan_data(str(pdf_file))

            print(f"Name: {dienstplan['vorname']} {dienstplan['name']}")
            print(f"Personalnummer: {dienstplan['personalnummer']}")
            print(f"Betriebshof: {dienstplan['betriebshof']}")
            print(f"Anzahl der Events: {len(dienstplan['events'])}")

            ics_path = pdf_file.with_suffix('.ics')
            create_ics_from_dienstplan(dienstplan, str(ics_path))

            print(f"✓ ICS-Datei erstellt: {ics_path}")

        except Exception as e:
            # Top-level boundary: report and continue with the next file.
            print(f"✗ Fehler bei {pdf_file}: {e}")
            traceback.print_exc()
|
|
|
|
|
|
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|