From 2cb13a1a1416554762165c50c4deb4af641a4b57 Mon Sep 17 00:00:00 2001 From: Ronni Baslund Date: Sun, 7 Jun 2026 08:49:53 +0200 Subject: [PATCH] feat(scheduling): retry calendar writes for pending bookings A failed Stalwart calendar write during confirmation no longer deletes the booking and its SlotLock. The booking stays 'pending' with its lock retained, and a new @Cron worker (every 2 min; max 5 attempts by default, counting the failed synchronous first write) re-drives the write: on success it promotes the booking to 'confirmed' and sends the confirmation email; once the attempt cap is reached it moves the booking to the terminal 'calendar_failed' state and releases the lock. Tracks calendarWriteAttempts + lastCalendarError on the Booking. The public confirm endpoint still throws 503 on a failed first write (preserving the DoD: never surface a confirmed booking without a calendar event); the pending row is left for the background retry to finish. --- .../scheduling/bookings/bookings.service.ts | 127 +++++++++++++++--- .../bookings/calendar-retry.worker.ts | 41 ++++++ .../src/scheduling/scheduling.module.ts | 2 + .../src/schemas/booking.schema.ts | 17 ++- 4 files changed, 163 insertions(+), 24 deletions(-) create mode 100644 services/platform-api/src/scheduling/bookings/calendar-retry.worker.ts diff --git a/services/platform-api/src/scheduling/bookings/bookings.service.ts b/services/platform-api/src/scheduling/bookings/bookings.service.ts index 9fdce3a..2e30538 100644 --- a/services/platform-api/src/scheduling/bookings/bookings.service.ts +++ b/services/platform-api/src/scheduling/bookings/bookings.service.ts @@ -10,10 +10,11 @@ import { ConfigService } from '@nestjs/config' import { InjectModel } from '@nestjs/mongoose' import { randomBytes, randomUUID } from 'node:crypto' import { Model, Types } from 'mongoose' -import { Booking, BookingDocument } from '../../schemas/booking.schema.js' -import { EventTypeDocument } from '../../schemas/event-type.schema.js' -import { HostDocument } from '../../schemas/scheduling-host.schema.js' +import { Booking, 
BookingDocument, BookingStatus } from '../../schemas/booking.schema.js' +import { EventType, EventTypeDocument } from '../../schemas/event-type.schema.js' +import { Host, HostDocument } from '../../schemas/scheduling-host.schema.js' import { SlotLock, SlotLockDocument } from '../../schemas/slot-lock.schema.js' +import { Tenant, TenantDocument } from '../../schemas/tenant.schema.js' import { confirmationEmail, cancellationEmail } from '../email/booking-templates.js' import { buildBookingIcs } from '../email/ics.js' import { JmapMailer } from '../email/jmap-mailer.service.js' @@ -56,6 +57,9 @@ export class BookingsService { constructor( @InjectModel(Booking.name) private readonly bookingModel: Model, @InjectModel(SlotLock.name) private readonly lockModel: Model, + @InjectModel(Tenant.name) private readonly tenantModel: Model, + @InjectModel(Host.name) private readonly hostModel: Model, + @InjectModel(EventType.name) private readonly eventTypeModel: Model, private readonly slots: SlotService, private readonly provisioner: CredentialProvisioner, private readonly gateway: JmapCalendarGateway, @@ -166,38 +170,117 @@ export class BookingsService { } // (d) Write to the host's Stalwart calendar; promote to confirmed on success. - let access: HostCalendarAccess + // On failure we DO NOT delete the booking/lock anymore: the booking stays + // 'pending' with its SlotLock held so the retry worker (§8.2.4) can re-drive + // the write. We still surface 503 to the synchronous caller so the attendee + // is told to retry immediately — but the slot is now durably reserved and + // will be promoted-and-emailed (or terminally released) in the background. + // + // Public-confirm behavior (DoD: never silently confirm without a calendar + // event): the public endpoint keeps throwing ServiceUnavailable on a failed + // first write rather than returning a 'pending' booking, so the UI never + // tells the attendee "confirmed" before a calendar event exists. 
The pending + // booking lives on for the background retry to finish the job. + const written = await this.attemptCalendarWrite(ctx, booking) + if (!written) { + throw new ServiceUnavailableException('Could not complete the booking on the calendar — please try again.') + } + return booking + } + + /** + * Drives the Stalwart calendar write for a pending booking and, on success, + * promotes it to 'confirmed' and fires the branded confirmation email. + * + * On failure it increments `calendarWriteAttempts`, records `lastCalendarError` + * and leaves the booking 'pending' with its SlotLock intact for a later retry. + * Returns true iff the booking is now confirmed. The `calendarEventUid` is + * generated up-front and reused across attempts, so a retry after a partial + * failure is idempotent (§9). + */ + private async attemptCalendarWrite(ctx: BookingContext, booking: BookingDocument): Promise { + const { host, eventType } = ctx try { - access = await this.provisioner.resolveAccess(host) + const access = await this.provisioner.resolveAccess(host) const { id } = await this.gateway.createEvent(access, { - uid: calendarEventUid, + uid: booking.calendarEventUid, title: eventType.title, - description: input.attendeeNotes, - startUtc, - endUtc, + description: booking.attendeeNotes, + startUtc: booking.startUtc, + endUtc: booking.endUtc, hostTimezone: host.timezone, - location: location.url, + location: booking.locationUrl, hostEmail: host.email, - attendeeName: input.attendeeName, - attendeeEmail: input.attendeeEmail, + attendeeName: booking.attendeeName, + attendeeEmail: booking.attendeeEmail, }) booking.calendarEventId = id booking.status = 'confirmed' await booking.save() + + // Branded confirmation email — best-effort (booking already valid). 
+ this.sendEmail(ctx, booking, access, 'confirmation').catch((e) => + this.logger.warn(`Confirmation email failed for ${booking.attendeeEmail}: ${e.message}`), + ) + return true } catch (err) { - // Compensate: never leave a confirmed-looking booking with no calendar event. - await this.lockModel.deleteOne({ hostId: host._id, startUtc, bookingId: booking._id }).exec() - await this.bookingModel.deleteOne({ _id: booking._id }).exec() - this.logger.error(`Calendar write failed for ${host.email}: ${(err as Error).message}`) - throw new ServiceUnavailableException('Could not complete the booking on the calendar — please try again.') + booking.calendarWriteAttempts = (booking.calendarWriteAttempts ?? 0) + 1 + booking.lastCalendarError = (err as Error).message + await booking.save().catch(() => undefined) + this.logger.error( + `Calendar write failed for ${host.email} (booking ${booking._id}, attempt ${booking.calendarWriteAttempts}): ${(err as Error).message}`, + ) + return false + } + } + + // ── Retry queue (§8.2.4) ───────────────────────────────────────────────────── + /** + * Re-drive the calendar write for a single pending booking. On success the + * booking is promoted to confirmed and the confirmation email is sent. Once + * attempts reach `maxAttempts` the booking moves to the terminal + * 'calendar_failed' state and its SlotLock is released so the slot frees up. + * Returns the booking's resulting status. Invoked by the retry worker. 
+ */ + async retryPendingCalendarWrite(booking: BookingDocument, maxAttempts: number): Promise { + const tenant = await this.tenantModel.findById(booking.tenantId).exec() + const host = await this.hostModel.findById(booking.hostId).exec() + const eventType = await this.eventTypeModel.findById(booking.eventTypeId).exec() + if (!tenant || !host || !eventType) { + this.logger.warn(`Retry skipped — missing tenant/host/eventType for booking ${booking._id}`) + return booking.status } - // (e) Branded confirmation email — best-effort (booking already valid). - this.sendEmail(ctx, booking, access, 'confirmation').catch((e) => - this.logger.warn(`Confirmation email failed for ${booking.attendeeEmail}: ${e.message}`), - ) + const ctx: BookingContext = { + tenant: { _id: tenant._id, slug: tenant.slug, name: tenant.name, brandColor: tenant.brandColor }, + host, + eventType, + } - return booking + const ok = await this.attemptCalendarWrite(ctx, booking) + if (ok) return booking.status + + if (booking.calendarWriteAttempts >= maxAttempts) { + booking.status = 'calendar_failed' + await booking.save().catch(() => undefined) + await this.lockModel + .deleteOne({ hostId: booking.hostId, startUtc: booking.startUtc, bookingId: booking._id }) + .exec() + .catch(() => undefined) + this.logger.error( + `Booking ${booking._id} reached max calendar-write attempts (${maxAttempts}); marked calendar_failed and released its slot lock. Last error: ${booking.lastCalendarError}`, + ) + } + return booking.status + } + + // Pending bookings still eligible for a calendar-write retry (under the cap). 
+ findPendingForRetry(maxAttempts: number, limit = 100): Promise { + return this.bookingModel + .find({ status: 'pending', calendarWriteAttempts: { $lt: maxAttempts } }) + .sort({ createdAt: 1 }) + .limit(limit) + .exec() } // ── Manage / cancel / reschedule ─────────────────────────────────────────── diff --git a/services/platform-api/src/scheduling/bookings/calendar-retry.worker.ts b/services/platform-api/src/scheduling/bookings/calendar-retry.worker.ts new file mode 100644 index 0000000..3dcd857 --- /dev/null +++ b/services/platform-api/src/scheduling/bookings/calendar-retry.worker.ts @@ -0,0 +1,41 @@ +import { Injectable, Logger } from '@nestjs/common' +import { ConfigService } from '@nestjs/config' +import { Cron } from '@nestjs/schedule' +import { BookingsService } from './bookings.service.js' + +// Calendar-write retry queue (§8.2.4). When the synchronous Stalwart calendar +// write fails during confirmation, the booking is left 'pending' with its +// SlotLock retained (rather than deleted). This cron periodically re-drives the +// write for those pending bookings: on success the booking is promoted to +// 'confirmed' and the confirmation email is sent; once attempts hit the cap the +// booking moves to the terminal 'calendar_failed' state and its lock is +// released. The per-booking attempt counter (and idempotent calendarEventUid) +// lives on the Booking, so this worker is safe to run repeatedly. Max attempts +// is configurable via SCHEDULING_CALENDAR_RETRY_MAX (default 5). +@Injectable() +export class CalendarRetryWorker { + private readonly logger = new Logger(CalendarRetryWorker.name) + private readonly maxAttempts: number + + constructor( + private readonly bookings: BookingsService, + config: ConfigService, + ) { + const raw = Number(config.get('SCHEDULING_CALENDAR_RETRY_MAX')) + this.maxAttempts = Number.isFinite(raw) && raw > 0 ? Math.floor(raw) : 5 + } + + // Every 2 minutes (@nestjs/schedule has no EVERY_2_MINUTES preset). 
+ @Cron('*/2 * * * *', { name: 'calendar-write-retry' }) + async run(): Promise { + const pending = await this.bookings.findPendingForRetry(this.maxAttempts) + if (pending.length === 0) return + + this.logger.log(`Retrying calendar writes for ${pending.length} pending booking(s)`) + for (const booking of pending) { + await this.bookings + .retryPendingCalendarWrite(booking, this.maxAttempts) + .catch((e) => this.logger.warn(`Calendar retry failed for booking ${booking._id}: ${e.message}`)) + } + } +} diff --git a/services/platform-api/src/scheduling/scheduling.module.ts b/services/platform-api/src/scheduling/scheduling.module.ts index 7734e1f..e459ccc 100644 --- a/services/platform-api/src/scheduling/scheduling.module.ts +++ b/services/platform-api/src/scheduling/scheduling.module.ts @@ -14,6 +14,7 @@ import { User, UserSchema } from '../schemas/user.schema.js' import { TenantsModule } from '../tenants/tenants.module.js' import { AvailabilityService } from './availability/availability.service.js' import { BookingsService } from './bookings/bookings.service.js' +import { CalendarRetryWorker } from './bookings/calendar-retry.worker.js' import { JmapMailer } from './email/jmap-mailer.service.js' import { EventTypesService } from './event-types/event-types.service.js' import { HostsService } from './hosts/hosts.service.js' @@ -59,6 +60,7 @@ import { StalwartCalendarModule } from './stalwart-calendar/stalwart-calendar.mo PublicSchedulingService, JmapMailer, BookingReminderWorker, + CalendarRetryWorker, ], }) export class SchedulingModule {} diff --git a/services/platform-api/src/schemas/booking.schema.ts b/services/platform-api/src/schemas/booking.schema.ts index 19063e8..7a9176c 100644 --- a/services/platform-api/src/schemas/booking.schema.ts +++ b/services/platform-api/src/schemas/booking.schema.ts @@ -6,7 +6,9 @@ export type BookingDocument = HydratedDocument // 'pending' is the compensating state when the SlotLock is held but the calendar // write hasn't succeeded 
yet (§8.2.4) — never surfaced as a confirmed booking. -export type BookingStatus = 'pending' | 'confirmed' | 'cancelled' | 'rescheduled' +// 'calendar_failed' is the terminal state once the retry worker exhausts its +// attempts; the SlotLock is released so the slot frees up. +export type BookingStatus = 'pending' | 'confirmed' | 'cancelled' | 'rescheduled' | 'calendar_failed' // A confirmed appointment. All instants are UTC; attendee/host tz are IANA // strings for display. `calendarEventUid` is generated client-side BEFORE the @@ -23,7 +25,7 @@ export class Booking { @Prop({ type: Types.ObjectId, ref: 'Host', required: true, index: true }) hostId!: Types.ObjectId - @Prop({ enum: ['pending', 'confirmed', 'cancelled', 'rescheduled'], default: 'pending', index: true }) + @Prop({ enum: ['pending', 'confirmed', 'cancelled', 'rescheduled', 'calendar_failed'], default: 'pending', index: true }) status!: BookingStatus @Prop({ required: true, index: true }) @@ -84,6 +86,17 @@ export class Booking { // then atomically appends it — making reminders idempotent across runs. @Prop({ type: [Number], default: [] }) sentReminderOffsets!: number[] + + // Calendar-write retry bookkeeping (§8.2.4). When the synchronous Stalwart + // write fails, the booking stays 'pending' with its SlotLock retained and the + // retry worker drives `calendarWriteAttempts` up to the configured max. The + // last error is kept for diagnostics; on terminal failure status becomes + // 'calendar_failed' and the lock is released. + @Prop({ default: 0 }) + calendarWriteAttempts!: number + + @Prop({ trim: true }) + lastCalendarError?: string } export const BookingSchema = SchemaFactory.createForClass(Booking)