feat(scheduling): retry calendar writes for pending bookings

A failed Stalwart calendar write during confirmation no longer deletes the
booking + SlotLock. The booking stays 'pending' with its lock retained, and a
new @Cron worker (every 2 min, max 5 attempts by default) re-drives the write:
on success it promotes to 'confirmed' and sends the confirmation email; after
the cap it moves to the terminal 'calendar_failed' state and releases the lock.

Tracks calendarWriteAttempts + lastCalendarError on the Booking. The public
confirm endpoint still throws 503 on a failed first write (preserving the DoD:
never surface a confirmed booking without a calendar event); the pending row is
left for the background retry to finish.
This commit is contained in:
Ronni Baslund
2026-06-07 08:49:53 +02:00
parent 9e1defa946
commit 2cb13a1a14
4 changed files with 163 additions and 24 deletions
@@ -10,10 +10,11 @@ import { ConfigService } from '@nestjs/config'
import { InjectModel } from '@nestjs/mongoose'
import { randomBytes, randomUUID } from 'node:crypto'
import { Model, Types } from 'mongoose'
import { Booking, BookingDocument } from '../../schemas/booking.schema.js'
import { EventTypeDocument } from '../../schemas/event-type.schema.js'
import { HostDocument } from '../../schemas/scheduling-host.schema.js'
import { Booking, BookingDocument, BookingStatus } from '../../schemas/booking.schema.js'
import { EventType, EventTypeDocument } from '../../schemas/event-type.schema.js'
import { Host, HostDocument } from '../../schemas/scheduling-host.schema.js'
import { SlotLock, SlotLockDocument } from '../../schemas/slot-lock.schema.js'
import { Tenant, TenantDocument } from '../../schemas/tenant.schema.js'
import { confirmationEmail, cancellationEmail } from '../email/booking-templates.js'
import { buildBookingIcs } from '../email/ics.js'
import { JmapMailer } from '../email/jmap-mailer.service.js'
@@ -56,6 +57,9 @@ export class BookingsService {
constructor(
@InjectModel(Booking.name) private readonly bookingModel: Model<BookingDocument>,
@InjectModel(SlotLock.name) private readonly lockModel: Model<SlotLockDocument>,
@InjectModel(Tenant.name) private readonly tenantModel: Model<TenantDocument>,
@InjectModel(Host.name) private readonly hostModel: Model<HostDocument>,
@InjectModel(EventType.name) private readonly eventTypeModel: Model<EventTypeDocument>,
private readonly slots: SlotService,
private readonly provisioner: CredentialProvisioner,
private readonly gateway: JmapCalendarGateway,
@@ -166,38 +170,117 @@ export class BookingsService {
}
// (d) Write to the host's Stalwart calendar; promote to confirmed on success.
let access: HostCalendarAccess
// On failure we DO NOT delete the booking/lock anymore: the booking stays
// 'pending' with its SlotLock held so the retry worker (§8.2.4) can re-drive
// the write. We still surface 503 to the synchronous caller so the attendee
// is told to retry immediately — but the slot is now durably reserved and
// will be promoted-and-emailed (or terminally released) in the background.
//
// Public-confirm behavior (DoD: never silently confirm without a calendar
// event): the public endpoint keeps throwing ServiceUnavailable on a failed
// first write rather than returning a 'pending' booking, so the UI never
// tells the attendee "confirmed" before a calendar event exists. The pending
// booking lives on for the background retry to finish the job.
const written = await this.attemptCalendarWrite(ctx, booking)
if (!written) {
throw new ServiceUnavailableException('Could not complete the booking on the calendar — please try again.')
}
return booking
}
/**
* Drives the Stalwart calendar write for a pending booking and, on success,
* promotes it to 'confirmed' and fires the branded confirmation email.
*
* On failure it increments `calendarWriteAttempts`, records `lastCalendarError`
* and leaves the booking 'pending' with its SlotLock intact for a later retry.
* Returns true iff the booking is now confirmed. The `calendarEventUid` is
* generated up-front and reused across attempts, so a retry after a partial
* failure is idempotent (§9).
*
* NOTE(review): this span appears to come from a diff rendering in which the
* removed and the added lines are both present (two `uid:` entries, two
* `resolveAccess` calls, an old catch body that deletes and throws). The
* documentation here describes the added lines — confirm against the real file.
*
* @param ctx Booking context; `host` and `eventType` are read here and the
*   whole object is forwarded to `sendEmail`.
* @param booking The booking whose stored fields (uid, times, attendee,
*   location) are used to build the calendar event; it is mutated and saved.
* @returns `true` when the event was created and the booking saved as
*   'confirmed', `false` when any step inside the `try` threw.
*/
private async attemptCalendarWrite(ctx: BookingContext, booking: BookingDocument): Promise<boolean> {
const { host, eventType } = ctx
try {
// Resolve the host's calendar access, then create the event from the
// values persisted on the booking (not from the original request input).
access = await this.provisioner.resolveAccess(host)
const access = await this.provisioner.resolveAccess(host)
const { id } = await this.gateway.createEvent(access, {
uid: calendarEventUid,
uid: booking.calendarEventUid,
title: eventType.title,
description: input.attendeeNotes,
startUtc,
endUtc,
description: booking.attendeeNotes,
startUtc: booking.startUtc,
endUtc: booking.endUtc,
hostTimezone: host.timezone,
location: location.url,
location: booking.locationUrl,
hostEmail: host.email,
attendeeName: input.attendeeName,
attendeeEmail: input.attendeeEmail,
attendeeName: booking.attendeeName,
attendeeEmail: booking.attendeeEmail,
})
// Record the created event id and promote the booking.
// NOTE(review): this save() sits inside the same try as createEvent, so a
// save failure after a successful calendar write is handled by the catch
// below as if the calendar write itself had failed, with `status` already
// set to 'confirmed' in memory — confirm this is intended.
booking.calendarEventId = id
booking.status = 'confirmed'
await booking.save()
// Branded confirmation email — best-effort (booking already valid).
// Not awaited: a rejection is only logged and does not affect the result.
this.sendEmail(ctx, booking, access, 'confirmation').catch((e) =>
this.logger.warn(`Confirmation email failed for ${booking.attendeeEmail}: ${e.message}`),
)
return true
} catch (err) {
// NOTE(review): the next five lines (comment, two deletes, log, throw) look
// like the pre-change catch body kept by the diff rendering; the lines after
// them are the retry bookkeeping described in the doc comment — confirm.
// Compensate: never leave a confirmed-looking booking with no calendar event.
await this.lockModel.deleteOne({ hostId: host._id, startUtc, bookingId: booking._id }).exec()
await this.bookingModel.deleteOne({ _id: booking._id }).exec()
this.logger.error(`Calendar write failed for ${host.email}: ${(err as Error).message}`)
throw new ServiceUnavailableException('Could not complete the booking on the calendar — please try again.')
// Count the failed attempt and remember why it failed. `?? 0` covers
// bookings that have no attempt counter yet.
booking.calendarWriteAttempts = (booking.calendarWriteAttempts ?? 0) + 1
booking.lastCalendarError = (err as Error).message
// A failure to persist the bookkeeping is deliberately swallowed here, so
// the in-memory counter may be ahead of the stored one.
await booking.save().catch(() => undefined)
this.logger.error(
`Calendar write failed for ${host.email} (booking ${booking._id}, attempt ${booking.calendarWriteAttempts}): ${(err as Error).message}`,
)
return false
}
}
// ── Retry queue (§8.2.4) ─────────────────────────────────────────────────────
/**
* Re-drive the calendar write for a single pending booking. On success the
* booking is promoted to confirmed and the confirmation email is sent. Once
* attempts reach `maxAttempts` the booking moves to the terminal
* 'calendar_failed' state and its SlotLock is released so the slot frees up.
* Returns the booking's resulting status. Invoked by the retry worker.
*
* NOTE(review): this span appears to come from a diff rendering that kept some
* removed lines (the email block that references `access`, and a stray
* `return booking`). The documentation describes the added lines — confirm
* against the real file.
*
* @param booking The pending booking to retry; it is mutated and saved.
* @param maxAttempts Attempt cap; when `calendarWriteAttempts` reaches it after
*   a failed write the booking is marked 'calendar_failed'.
* @returns The booking's status after this attempt.
*/
async retryPendingCalendarWrite(booking: BookingDocument, maxAttempts: number): Promise<BookingStatus> {
// Reload the three documents needed to rebuild the booking context.
const tenant = await this.tenantModel.findById(booking.tenantId).exec()
const host = await this.hostModel.findById(booking.hostId).exec()
const eventType = await this.eventTypeModel.findById(booking.eventTypeId).exec()
if (!tenant || !host || !eventType) {
// NOTE(review): this early return does not increment calendarWriteAttempts,
// so a booking whose tenant/host/eventType is gone stays 'pending' and under
// the cap, and will match findPendingForRetry on every run — confirm whether
// it should count as a failed attempt instead.
this.logger.warn(`Retry skipped — missing tenant/host/eventType for booking ${booking._id}`)
return booking.status
}
// NOTE(review): the four lines below reference `ctx` and `access` before any
// declaration in this method; they look like removed diff lines — confirm.
// (e) Branded confirmation email — best-effort (booking already valid).
this.sendEmail(ctx, booking, access, 'confirmation').catch((e) =>
this.logger.warn(`Confirmation email failed for ${booking.attendeeEmail}: ${e.message}`),
)
// Only the tenant fields listed here are carried into the context.
const ctx: BookingContext = {
tenant: { _id: tenant._id, slug: tenant.slug, name: tenant.name, brandColor: tenant.brandColor },
host,
eventType,
}
// NOTE(review): `return booking` below looks like a removed diff line (the
// declared return type is BookingStatus) — confirm.
return booking
const ok = await this.attemptCalendarWrite(ctx, booking)
if (ok) return booking.status
// The write failed; attemptCalendarWrite has already bumped the counter.
if (booking.calendarWriteAttempts >= maxAttempts) {
booking.status = 'calendar_failed'
// NOTE(review): a failed save is swallowed here and the lock is released
// regardless, so the stored booking could remain 'pending' at the cap with
// no SlotLock — confirm this is acceptable.
await booking.save().catch(() => undefined)
await this.lockModel
.deleteOne({ hostId: booking.hostId, startUtc: booking.startUtc, bookingId: booking._id })
.exec()
.catch(() => undefined)
this.logger.error(
`Booking ${booking._id} reached max calendar-write attempts (${maxAttempts}); marked calendar_failed and released its slot lock. Last error: ${booking.lastCalendarError}`,
)
}
return booking.status
}
/**
 * Loads the pending bookings that are still under the calendar-write attempt
 * cap, oldest first, for the retry worker to re-drive.
 *
 * @param maxAttempts Attempt cap; only bookings with fewer recorded attempts match.
 * @param limit Maximum number of bookings to load (defaults to 100).
 * @returns The matching booking documents ordered by ascending `createdAt`.
 */
findPendingForRetry(maxAttempts: number, limit = 100): Promise<BookingDocument[]> {
  // Build the query step by step; each builder call refines the same query object.
  const eligible = this.bookingModel.find({
    status: 'pending',
    calendarWriteAttempts: { $lt: maxAttempts },
  })
  eligible.sort({ createdAt: 1 })
  eligible.limit(limit)
  return eligible.exec()
}
// ── Manage / cancel / reschedule ───────────────────────────────────────────
@@ -0,0 +1,41 @@
import { Injectable, Logger } from '@nestjs/common'
import { ConfigService } from '@nestjs/config'
import { Cron } from '@nestjs/schedule'
import { BookingsService } from './bookings.service.js'
/**
 * Calendar-write retry queue (§8.2.4).
 *
 * When the synchronous Stalwart calendar write fails during confirmation, the
 * booking is left 'pending' with its SlotLock retained (rather than deleted).
 * This cron periodically re-drives the write for those pending bookings: on
 * success the booking is promoted to 'confirmed' and the confirmation email is
 * sent; once attempts hit the cap the booking moves to the terminal
 * 'calendar_failed' state and its lock is released.
 *
 * The per-booking attempt counter lives on the Booking. Runs within this
 * process never overlap: a tick that fires while the previous run is still
 * working is skipped, so the same booking is not retried concurrently.
 *
 * Max attempts is configurable via SCHEDULING_CALENDAR_RETRY_MAX (default 5).
 */
@Injectable()
export class CalendarRetryWorker {
  private readonly logger = new Logger(CalendarRetryWorker.name)
  private readonly maxAttempts: number
  /** True while a run is in flight; used to skip overlapping cron ticks. */
  private running = false

  constructor(
    private readonly bookings: BookingsService,
    config: ConfigService,
  ) {
    // Floor BEFORE validating so a fractional setting such as 0.5 cannot
    // collapse to a cap of 0 (which would disable retries altogether).
    const parsed = Math.floor(Number(config.get<string>('SCHEDULING_CALENDAR_RETRY_MAX')))
    this.maxAttempts = Number.isFinite(parsed) && parsed >= 1 ? parsed : 5
  }

  /**
   * Cron entry point: loads the pending bookings still under the attempt cap
   * and retries their calendar write one at a time.
   *
   * Never rejects: a failure for one booking is logged and the loop continues;
   * a failure to load the batch is logged and the run ends.
   */
  // Every 2 minutes (@nestjs/schedule has no EVERY_2_MINUTES preset).
  @Cron('*/2 * * * *', { name: 'calendar-write-retry' })
  async run(): Promise<void> {
    // Bookings are processed sequentially, so a slow calendar backend can make
    // a run outlast the 2-minute interval; do not start a second run on top.
    if (this.running) {
      this.logger.warn('Previous calendar-write retry run still in progress — skipping this tick')
      return
    }
    this.running = true
    try {
      const pending = await this.bookings.findPendingForRetry(this.maxAttempts)
      if (pending.length === 0) return
      this.logger.log(`Retrying calendar writes for ${pending.length} pending booking(s)`)
      for (const booking of pending) {
        try {
          await this.bookings.retryPendingCalendarWrite(booking, this.maxAttempts)
        } catch (e: unknown) {
          this.logger.warn(
            `Calendar retry failed for booking ${booking._id}: ${CalendarRetryWorker.describe(e)}`,
          )
        }
      }
    } catch (e: unknown) {
      // Loading the batch failed; log instead of letting the rejection escape the tick.
      this.logger.error(`Calendar retry run aborted: ${CalendarRetryWorker.describe(e)}`)
    } finally {
      this.running = false
    }
  }

  /** Extracts a printable message from an arbitrary thrown value. */
  private static describe(e: unknown): string {
    return e instanceof Error ? e.message : String(e)
  }
}
@@ -14,6 +14,7 @@ import { User, UserSchema } from '../schemas/user.schema.js'
import { TenantsModule } from '../tenants/tenants.module.js'
import { AvailabilityService } from './availability/availability.service.js'
import { BookingsService } from './bookings/bookings.service.js'
import { CalendarRetryWorker } from './bookings/calendar-retry.worker.js'
import { JmapMailer } from './email/jmap-mailer.service.js'
import { EventTypesService } from './event-types/event-types.service.js'
import { HostsService } from './hosts/hosts.service.js'
@@ -59,6 +60,7 @@ import { StalwartCalendarModule } from './stalwart-calendar/stalwart-calendar.mo
PublicSchedulingService,
JmapMailer,
BookingReminderWorker,
CalendarRetryWorker,
],
})
export class SchedulingModule {}