feat(scheduling): retry calendar writes for pending bookings
A failed Stalwart calendar write during confirmation no longer deletes the booking + SlotLock. The booking stays 'pending' with its lock retained, and a new @Cron worker (every 2 min, max 5 attempts by default) re-drives the write: on success it promotes to 'confirmed' and sends the confirmation email; after the cap it moves to the terminal 'calendar_failed' state and releases the lock. Tracks calendarWriteAttempts + lastCalendarError on the Booking. The public confirm endpoint still throws 503 on a failed first write (preserving the DoD: never surface a confirmed booking without a calendar event); the pending row is left for the background retry to finish.
This commit is contained in:
@@ -10,10 +10,11 @@ import { ConfigService } from '@nestjs/config'
|
||||
import { InjectModel } from '@nestjs/mongoose'
|
||||
import { randomBytes, randomUUID } from 'node:crypto'
|
||||
import { Model, Types } from 'mongoose'
|
||||
import { Booking, BookingDocument } from '../../schemas/booking.schema.js'
|
||||
import { EventTypeDocument } from '../../schemas/event-type.schema.js'
|
||||
import { HostDocument } from '../../schemas/scheduling-host.schema.js'
|
||||
import { Booking, BookingDocument, BookingStatus } from '../../schemas/booking.schema.js'
|
||||
import { EventType, EventTypeDocument } from '../../schemas/event-type.schema.js'
|
||||
import { Host, HostDocument } from '../../schemas/scheduling-host.schema.js'
|
||||
import { SlotLock, SlotLockDocument } from '../../schemas/slot-lock.schema.js'
|
||||
import { Tenant, TenantDocument } from '../../schemas/tenant.schema.js'
|
||||
import { confirmationEmail, cancellationEmail } from '../email/booking-templates.js'
|
||||
import { buildBookingIcs } from '../email/ics.js'
|
||||
import { JmapMailer } from '../email/jmap-mailer.service.js'
|
||||
@@ -56,6 +57,9 @@ export class BookingsService {
|
||||
constructor(
|
||||
@InjectModel(Booking.name) private readonly bookingModel: Model<BookingDocument>,
|
||||
@InjectModel(SlotLock.name) private readonly lockModel: Model<SlotLockDocument>,
|
||||
@InjectModel(Tenant.name) private readonly tenantModel: Model<TenantDocument>,
|
||||
@InjectModel(Host.name) private readonly hostModel: Model<HostDocument>,
|
||||
@InjectModel(EventType.name) private readonly eventTypeModel: Model<EventTypeDocument>,
|
||||
private readonly slots: SlotService,
|
||||
private readonly provisioner: CredentialProvisioner,
|
||||
private readonly gateway: JmapCalendarGateway,
|
||||
@@ -166,38 +170,117 @@ export class BookingsService {
|
||||
}
|
||||
|
||||
// (d) Write to the host's Stalwart calendar; promote to confirmed on success.
|
||||
let access: HostCalendarAccess
|
||||
// On failure we DO NOT delete the booking/lock anymore: the booking stays
|
||||
// 'pending' with its SlotLock held so the retry worker (§8.2.4) can re-drive
|
||||
// the write. We still surface 503 to the synchronous caller so the attendee
|
||||
// is told to retry immediately — but the slot is now durably reserved and
|
||||
// will be promoted-and-emailed (or terminally released) in the background.
|
||||
//
|
||||
// Public-confirm behavior (DoD: never silently confirm without a calendar
|
||||
// event): the public endpoint keeps throwing ServiceUnavailable on a failed
|
||||
// first write rather than returning a 'pending' booking, so the UI never
|
||||
// tells the attendee "confirmed" before a calendar event exists. The pending
|
||||
// booking lives on for the background retry to finish the job.
|
||||
const written = await this.attemptCalendarWrite(ctx, booking)
|
||||
if (!written) {
|
||||
throw new ServiceUnavailableException('Could not complete the booking on the calendar — please try again.')
|
||||
}
|
||||
return booking
|
||||
}
|
||||
|
||||
/**
 * Drives the Stalwart calendar write for a booking and, on success, promotes
 * it to 'confirmed' and fires the branded confirmation email.
 *
 * The event is created with the uid already stored on the booking
 * (`calendarEventUid`), so every attempt targets the same uid. This is what
 * makes a repeated write safe, assuming the gateway de-duplicates on uid (§9).
 *
 * On failure the booking is put back to 'pending' in memory before the
 * bookkeeping is saved. Without that, a failure of the confirming `save()`
 * (after the event was already created) would let the bookkeeping save persist
 * a 'confirmed' booking that this method then reports as failed.
 * `calendarWriteAttempts` is incremented and `lastCalendarError` recorded; the
 * SlotLock is not touched here.
 *
 * @param ctx     Tenant / host / event-type context for the booking.
 * @param booking The booking document to write and promote; mutated in place.
 * @returns true iff the booking was saved as 'confirmed'; false on any failure
 *          (this method does not throw for calendar or save errors).
 */
private async attemptCalendarWrite(ctx: BookingContext, booking: BookingDocument): Promise<boolean> {
  const { host, eventType } = ctx
  try {
    const access = await this.provisioner.resolveAccess(host)
    const { id } = await this.gateway.createEvent(access, {
      uid: booking.calendarEventUid,
      title: eventType.title,
      description: booking.attendeeNotes,
      startUtc: booking.startUtc,
      endUtc: booking.endUtc,
      hostTimezone: host.timezone,
      location: booking.locationUrl,
      hostEmail: host.email,
      attendeeName: booking.attendeeName,
      attendeeEmail: booking.attendeeEmail,
    })
    booking.calendarEventId = id
    booking.status = 'confirmed'
    await booking.save()

    // Branded confirmation email — best-effort (booking already valid).
    // Isolated in its own try/catch so that neither a rejection nor a
    // synchronous throw can reach the failure path below and un-confirm a
    // booking that has already been persisted as confirmed.
    const warnEmailFailure = (e: unknown): void =>
      this.logger.warn(
        `Confirmation email failed for ${booking.attendeeEmail}: ${e instanceof Error ? e.message : String(e)}`,
      )
    try {
      void this.sendEmail(ctx, booking, access, 'confirmation').catch(warnEmailFailure)
    } catch (e: unknown) {
      warnEmailFailure(e)
    }
    return true
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err)

    // Only reachable before the confirming save completed, so the booking is
    // not confirmed in the database: undo the optimistic in-memory promotion.
    booking.status = 'pending'
    booking.calendarWriteAttempts = (booking.calendarWriteAttempts ?? 0) + 1
    booking.lastCalendarError = reason
    try {
      await booking.save()
    } catch (saveErr: unknown) {
      // Still best-effort, but no longer silent: an unsaved counter means this
      // attempt will not count towards the retry cap.
      this.logger.warn(
        `Could not persist calendar-write bookkeeping for booking ${booking._id}: ${
          saveErr instanceof Error ? saveErr.message : String(saveErr)
        }`,
      )
    }
    this.logger.error(
      `Calendar write failed for ${host.email} (booking ${booking._id}, attempt ${booking.calendarWriteAttempts}): ${reason}`,
    )
    return false
  }
}
|
||||
|
||||
return booking
|
||||
// ── Retry queue (§8.2.4) ─────────────────────────────────────────────────────
|
||||
/**
 * Re-drive the calendar write for a single pending booking. Invoked by the
 * retry worker.
 *
 * - On success the booking is promoted to 'confirmed' and the confirmation
 *   email is sent (both done by `attemptCalendarWrite`).
 * - If the tenant, host or event type can no longer be loaded, the write
 *   cannot be attempted. This is counted as a failed attempt, so the booking
 *   still converges to the terminal state instead of being retried (and
 *   holding its SlotLock) indefinitely.
 * - Once `calendarWriteAttempts` reaches `maxAttempts` the booking moves to the
 *   terminal 'calendar_failed' state and its SlotLock is deleted so the slot
 *   frees up.
 *
 * @param booking     The pending booking to retry; mutated in place.
 * @param maxAttempts Attempt cap after which the booking becomes 'calendar_failed'.
 * @returns The booking's resulting (in-memory) status.
 */
async retryPendingCalendarWrite(booking: BookingDocument, maxAttempts: number): Promise<BookingStatus> {
  const describe = (e: unknown): string => (e instanceof Error ? e.message : String(e))

  // The three lookups are independent, so run them concurrently.
  const [tenant, host, eventType] = await Promise.all([
    this.tenantModel.findById(booking.tenantId).exec(),
    this.hostModel.findById(booking.hostId).exec(),
    this.eventTypeModel.findById(booking.eventTypeId).exec(),
  ])

  if (!tenant || !host || !eventType) {
    this.logger.warn(`Retry skipped — missing tenant/host/eventType for booking ${booking._id}`)
    // Count the skipped run as a failed attempt so it is bounded by the cap.
    booking.calendarWriteAttempts = (booking.calendarWriteAttempts ?? 0) + 1
    booking.lastCalendarError = 'Missing tenant/host/eventType'
    try {
      await booking.save()
    } catch (e: unknown) {
      this.logger.warn(
        `Could not persist calendar-write bookkeeping for booking ${booking._id}: ${describe(e)}`,
      )
    }
  } else {
    const ctx: BookingContext = {
      tenant: { _id: tenant._id, slug: tenant.slug, name: tenant.name, brandColor: tenant.brandColor },
      host,
      eventType,
    }
    const ok = await this.attemptCalendarWrite(ctx, booking)
    if (ok) return booking.status
  }

  if (booking.calendarWriteAttempts >= maxAttempts) {
    booking.status = 'calendar_failed'
    // Both writes remain best-effort (a failure must not abort the worker's
    // sweep), but failures are logged rather than silently discarded.
    try {
      await booking.save()
    } catch (e: unknown) {
      this.logger.warn(`Could not persist calendar_failed for booking ${booking._id}: ${describe(e)}`)
    }
    try {
      await this.lockModel
        .deleteOne({ hostId: booking.hostId, startUtc: booking.startUtc, bookingId: booking._id })
        .exec()
    } catch (e: unknown) {
      this.logger.warn(`Could not release slot lock for booking ${booking._id}: ${describe(e)}`)
    }
    this.logger.error(
      `Booking ${booking._id} reached max calendar-write attempts (${maxAttempts}); marked calendar_failed and released its slot lock. Last error: ${booking.lastCalendarError}`,
    )
  }
  return booking.status
}
|
||||
|
||||
// Loads bookings that are still 'pending' and whose `calendarWriteAttempts` is
// below `maxAttempts`, oldest first (ascending `createdAt`), capped at `limit`
// documents (default 100).
// NOTE(review): the filter has no age or attempt floor, so a booking whose
// first synchronous write is still in flight would also match — confirm this
// is acceptable.
findPendingForRetry(maxAttempts: number, limit = 100): Promise<BookingDocument[]> {
  const oldestFirst = this.bookingModel
    .find({ status: 'pending', calendarWriteAttempts: { $lt: maxAttempts } })
    .sort({ createdAt: 1 })
  return oldestFirst.limit(limit).exec()
}
|
||||
|
||||
// ── Manage / cancel / reschedule ───────────────────────────────────────────
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
import { Injectable, Logger } from '@nestjs/common'
|
||||
import { ConfigService } from '@nestjs/config'
|
||||
import { Cron } from '@nestjs/schedule'
|
||||
import { BookingsService } from './bookings.service.js'
|
||||
|
||||
// Calendar-write retry queue (§8.2.4). When the synchronous Stalwart calendar
// write fails during confirmation, the booking is left 'pending' with its
// SlotLock retained (rather than deleted). This cron periodically re-drives the
// write for those pending bookings: on success the booking is promoted to
// 'confirmed' and the confirmation email is sent; once attempts hit the cap the
// booking moves to the terminal 'calendar_failed' state and its lock is
// released. The per-booking attempt counter lives on the Booking, so the worker
// itself keeps no per-booking state. Max attempts is configurable via
// SCHEDULING_CALENDAR_RETRY_MAX (default 5).
@Injectable()
export class CalendarRetryWorker {
  private readonly logger = new Logger(CalendarRetryWorker.name)
  private readonly maxAttempts: number
  // True while a sweep is in progress. Bookings are processed sequentially, so
  // a slow sweep could otherwise still be running when the next tick fires and
  // the same bookings would be re-driven twice. In-process guard only: it does
  // not coordinate between multiple application instances.
  private running = false

  constructor(
    private readonly bookings: BookingsService,
    config: ConfigService,
  ) {
    // Validate the floored value: a fractional setting such as 0.5 would
    // otherwise pass a `> 0` check and floor to 0, disabling retries.
    const parsed = Math.floor(Number(config.get<string>('SCHEDULING_CALENDAR_RETRY_MAX')))
    this.maxAttempts = Number.isFinite(parsed) && parsed >= 1 ? parsed : 5
  }

  // Every 2 minutes (@nestjs/schedule has no EVERY_2_MINUTES preset).
  // Each booking is retried in isolation: a failure for one booking is logged
  // and the sweep moves on to the next.
  @Cron('*/2 * * * *', { name: 'calendar-write-retry' })
  async run(): Promise<void> {
    if (this.running) {
      this.logger.warn('Previous calendar-write retry sweep is still running; skipping this tick')
      return
    }
    this.running = true
    try {
      const pending = await this.bookings.findPendingForRetry(this.maxAttempts)
      if (pending.length === 0) return

      this.logger.log(`Retrying calendar writes for ${pending.length} pending booking(s)`)
      for (const booking of pending) {
        try {
          await this.bookings.retryPendingCalendarWrite(booking, this.maxAttempts)
        } catch (e: unknown) {
          const reason = e instanceof Error ? e.message : String(e)
          this.logger.warn(`Calendar retry failed for booking ${booking._id}: ${reason}`)
        }
      }
    } finally {
      // Always clear the flag, including when the lookup itself rejects.
      this.running = false
    }
  }
}
|
||||
@@ -14,6 +14,7 @@ import { User, UserSchema } from '../schemas/user.schema.js'
|
||||
import { TenantsModule } from '../tenants/tenants.module.js'
|
||||
import { AvailabilityService } from './availability/availability.service.js'
|
||||
import { BookingsService } from './bookings/bookings.service.js'
|
||||
import { CalendarRetryWorker } from './bookings/calendar-retry.worker.js'
|
||||
import { JmapMailer } from './email/jmap-mailer.service.js'
|
||||
import { EventTypesService } from './event-types/event-types.service.js'
|
||||
import { HostsService } from './hosts/hosts.service.js'
|
||||
@@ -59,6 +60,7 @@ import { StalwartCalendarModule } from './stalwart-calendar/stalwart-calendar.mo
|
||||
PublicSchedulingService,
|
||||
JmapMailer,
|
||||
BookingReminderWorker,
|
||||
CalendarRetryWorker,
|
||||
],
|
||||
})
|
||||
export class SchedulingModule {}
|
||||
|
||||
@@ -6,7 +6,9 @@ export type BookingDocument = HydratedDocument<Booking>
|
||||
|
||||
// 'pending' is the compensating state when the SlotLock is held but the calendar
|
||||
// write hasn't succeeded yet (§8.2.4) — never surfaced as a confirmed booking.
|
||||
export type BookingStatus = 'pending' | 'confirmed' | 'cancelled' | 'rescheduled'
|
||||
// 'calendar_failed' is the terminal state once the retry worker exhausts its
|
||||
// attempts; the SlotLock is released so the slot frees up.
|
||||
export type BookingStatus = 'pending' | 'confirmed' | 'cancelled' | 'rescheduled' | 'calendar_failed'
|
||||
|
||||
// A confirmed appointment. All instants are UTC; attendee/host tz are IANA
|
||||
// strings for display. `calendarEventUid` is generated client-side BEFORE the
|
||||
@@ -23,7 +25,7 @@ export class Booking {
|
||||
@Prop({ type: Types.ObjectId, ref: 'Host', required: true, index: true })
|
||||
hostId!: Types.ObjectId
|
||||
|
||||
@Prop({ enum: ['pending', 'confirmed', 'cancelled', 'rescheduled'], default: 'pending', index: true })
|
||||
@Prop({ enum: ['pending', 'confirmed', 'cancelled', 'rescheduled', 'calendar_failed'], default: 'pending', index: true })
|
||||
status!: BookingStatus
|
||||
|
||||
@Prop({ required: true, index: true })
|
||||
@@ -84,6 +86,17 @@ export class Booking {
|
||||
// then atomically appends it — making reminders idempotent across runs.
|
||||
@Prop({ type: [Number], default: [] })
|
||||
sentReminderOffsets!: number[]
|
||||
|
||||
// Calendar-write retry bookkeeping (§8.2.4). When the synchronous Stalwart
|
||||
// write fails, the booking stays 'pending' with its SlotLock retained and the
|
||||
// retry worker drives `calendarWriteAttempts` up to the configured max. The
|
||||
// last error is kept for diagnostics; on terminal failure status becomes
|
||||
// 'calendar_failed' and the lock is released.
|
||||
@Prop({ default: 0 })
|
||||
calendarWriteAttempts!: number
|
||||
|
||||
@Prop({ trim: true })
|
||||
lastCalendarError?: string
|
||||
}
|
||||
|
||||
export const BookingSchema = SchemaFactory.createForClass(Booking)
|
||||
|
||||
Reference in New Issue
Block a user