feat(scheduling): retry calendar writes for pending bookings
A failed Stalwart calendar write during confirmation no longer deletes the booking + SlotLock. The booking stays 'pending' with its lock retained, and a new @Cron worker (every 2 min, max 5 attempts by default) re-drives the write: on success it promotes to 'confirmed' and sends the confirmation email; after the cap it moves to the terminal 'calendar_failed' state and releases the lock. Tracks calendarWriteAttempts + lastCalendarError on the Booking. The public confirm endpoint still throws 503 on a failed first write (preserving the DoD: never surface a confirmed booking without a calendar event); the pending row is left for the background retry to finish.
This commit is contained in:
@@ -10,10 +10,11 @@ import { ConfigService } from '@nestjs/config'
|
|||||||
import { InjectModel } from '@nestjs/mongoose'
|
import { InjectModel } from '@nestjs/mongoose'
|
||||||
import { randomBytes, randomUUID } from 'node:crypto'
|
import { randomBytes, randomUUID } from 'node:crypto'
|
||||||
import { Model, Types } from 'mongoose'
|
import { Model, Types } from 'mongoose'
|
||||||
import { Booking, BookingDocument } from '../../schemas/booking.schema.js'
|
import { Booking, BookingDocument, BookingStatus } from '../../schemas/booking.schema.js'
|
||||||
import { EventTypeDocument } from '../../schemas/event-type.schema.js'
|
import { EventType, EventTypeDocument } from '../../schemas/event-type.schema.js'
|
||||||
import { HostDocument } from '../../schemas/scheduling-host.schema.js'
|
import { Host, HostDocument } from '../../schemas/scheduling-host.schema.js'
|
||||||
import { SlotLock, SlotLockDocument } from '../../schemas/slot-lock.schema.js'
|
import { SlotLock, SlotLockDocument } from '../../schemas/slot-lock.schema.js'
|
||||||
|
import { Tenant, TenantDocument } from '../../schemas/tenant.schema.js'
|
||||||
import { confirmationEmail, cancellationEmail } from '../email/booking-templates.js'
|
import { confirmationEmail, cancellationEmail } from '../email/booking-templates.js'
|
||||||
import { buildBookingIcs } from '../email/ics.js'
|
import { buildBookingIcs } from '../email/ics.js'
|
||||||
import { JmapMailer } from '../email/jmap-mailer.service.js'
|
import { JmapMailer } from '../email/jmap-mailer.service.js'
|
||||||
@@ -56,6 +57,9 @@ export class BookingsService {
|
|||||||
constructor(
|
constructor(
|
||||||
@InjectModel(Booking.name) private readonly bookingModel: Model<BookingDocument>,
|
@InjectModel(Booking.name) private readonly bookingModel: Model<BookingDocument>,
|
||||||
@InjectModel(SlotLock.name) private readonly lockModel: Model<SlotLockDocument>,
|
@InjectModel(SlotLock.name) private readonly lockModel: Model<SlotLockDocument>,
|
||||||
|
@InjectModel(Tenant.name) private readonly tenantModel: Model<TenantDocument>,
|
||||||
|
@InjectModel(Host.name) private readonly hostModel: Model<HostDocument>,
|
||||||
|
@InjectModel(EventType.name) private readonly eventTypeModel: Model<EventTypeDocument>,
|
||||||
private readonly slots: SlotService,
|
private readonly slots: SlotService,
|
||||||
private readonly provisioner: CredentialProvisioner,
|
private readonly provisioner: CredentialProvisioner,
|
||||||
private readonly gateway: JmapCalendarGateway,
|
private readonly gateway: JmapCalendarGateway,
|
||||||
@@ -166,38 +170,117 @@ export class BookingsService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// (d) Write to the host's Stalwart calendar; promote to confirmed on success.
|
// (d) Write to the host's Stalwart calendar; promote to confirmed on success.
|
||||||
let access: HostCalendarAccess
|
// On failure we DO NOT delete the booking/lock anymore: the booking stays
|
||||||
|
// 'pending' with its SlotLock held so the retry worker (§8.2.4) can re-drive
|
||||||
|
// the write. We still surface 503 to the synchronous caller so the attendee
|
||||||
|
// is told to retry immediately — but the slot is now durably reserved and
|
||||||
|
// will be promoted-and-emailed (or terminally released) in the background.
|
||||||
|
//
|
||||||
|
// Public-confirm behavior (DoD: never silently confirm without a calendar
|
||||||
|
// event): the public endpoint keeps throwing ServiceUnavailable on a failed
|
||||||
|
// first write rather than returning a 'pending' booking, so the UI never
|
||||||
|
// tells the attendee "confirmed" before a calendar event exists. The pending
|
||||||
|
// booking lives on for the background retry to finish the job.
|
||||||
|
const written = await this.attemptCalendarWrite(ctx, booking)
|
||||||
|
if (!written) {
|
||||||
|
throw new ServiceUnavailableException('Could not complete the booking on the calendar — please try again.')
|
||||||
|
}
|
||||||
|
return booking
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Drives the Stalwart calendar write for a pending booking and, on success,
 * promotes it to 'confirmed' and fires the branded confirmation email.
 *
 * On failure it increments `calendarWriteAttempts`, records `lastCalendarError`
 * and leaves the booking 'pending' for a later retry; the SlotLock is not
 * touched here. The event is always written with the booking's stored
 * `calendarEventUid`, so every attempt targets the same calendar event.
 *
 * @param ctx     Tenant/host/event-type context; `host` and `eventType` are read here.
 * @param booking The pending booking to write; mutated and saved in place.
 * @returns `true` iff the booking was persisted as 'confirmed'; `false` otherwise.
 *          Never throws for a calendar or persistence failure — those are
 *          recorded on the booking and logged.
 */
private async attemptCalendarWrite(ctx: BookingContext, booking: BookingDocument): Promise<boolean> {
  const { host, eventType } = ctx
  try {
    const access = await this.provisioner.resolveAccess(host)
    const { id } = await this.gateway.createEvent(access, {
      uid: booking.calendarEventUid,
      title: eventType.title,
      description: booking.attendeeNotes,
      startUtc: booking.startUtc,
      endUtc: booking.endUtc,
      hostTimezone: host.timezone,
      location: booking.locationUrl,
      hostEmail: host.email,
      attendeeName: booking.attendeeName,
      attendeeEmail: booking.attendeeEmail,
    })
    booking.calendarEventId = id
    booking.status = 'confirmed'
    await booking.save()

    // Branded confirmation email — best-effort (booking already valid), so it
    // is deliberately not awaited and a failure only produces a warning.
    this.sendEmail(ctx, booking, access, 'confirmation').catch((e: unknown) =>
      this.logger.warn(
        `Confirmation email failed for ${booking.attendeeEmail}: ${e instanceof Error ? e.message : String(e)}`,
      ),
    )
    return true
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err)

    // The failure may have come from `booking.save()` AFTER the status was
    // flipped to 'confirmed' in memory. Reset it so the bookkeeping save below
    // cannot persist a 'confirmed' booking while this method reports failure.
    booking.status = 'pending'
    booking.calendarWriteAttempts = (booking.calendarWriteAttempts ?? 0) + 1
    booking.lastCalendarError = reason

    // Bookkeeping is best-effort, but a failure here is logged rather than
    // silently dropped: it means the attempt counter did not advance in the DB.
    await booking.save().catch((saveErr: unknown) =>
      this.logger.warn(
        `Could not persist calendar-write failure for booking ${booking._id}: ${
          saveErr instanceof Error ? saveErr.message : String(saveErr)
        }`,
      ),
    )
    this.logger.error(
      `Calendar write failed for ${host.email} (booking ${booking._id}, attempt ${booking.calendarWriteAttempts}): ${reason}`,
    )
    return false
  }
}
|
||||||
|
|
||||||
return booking
|
// ── Retry queue (§8.2.4) ─────────────────────────────────────────────────────
|
||||||
|
/**
 * Re-drives the calendar write for a single pending booking. Invoked by the
 * retry worker.
 *
 * - On success the booking is promoted to 'confirmed' (and the confirmation
 *   email is fired) by `attemptCalendarWrite`.
 * - A booking whose tenant, host or event type can no longer be loaded is
 *   counted as a failed attempt, so it also runs into the cap instead of being
 *   re-selected indefinitely while holding its SlotLock.
 * - Once `calendarWriteAttempts` reaches `maxAttempts` the booking moves to the
 *   terminal 'calendar_failed' state and its SlotLock is deleted.
 *
 * @param booking     The pending booking to retry; mutated and saved in place.
 * @param maxAttempts Attempt cap after which the booking becomes 'calendar_failed'.
 * @returns The booking's resulting (in-memory) status.
 */
async retryPendingCalendarWrite(booking: BookingDocument, maxAttempts: number): Promise<BookingStatus> {
  // The three lookups are independent of each other, so load them concurrently.
  const [tenant, host, eventType] = await Promise.all([
    this.tenantModel.findById(booking.tenantId).exec(),
    this.hostModel.findById(booking.hostId).exec(),
    this.eventTypeModel.findById(booking.eventTypeId).exec(),
  ])

  // Tracks in-memory changes made here that still need to be saved
  // (attemptCalendarWrite persists its own bookkeeping).
  let needsSave = false

  if (tenant && host && eventType) {
    const ctx: BookingContext = {
      tenant: { _id: tenant._id, slug: tenant.slug, name: tenant.name, brandColor: tenant.brandColor },
      host,
      eventType,
    }
    const ok = await this.attemptCalendarWrite(ctx, booking)
    if (ok) return booking.status
  } else {
    // Without its references the write cannot be attempted. Count this as a
    // failed attempt so the booking eventually reaches the terminal state.
    this.logger.warn(`Retry skipped — missing tenant/host/eventType for booking ${booking._id}`)
    booking.calendarWriteAttempts = (booking.calendarWriteAttempts ?? 0) + 1
    booking.lastCalendarError = 'Missing tenant, host or event type'
    needsSave = true
  }

  const exhausted = booking.calendarWriteAttempts >= maxAttempts
  if (exhausted) {
    booking.status = 'calendar_failed'
    needsSave = true
  }

  if (needsSave) {
    await booking.save().catch((e: unknown) =>
      this.logger.warn(
        `Could not persist retry state for booking ${booking._id}: ${e instanceof Error ? e.message : String(e)}`,
      ),
    )
  }

  if (exhausted) {
    // Terminal failure: free the slot so it can be booked again.
    await this.lockModel
      .deleteOne({ hostId: booking.hostId, startUtc: booking.startUtc, bookingId: booking._id })
      .exec()
      .catch((e: unknown) =>
        this.logger.warn(
          `Could not release slot lock for booking ${booking._id}: ${e instanceof Error ? e.message : String(e)}`,
        ),
      )
    this.logger.error(
      `Booking ${booking._id} reached max calendar-write attempts (${maxAttempts}); marked calendar_failed and released its slot lock. Last error: ${booking.lastCalendarError}`,
    )
  }
  return booking.status
}
|
||||||
|
|
||||||
|
/**
 * Lists bookings that are still eligible for a calendar-write retry: status
 * 'pending' with `calendarWriteAttempts` strictly below `maxAttempts`.
 *
 * @param maxAttempts Attempt cap; bookings at or above it are excluded.
 * @param limit       Maximum number of bookings to return (default 100).
 * @returns Matching bookings ordered by `createdAt` ascending (oldest first).
 */
// NOTE(review): the filter does not distinguish a booking whose synchronous
// first write is still in flight from one that already failed — confirm that
// a concurrent pick-up by the retry worker is acceptable.
findPendingForRetry(maxAttempts: number, limit = 100): Promise<BookingDocument[]> {
  const retryable = this.bookingModel.find({
    status: 'pending',
    calendarWriteAttempts: { $lt: maxAttempts },
  })
  const oldestFirst = retryable.sort({ createdAt: 1 })
  const capped = oldestFirst.limit(limit)
  return capped.exec()
}
|
||||||
|
|
||||||
// ── Manage / cancel / reschedule ───────────────────────────────────────────
|
// ── Manage / cancel / reschedule ───────────────────────────────────────────
|
||||||
|
|||||||
@@ -0,0 +1,41 @@
|
|||||||
|
import { Injectable, Logger } from '@nestjs/common'
|
||||||
|
import { ConfigService } from '@nestjs/config'
|
||||||
|
import { Cron } from '@nestjs/schedule'
|
||||||
|
import { BookingsService } from './bookings.service.js'
|
||||||
|
|
||||||
|
/**
 * Calendar-write retry queue (§8.2.4).
 *
 * When the synchronous Stalwart calendar write fails during confirmation, the
 * booking is left 'pending' with its SlotLock retained (rather than deleted).
 * This cron periodically re-drives the write for those pending bookings via
 * `BookingsService.retryPendingCalendarWrite`: on success the booking is
 * promoted to 'confirmed' and the confirmation email is sent; once attempts hit
 * the cap the booking moves to the terminal 'calendar_failed' state and its
 * lock is released. The per-booking attempt counter lives on the Booking.
 *
 * Max attempts is configurable via SCHEDULING_CALENDAR_RETRY_MAX (default 5).
 */
@Injectable()
export class CalendarRetryWorker {
  private readonly logger = new Logger(CalendarRetryWorker.name)
  private readonly maxAttempts: number

  // In-process guard: true while a run is executing, so a tick that fires
  // before the previous run finished is skipped instead of processing the same
  // pending bookings concurrently. It does not coordinate across processes.
  private running = false

  constructor(
    private readonly bookings: BookingsService,
    config: ConfigService,
  ) {
    const raw = Number(config.get<string>('SCHEDULING_CALENDAR_RETRY_MAX'))
    // Require at least 1 after flooring: a fractional value such as 0.5 would
    // otherwise floor to 0 and silently disable retries. Unset, non-numeric or
    // out-of-range values fall back to the default of 5.
    this.maxAttempts = Number.isFinite(raw) && raw >= 1 ? Math.floor(raw) : 5
  }

  /**
   * One retry pass: loads the pending bookings still under the attempt cap and
   * re-drives each one sequentially. A failure for one booking is logged and
   * does not stop the rest of the batch.
   */
  // Every 2 minutes (@nestjs/schedule has no EVERY_2_MINUTES preset).
  @Cron('*/2 * * * *', { name: 'calendar-write-retry' })
  async run(): Promise<void> {
    if (this.running) {
      this.logger.warn('Previous calendar-write retry run is still in progress; skipping this tick')
      return
    }
    this.running = true
    try {
      const pending = await this.bookings.findPendingForRetry(this.maxAttempts)
      if (pending.length === 0) return

      this.logger.log(`Retrying calendar writes for ${pending.length} pending booking(s)`)
      for (const booking of pending) {
        try {
          await this.bookings.retryPendingCalendarWrite(booking, this.maxAttempts)
        } catch (e: unknown) {
          const reason = e instanceof Error ? e.message : String(e)
          this.logger.warn(`Calendar retry failed for booking ${booking._id}: ${reason}`)
        }
      }
    } finally {
      // Always clear the guard, even if loading the batch threw.
      this.running = false
    }
  }
}
|
||||||
@@ -14,6 +14,7 @@ import { User, UserSchema } from '../schemas/user.schema.js'
|
|||||||
import { TenantsModule } from '../tenants/tenants.module.js'
|
import { TenantsModule } from '../tenants/tenants.module.js'
|
||||||
import { AvailabilityService } from './availability/availability.service.js'
|
import { AvailabilityService } from './availability/availability.service.js'
|
||||||
import { BookingsService } from './bookings/bookings.service.js'
|
import { BookingsService } from './bookings/bookings.service.js'
|
||||||
|
import { CalendarRetryWorker } from './bookings/calendar-retry.worker.js'
|
||||||
import { JmapMailer } from './email/jmap-mailer.service.js'
|
import { JmapMailer } from './email/jmap-mailer.service.js'
|
||||||
import { EventTypesService } from './event-types/event-types.service.js'
|
import { EventTypesService } from './event-types/event-types.service.js'
|
||||||
import { HostsService } from './hosts/hosts.service.js'
|
import { HostsService } from './hosts/hosts.service.js'
|
||||||
@@ -59,6 +60,7 @@ import { StalwartCalendarModule } from './stalwart-calendar/stalwart-calendar.mo
|
|||||||
PublicSchedulingService,
|
PublicSchedulingService,
|
||||||
JmapMailer,
|
JmapMailer,
|
||||||
BookingReminderWorker,
|
BookingReminderWorker,
|
||||||
|
CalendarRetryWorker,
|
||||||
],
|
],
|
||||||
})
|
})
|
||||||
export class SchedulingModule {}
|
export class SchedulingModule {}
|
||||||
|
|||||||
@@ -6,7 +6,9 @@ export type BookingDocument = HydratedDocument<Booking>
|
|||||||
|
|
||||||
// 'pending' is the compensating state when the SlotLock is held but the calendar
|
// 'pending' is the compensating state when the SlotLock is held but the calendar
|
||||||
// write hasn't succeeded yet (§8.2.4) — never surfaced as a confirmed booking.
|
// write hasn't succeeded yet (§8.2.4) — never surfaced as a confirmed booking.
|
||||||
export type BookingStatus = 'pending' | 'confirmed' | 'cancelled' | 'rescheduled'
|
// 'calendar_failed' is the terminal state once the retry worker exhausts its
|
||||||
|
// attempts; the SlotLock is released so the slot frees up.
|
||||||
|
export type BookingStatus = 'pending' | 'confirmed' | 'cancelled' | 'rescheduled' | 'calendar_failed'
|
||||||
|
|
||||||
// A confirmed appointment. All instants are UTC; attendee/host tz are IANA
|
// A confirmed appointment. All instants are UTC; attendee/host tz are IANA
|
||||||
// strings for display. `calendarEventUid` is generated client-side BEFORE the
|
// strings for display. `calendarEventUid` is generated client-side BEFORE the
|
||||||
@@ -23,7 +25,7 @@ export class Booking {
|
|||||||
@Prop({ type: Types.ObjectId, ref: 'Host', required: true, index: true })
|
@Prop({ type: Types.ObjectId, ref: 'Host', required: true, index: true })
|
||||||
hostId!: Types.ObjectId
|
hostId!: Types.ObjectId
|
||||||
|
|
||||||
@Prop({ enum: ['pending', 'confirmed', 'cancelled', 'rescheduled'], default: 'pending', index: true })
|
@Prop({ enum: ['pending', 'confirmed', 'cancelled', 'rescheduled', 'calendar_failed'], default: 'pending', index: true })
|
||||||
status!: BookingStatus
|
status!: BookingStatus
|
||||||
|
|
||||||
@Prop({ required: true, index: true })
|
@Prop({ required: true, index: true })
|
||||||
@@ -84,6 +86,17 @@ export class Booking {
|
|||||||
// then atomically appends it — making reminders idempotent across runs.
|
// then atomically appends it — making reminders idempotent across runs.
|
||||||
@Prop({ type: [Number], default: [] })
|
@Prop({ type: [Number], default: [] })
|
||||||
sentReminderOffsets!: number[]
|
sentReminderOffsets!: number[]
|
||||||
|
|
||||||
|
// Calendar-write retry bookkeeping (§8.2.4). When the synchronous Stalwart
|
||||||
|
// write fails, the booking stays 'pending' with its SlotLock retained and the
|
||||||
|
// retry worker drives `calendarWriteAttempts` up to the configured max. The
|
||||||
|
// last error is kept for diagnostics; on terminal failure status becomes
|
||||||
|
// 'calendar_failed' and the lock is released.
|
||||||
|
@Prop({ default: 0 })
|
||||||
|
calendarWriteAttempts!: number
|
||||||
|
|
||||||
|
@Prop({ trim: true })
|
||||||
|
lastCalendarError?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
export const BookingSchema = SchemaFactory.createForClass(Booking)
|
export const BookingSchema = SchemaFactory.createForClass(Booking)
|
||||||
|
|||||||
Reference in New Issue
Block a user