feat(provisioning): orchestrate Authentik/Stalwart/OCIS on tenant create

Phase 4 from docs/NEXT-STEPS.md. POST /tenants now writes Mongo AND drives
external service provisioning. A new POST /tenants/:slug/reconcile endpoint
retries the orchestration — useful when an upstream was down at create time
or external state drifted out of band.

Integration clients (services/provisioning/src/integrations/):
- AuthentikClient: real implementation. ensureGroup() is idempotent — looks
  up the group by name, creates it if missing, and returns it either way.
  Group attributes record the tenant slug + Mongo id so a group can be traced
  back to its tenant
- StalwartClient: stubbed. v0.16 removed the REST management API in favor
  of JMAP, which is significantly more work to wrap. TODO comment points
  to https://stalw.art/docs/api/management/overview for the follow-up
- OcisClient: stubbed. Needs libregraph /drives endpoint with service-to-
  service auth via OIDC client_credentials

Orchestration (provisioning.service.ts):
- Each step runs independently; one failure doesn't roll back the others
- Per-step state recorded on Tenant.provisioningStatus (ok/skipped/error/
  pending) plus error message on Tenant.provisioningErrors
- Steps return their own terminal state — 'skipped' for stubs; a void
  return defaults to 'ok' for real integrations
- Mongoose markModified() required for nested subdoc mutations to persist
- Tenant auto-flips status: pending → active when all steps are ok|skipped

Portal proxy routes (apps/portal/server/api/tenants/):
- POST /api/tenants and POST /api/tenants/:slug/reconcile forward the
  signed-in user's access token to the provisioning service. Lets the
  browser drive provisioning without minting tokens by hand. Will be
  replaced by a real "create workspace" flow with UI later

docker-compose: AUTHENTIK_API_URL/STALWART_API_URL/OCIS_API_URL now point
at the public Traefik-routed hostnames (with mkcert CA mounted into the
provisioning container so Node fetch trusts them). Previously these
pointed at internal Docker hostnames, which don't work for Authentik
because of a TLS issuer mismatch against the JWT.
This commit is contained in:
Ronni Baslund
2026-05-24 00:06:40 +02:00
parent 4bf6a85517
commit 28766b80c2
12 changed files with 357 additions and 6 deletions
@@ -0,0 +1,15 @@
import { getUserSession } from 'nuxt-oidc-auth/runtime/server/utils/session.js'
/**
 * Portal proxy for POST /api/tenants/:slug/reconcile.
 *
 * Forwards the request to the provisioning service's
 * POST /tenants/:slug/reconcile endpoint, authenticating with the signed-in
 * user's access token taken from the OIDC session.
 *
 * Responds 401 when there is no usable session and 400 when the route carries
 * no slug; otherwise returns whatever the provisioning service returns.
 */
export default defineEventHandler(async (event) => {
  // A failed session lookup is treated the same as "not signed in".
  const session = await getUserSession(event).catch(() => null)
  const accessToken = (session as { accessToken?: string } | null)?.accessToken
  if (!accessToken) {
    throw createError({ statusCode: 401, statusMessage: 'Not signed in' })
  }

  // Decode first, then re-encode below, so the slug always lands in the
  // upstream URL as exactly one path segment — it can neither add extra
  // segments/query parts nor end up double-encoded.
  const slug = getRouterParam(event, 'slug', { decode: true })
  if (!slug) {
    // Without this guard the upstream call would target "/tenants/undefined/reconcile".
    throw createError({ statusCode: 400, statusMessage: 'Missing tenant slug' })
  }

  const base = process.env.PROVISIONING_INTERNAL_URL ?? 'http://provisioning:3001'
  return $fetch(`${base}/tenants/${encodeURIComponent(slug)}/reconcile`, {
    method: 'POST',
    headers: { Authorization: `Bearer ${accessToken}` },
  })
})
@@ -0,0 +1,23 @@
// Dev/scaffolding: proxies POST /tenants to the provisioning service with the
// logged-in user's access token. Lets you create a tenant from the browser
// without minting tokens by hand. Will be replaced by a real "create workspace"
// flow with proper UI later.
import { getUserSession } from 'nuxt-oidc-auth/runtime/server/utils/session.js'
/**
 * Portal proxy for POST /api/tenants.
 *
 * Relays the incoming request body to the provisioning service's POST /tenants
 * endpoint, using the signed-in user's access token as the bearer credential.
 * Responds 401 when the session has no access token.
 */
export default defineEventHandler(async (event) => {
  // Any failure while loading the session counts as "no session".
  const userSession = (await getUserSession(event).catch(() => null)) as {
    accessToken?: string
  } | null
  const bearer = userSession?.accessToken
  if (!bearer) {
    throw createError({ statusCode: 401, statusMessage: 'Not signed in' })
  }

  const payload = await readBody(event)
  const provisioningUrl = process.env.PROVISIONING_INTERNAL_URL ?? 'http://provisioning:3001'
  const target = `${provisioningUrl}/tenants`

  return $fetch(target, {
    method: 'POST',
    headers: { Authorization: `Bearer ${bearer}` },
    body: payload,
  })
})
@@ -401,12 +401,12 @@ services:
NODE_ENV: development NODE_ENV: development
PORT: 3001 PORT: 3001
MONGODB_URI: mongodb://root:${MONGO_ROOT_PASSWORD}@mongo:27017/dezky?authSource=admin MONGODB_URI: mongodb://root:${MONGO_ROOT_PASSWORD}@mongo:27017/dezky?authSource=admin
AUTHENTIK_API_URL: http://authentik-server:9000/api/v3 AUTHENTIK_API_URL: https://auth.dezky.local/api/v3
AUTHENTIK_API_TOKEN: ${AUTHENTIK_BOOTSTRAP_TOKEN} AUTHENTIK_API_TOKEN: ${AUTHENTIK_BOOTSTRAP_TOKEN}
STALWART_API_URL: http://stalwart:8080 STALWART_API_URL: https://mail.dezky.local
STALWART_ADMIN_USER: admin STALWART_ADMIN_USER: admin
STALWART_ADMIN_PASSWORD: ${STALWART_ADMIN_PASSWORD} STALWART_ADMIN_PASSWORD: ${STALWART_ADMIN_PASSWORD}
OCIS_API_URL: http://ocis:9200 OCIS_API_URL: https://files.dezky.local
# JWT validation against Authentik for portal-issued access tokens # JWT validation against Authentik for portal-issued access tokens
AUTHENTIK_ISSUER: https://auth.dezky.local/application/o/dezky-portal/ AUTHENTIK_ISSUER: https://auth.dezky.local/application/o/dezky-portal/
AUTHENTIK_AUDIENCE: dezky-portal AUTHENTIK_AUDIENCE: dezky-portal
@@ -0,0 +1,72 @@
import { Injectable, Logger } from '@nestjs/common'
import { ConfigService } from '@nestjs/config'
// Shape of a group object as this client reads it from Authentik responses.
// Only the fields used by the provisioning code are declared here.
interface AuthentikGroup {
// Authentik's identifier for the group; used when logging and when deleting.
pk: string
// Group name; the provisioning code uses the tenant slug as the name.
name: string
// Free-form attribute bag stored on the group (may be absent).
attributes?: Record<string, unknown>
}
/**
 * Thin wrapper around the Authentik API for the operations the provisioning
 * service needs.
 *
 * Failures are thrown as plain `Error`s whose message names the request, the
 * HTTP status and at most the first 200 characters of the response body. Raw
 * Authentik errors are not meant to reach API callers directly — the
 * orchestration records them as provisioningErrors.authentik strings.
 */
@Injectable()
export class AuthentikClient {
  private readonly logger = new Logger(AuthentikClient.name)
  private readonly base: string
  private readonly token: string

  /** Reads the API base URL and token from config; throws if either is missing. */
  constructor(config: ConfigService) {
    this.base = config.getOrThrow<string>('AUTHENTIK_API_URL')
    this.token = config.getOrThrow<string>('AUTHENTIK_API_TOKEN')
  }

  /**
   * Sends an authenticated JSON request to `${base}${path}` and parses the
   * JSON response.
   *
   * @param path API path appended verbatim to the configured base URL.
   * @param init Extra fetch options; headers given here override the defaults.
   * @returns The parsed response body, typed as `T` without validation.
   * @throws Error when the response status is not 2xx.
   */
  private async request<T>(path: string, init: RequestInit = {}): Promise<T> {
    const res = await fetch(`${this.base}${path}`, {
      ...init,
      headers: {
        Authorization: `Bearer ${this.token}`,
        'Content-Type': 'application/json',
        Accept: 'application/json',
        ...(init.headers ?? {}),
      },
    })
    if (!res.ok) {
      // The body is only diagnostic context, so a failure to read it is ignored.
      const body = await res.text().catch(() => '')
      throw new Error(
        `Authentik ${init.method ?? 'GET'} ${path} → ${res.status}: ${body.slice(0, 200)}`,
      )
    }
    return (await res.json()) as T
  }

  /**
   * Returns the group named `slug`, creating it when no such group exists.
   *
   * @param slug Group name to look up or create.
   * @param attributes Extra attributes merged over `{ role: 'tenant', slug }`
   *   when the group is created. An existing group is returned as-is; its
   *   attributes are not updated.
   * @returns The existing or newly created group.
   * @throws Error when either the lookup or the create request fails.
   */
  async ensureGroup(slug: string, attributes: Record<string, unknown> = {}): Promise<AuthentikGroup> {
    const search = await this.request<{ results: AuthentikGroup[] }>(
      `/core/groups/?name=${encodeURIComponent(slug)}`,
    )

    // Only an exact name match counts as "already exists". Trusting the first
    // result blindly could bind the tenant to a different group if the server
    // ever returned a non-exact match.
    const existing = search.results.find((group) => group.name === slug)
    if (existing) {
      this.logger.log(`Authentik group "${slug}" already exists (pk=${existing.pk})`)
      return existing
    }

    const created = await this.request<AuthentikGroup>('/core/groups/', {
      method: 'POST',
      body: JSON.stringify({
        name: slug,
        attributes: { role: 'tenant', slug, ...attributes },
      }),
    })
    this.logger.log(`Created Authentik group "${slug}" (pk=${created.pk})`)
    return created
  }

  /**
   * Deletes the group with the given id. A 404 is treated as success, since
   * the group is already gone.
   *
   * @param groupId Authentik group identifier.
   * @throws Error for any non-2xx status other than 404.
   */
  async deleteGroup(groupId: string): Promise<void> {
    // Encode the id so it always stays a single path segment.
    const res = await fetch(`${this.base}/core/groups/${encodeURIComponent(groupId)}/`, {
      method: 'DELETE',
      headers: { Authorization: `Bearer ${this.token}` },
    })
    if (res.status === 404) {
      this.logger.log(`Authentik group ${groupId} was already absent`)
      return
    }
    if (!res.ok) {
      const body = await res.text().catch(() => '')
      throw new Error(`Authentik DELETE group ${groupId} → ${res.status}: ${body.slice(0, 200)}`)
    }
    this.logger.log(`Deleted Authentik group ${groupId}`)
  }
}
@@ -0,0 +1,10 @@
import { Module } from '@nestjs/common'
import { AuthentikClient } from './authentik.client.js'
import { OcisClient } from './ocis.client.js'
import { StalwartClient } from './stalwart.client.js'
// Each integration client is registered as a provider and re-exported, so
// modules that import IntegrationsModule can inject any of them.
const integrationClients = [AuthentikClient, StalwartClient, OcisClient]

@Module({
  providers: [...integrationClients],
  exports: [...integrationClients],
})
export class IntegrationsModule {}
@@ -0,0 +1,26 @@
import { Injectable, Logger } from '@nestjs/common'
import { ConfigService } from '@nestjs/config'
/**
 * Placeholder OCIS client — nothing is sent to OCIS yet. A real implementation
 * needs:
 *   1. Service-to-service auth via OIDC client_credentials (or admin user)
 *   2. A call to the libregraph /graph/v1.0/drives endpoint to create a
 *      project space
 *   3. Assignment of the space to the tenant's group / users
 * Phase 4 ships the orchestration; OCIS hooks up in a follow-up.
 */
@Injectable()
export class OcisClient {
  private readonly logger = new Logger(OcisClient.name)
  private readonly base: string

  /** Reads OCIS_API_URL from config; throws if it is not set. */
  constructor(config: ConfigService) {
    this.base = config.getOrThrow<string>('OCIS_API_URL')
  }

  /**
   * Stub: logs a warning describing what would be created and returns a
   * synthetic id derived from the slug.
   */
  async ensureSpace(slug: string): Promise<{ id: string }> {
    const notice = `OCIS space provisioning is stubbed — would create space for "${slug}" at ${this.base}`
    this.logger.warn(notice)

    const id = `stub-${slug}`
    return { id }
  }

  /** Stub: logs a warning describing what would be deleted. */
  async deleteSpace(spaceId: string): Promise<void> {
    const notice = `OCIS space delete is stubbed — would delete ${spaceId} at ${this.base}`
    this.logger.warn(notice)
  }
}
@@ -0,0 +1,30 @@
import { Injectable, Logger } from '@nestjs/common'
import { ConfigService } from '@nestjs/config'
/**
 * Placeholder Stalwart client — nothing is sent to Stalwart yet.
 *
 * Stalwart v0.16 removed the REST management API: all admin operations now go
 * through the JMAP /jmap endpoint with Principal/set, Domain/set, etc. method
 * calls. Implementing a JMAP client is meaningful work and out of scope for
 * Phase 4, so the orchestration code records this integration as 'skipped'.
 *
 * TODO (follow-up): Build a minimal JMAP client that wraps Principal/set + the
 * DKIM key generation method. See https://stalw.art/docs/api/management/overview
 */
@Injectable()
export class StalwartClient {
  private readonly logger = new Logger(StalwartClient.name)
  private readonly base: string

  /** Reads STALWART_API_URL from config; throws if it is not set. */
  constructor(config: ConfigService) {
    this.base = config.getOrThrow<string>('STALWART_API_URL')
  }

  /**
   * Stub: logs a warning describing the domain that would be created and
   * echoes the domain name back. The description argument is currently unused.
   */
  async ensureDomain(domain: string, _description?: string): Promise<{ name: string }> {
    const notice = `Stalwart domain provisioning is stubbed — would create "${domain}" via JMAP at ${this.base}/jmap`
    this.logger.warn(notice)

    const result = { name: domain }
    return result
  }

  /** Stub: logs a warning describing the domain that would be deleted. */
  async deleteDomain(domain: string): Promise<void> {
    const notice = `Stalwart domain delete is stubbed — would delete "${domain}"`
    this.logger.warn(notice)
  }
}
@@ -6,6 +6,10 @@ export type TenantDocument = HydratedDocument<Tenant>
export type TenantStatus = 'pending' | 'active' | 'suspended' | 'deleted' export type TenantStatus = 'pending' | 'active' | 'suspended' | 'deleted'
export type TenantPlan = 'mvp' | 'pro' | 'enterprise' export type TenantPlan = 'mvp' | 'pro' | 'enterprise'
// One field per external integration. 'pending' = not yet tried; 'ok' = synced;
// 'error' = last attempt failed (see provisioningErrors for detail);
// 'skipped' = the step ran, but the integration is still a stub, so nothing was
// actually provisioned upstream.
export type IntegrationState = 'pending' | 'ok' | 'error' | 'skipped'
@Schema({ collection: 'tenants', timestamps: true }) @Schema({ collection: 'tenants', timestamps: true })
export class Tenant { export class Tenant {
// URL-safe identifier, also used as Authentik group name. Lowercase, hyphenated. // URL-safe identifier, also used as Authentik group name. Lowercase, hyphenated.
@@ -51,6 +55,30 @@ export class Tenant {
country?: string country?: string
contactEmail?: string contactEmail?: string
} }
// Per-integration provisioning state. Each one is updated independently when its
// upstream API call succeeds or fails — orchestration is best-effort, not atomic.
// Every integration starts out as 'pending' on a new tenant; the allowed values
// mirror the IntegrationState union.
@Prop({
type: {
authentik: { type: String, enum: ['pending', 'ok', 'error', 'skipped'], default: 'pending' },
stalwart: { type: String, enum: ['pending', 'ok', 'error', 'skipped'], default: 'pending' },
ocis: { type: String, enum: ['pending', 'ok', 'error', 'skipped'], default: 'pending' },
},
// Factory default, so each document gets its own status object.
default: () => ({ authentik: 'pending', stalwart: 'pending', ocis: 'pending' }),
})
provisioningStatus!: {
authentik: IntegrationState
stalwart: IntegrationState
ocis: IntegrationState
}
// Last error message per integration. Cleared when a subsequent attempt succeeds.
// A key is present only while that integration's most recent attempt failed.
// NOTE(review): declared as a plain Object, so in-place edits presumably need
// markModified('provisioningErrors') before save() — confirm against callers.
@Prop({ type: Object, default: {} })
provisioningErrors!: {
authentik?: string
stalwart?: string
ocis?: string
}
} }
export const TenantSchema = SchemaFactory.createForClass(Tenant) export const TenantSchema = SchemaFactory.createForClass(Tenant)
@@ -0,0 +1,119 @@
import { Injectable, Logger } from '@nestjs/common'
import { InjectModel } from '@nestjs/mongoose'
import { Model } from 'mongoose'
import { AuthentikClient } from '../integrations/authentik.client.js'
import { OcisClient } from '../integrations/ocis.client.js'
import { StalwartClient } from '../integrations/stalwart.client.js'
import {
IntegrationState,
Tenant,
TenantDocument,
} from '../schemas/tenant.schema.js'
/**
 * Orchestrates provisioning across Authentik / Stalwart / OCIS.
 *
 * Each step is independent — one failure doesn't roll back the others — and
 * the per-step status is recorded on the tenant document, so a failed run can
 * simply be retried.
 */
@Injectable()
export class ProvisioningService {
  private readonly logger = new Logger(ProvisioningService.name)

  constructor(
    @InjectModel(Tenant.name) private readonly tenantModel: Model<TenantDocument>,
    private readonly authentik: AuthentikClient,
    private readonly stalwart: StalwartClient,
    private readonly ocis: OcisClient,
  ) {}

  /**
   * Runs every integration step in order (Authentik, Stalwart, OCIS), records
   * the per-step outcome on the tenant, flips a 'pending' tenant to 'active'
   * once every step is 'ok' or 'skipped', and saves the document.
   *
   * Step failures do not reject this promise — they are captured in
   * provisioningStatus / provisioningErrors. A failing save() does reject.
   *
   * @param tenant Tenant document to provision; it is mutated in place.
   * @returns The same tenant document after it has been saved.
   */
  async reconcile(tenant: TenantDocument): Promise<TenantDocument> {
    this.logger.log(`Reconciling tenant "${tenant.slug}"`)

    await this.runStep(tenant, 'authentik', async () => {
      const group = await this.authentik.ensureGroup(tenant.slug, { tenantId: tenant.id })
      tenant.authentikGroupId = String(group.pk)
    })

    // Stalwart + OCIS are stubbed — the upstream call no-ops and we record the
    // honest 'skipped' state by returning it from the step.
    await this.runStep(tenant, 'stalwart', async () => {
      const domain = this.domainFor(tenant.slug)
      await this.stalwart.ensureDomain(domain, `Mail domain for tenant ${tenant.slug}`)
      tenant.stalwartDomain = domain
      return 'skipped'
    })

    await this.runStep(tenant, 'ocis', async () => {
      const space = await this.ocis.ensureSpace(tenant.slug)
      tenant.ocisSpaceId = space.id
      return 'skipped'
    })

    // If every required integration is either 'ok' or 'skipped' (not 'error' /
    // 'pending'), activate the tenant. Skipped steps don't block activation —
    // they just won't have their resources wired up yet.
    const keys = ['authentik', 'stalwart', 'ocis'] as const
    const allSettled = keys.every((k) => {
      const state = tenant.provisioningStatus[k]
      return state === 'ok' || state === 'skipped'
    })
    // Only 'pending' tenants are promoted; any other status is left untouched.
    if (allSettled && tenant.status === 'pending') {
      tenant.status = 'active'
    }

    // Mongoose doesn't auto-detect mutations inside nested subdocuments — flag
    // these paths as modified so the save() actually persists our changes.
    tenant.markModified('provisioningStatus')
    tenant.markModified('provisioningErrors')
    await tenant.save()
    return tenant
  }

  /**
   * Runs one integration step and records its outcome on the tenant.
   *
   * The step returns its terminal state explicitly. Returning void means "this
   * step ran a real upstream call successfully" and is mapped to 'ok';
   * returning a specific state ('skipped', etc.) lets stub integrations be
   * honest about not actually doing the work. A throw is recorded as 'error'
   * together with its message and is not re-thrown.
   */
  private async runStep(
    tenant: TenantDocument,
    key: 'authentik' | 'stalwart' | 'ocis',
    work: () => Promise<IntegrationState | void>,
  ): Promise<void> {
    try {
      const result = await work()
      tenant.provisioningStatus[key] = result ?? 'ok'
      // Always clear the previous failure, even if its message was empty.
      delete tenant.provisioningErrors[key]
    } catch (err: unknown) {
      const msg = this.errorMessage(err)
      tenant.provisioningStatus[key] = 'error'
      tenant.provisioningErrors[key] = msg
      this.logger.error(`Tenant "${tenant.slug}" — ${key} step failed: ${msg}`)
    }
  }

  /**
   * Turns any thrown value into a printable message. Non-Error throwables
   * (strings, plain objects) have no usable `.message`, so they are stringified
   * rather than being recorded as `undefined`.
   */
  private errorMessage(err: unknown): string {
    return err instanceof Error ? err.message : String(err)
  }

  // Maps tenant slug → mail domain. Production should use a real registered
  // domain (e.g. acme.dezky.com); locally we use the .local hierarchy.
  private domainFor(slug: string): string {
    return `${slug}.dezky.local`
  }

  /**
   * Best-effort cleanup, intended for hard deletes (not soft deletes). Removes
   * whichever external resources the tenant has ids recorded for. Every failure
   * is logged and swallowed so the remaining cleanups still run.
   */
  async tearDown(tenant: TenantDocument): Promise<void> {
    if (tenant.authentikGroupId) {
      await this.authentik.deleteGroup(tenant.authentikGroupId).catch((err: unknown) => {
        this.logger.error(`Failed to delete Authentik group: ${this.errorMessage(err)}`)
      })
    }
    if (tenant.stalwartDomain) {
      await this.stalwart.deleteDomain(tenant.stalwartDomain).catch((err: unknown) => {
        this.logger.error(`Failed to delete Stalwart domain: ${this.errorMessage(err)}`)
      })
    }
    if (tenant.ocisSpaceId) {
      await this.ocis.deleteSpace(tenant.ocisSpaceId).catch((err: unknown) => {
        this.logger.error(`Failed to delete OCIS space: ${this.errorMessage(err)}`)
      })
    }
  }
}
@@ -75,4 +75,17 @@ export class TenantsController {
} }
await this.tenants.softDelete(slug) await this.tenants.softDelete(slug)
} }
/**
 * POST /tenants/:slug/reconcile — manually re-runs provisioning for a tenant.
 *
 * Useful when an integration was down at create time, or when external state
 * drifted (someone deleted the Authentik group out of band). Only platform
 * admins and actors that belong to the tenant may trigger it; everyone else
 * gets a ForbiddenException. The work itself is delegated to the tenants
 * service.
 */
@Post(':slug/reconcile')
async reconcile(@Param('slug') slug: string, @CurrentUser() jwt: AuthentikJwtPayload) {
  const caller = await this.actor.resolve(jwt)
  const target = await this.tenants.findOneBySlug(slug)

  // Platform admins pass without a membership lookup.
  const allowed =
    caller.platformAdmin || caller.tenantIds.some((tenantId) => tenantId.equals(target._id))
  if (!allowed) {
    throw new ForbiddenException(`No access to tenant "${slug}"`)
  }

  return this.tenants.reconcile(slug)
}
} }
@@ -1,7 +1,9 @@
import { Module } from '@nestjs/common' import { Module } from '@nestjs/common'
import { MongooseModule } from '@nestjs/mongoose' import { MongooseModule } from '@nestjs/mongoose'
import { AuthModule } from '../auth/auth.module.js' import { AuthModule } from '../auth/auth.module.js'
import { IntegrationsModule } from '../integrations/integrations.module.js'
import { Tenant, TenantSchema } from '../schemas/tenant.schema.js' import { Tenant, TenantSchema } from '../schemas/tenant.schema.js'
import { ProvisioningService } from './provisioning.service.js'
import { TenantsController } from './tenants.controller.js' import { TenantsController } from './tenants.controller.js'
import { TenantsService } from './tenants.service.js' import { TenantsService } from './tenants.service.js'
@@ -9,9 +11,10 @@ import { TenantsService } from './tenants.service.js'
imports: [ imports: [
MongooseModule.forFeature([{ name: Tenant.name, schema: TenantSchema }]), MongooseModule.forFeature([{ name: Tenant.name, schema: TenantSchema }]),
AuthModule, AuthModule,
IntegrationsModule,
], ],
controllers: [TenantsController], controllers: [TenantsController],
providers: [TenantsService], providers: [TenantsService, ProvisioningService],
exports: [TenantsService], exports: [TenantsService],
}) })
export class TenantsModule {} export class TenantsModule {}
@@ -4,15 +4,27 @@ import { Model, Types } from 'mongoose'
import { Tenant, TenantDocument } from '../schemas/tenant.schema.js' import { Tenant, TenantDocument } from '../schemas/tenant.schema.js'
import type { CreateTenantDto } from './dto/create-tenant.dto.js' import type { CreateTenantDto } from './dto/create-tenant.dto.js'
import type { UpdateTenantDto } from './dto/update-tenant.dto.js' import type { UpdateTenantDto } from './dto/update-tenant.dto.js'
import { ProvisioningService } from './provisioning.service.js'
@Injectable() @Injectable()
export class TenantsService { export class TenantsService {
constructor(@InjectModel(Tenant.name) private readonly tenantModel: Model<TenantDocument>) {} constructor(
@InjectModel(Tenant.name) private readonly tenantModel: Model<TenantDocument>,
private readonly provisioning: ProvisioningService,
) {}
/**
 * Creates the tenant document in 'pending' state, then runs provisioning
 * against the external services.
 *
 * Provisioning is best-effort: step errors are recorded on the returned
 * document rather than thrown. Retry through reconcile() — POSTing the same
 * slug again is rejected with a ConflictException.
 */
async create(dto: CreateTenantDto): Promise<TenantDocument> {
  const slugTaken = await this.tenantModel.exists({ slug: dto.slug })
  if (slugTaken) {
    throw new ConflictException(`Tenant with slug "${dto.slug}" already exists`)
  }

  const created = await this.tenantModel.create({ ...dto, status: 'pending' })
  return this.provisioning.reconcile(created)
}

/** Looks the tenant up by slug and re-runs provisioning for it. */
async reconcile(slug: string): Promise<TenantDocument> {
  return this.provisioning.reconcile(await this.findOneBySlug(slug))
}
async findAll(): Promise<TenantDocument[]> { async findAll(): Promise<TenantDocument[]> {