The Source stage transforms raw Notion API responses into normalized content records. Notion's data model is block-based: each page is a tree of blocks, where each block has a type (for example paragraph, heading, code, image, or callout) and type-specific content. This model is powerful for editing but awkward for downstream consumption. The Source stage flattens the block tree into a structured content record with extracted properties and rendered content.
/**
 * Normalized representation of one Notion page, as produced by the Source
 * stage: page properties flattened into plain fields plus the page's content
 * as a flat list of blocks.
 */
interface ContentRecord {
  /** Identifier of the originating Notion page. */
  id: string
  slug: string
  title: string
  subtitle: string
  excerpt: string

  /** Lifecycle state of the content: draft, review, approved, or published. */
  status: 'draft' | 'review' | 'approved' | 'published'

  pillar: string
  type: string
  tags: string[]
  author: string
  date: string

  wordCount: number
  readTime: number

  /** Flattened page content; populated by block extraction. */
  content: ContentBlock[]

  /** Free-form additional data attached to the record. */
  metadata: Record<string, unknown>
}
/**
 * A single flattened content block. `type` selects the kind of block; the
 * optional fields apply only to the block kinds noted on each of them.
 */
interface ContentBlock {
  type: 'paragraph' | 'heading' | 'code' | 'image' | 'callout' | 'quote'

  /** Heading level; used for headings. */
  level?: number

  /** Source language; used for code blocks. */
  language?: string

  text: string

  /** Optional inline formatting flags for the block's text. */
  annotations?: {
    bold: boolean
    italic: boolean
    code: boolean
  }
}
/**
 * Builds a normalized ContentRecord from a raw Notion page object.
 *
 * Text-valued fields (slug, title, subtitle, excerpt, author) are delegated
 * to extractPlainText. Select, multi-select, date and number lookups are
 * null-safe: a property that is absent or empty yields that field's empty
 * default ('' / [] / 0) instead of throwing.
 *
 * @param notionPage - Raw page object; its `id` and `properties` are read.
 * @returns A record with `content` left empty (it is populated by block
 *   extraction) and `metadata` set to an empty object.
 */
function normalizeRecord(notionPage: any): ContentRecord {
  const props = notionPage.properties

  // The status field is typed as a four-value union, so the raw option name
  // must be validated rather than merely lower-cased. A missing, non-string,
  // or unrecognized value falls back to 'draft'.
  const lifecycle: ReadonlyArray<ContentRecord['status']> = [
    'draft',
    'review',
    'approved',
    'published',
  ]
  const rawStatus: unknown = props.Status?.select?.name
  const candidate =
    typeof rawStatus === 'string' ? rawStatus.trim().toLowerCase() : ''
  const status = lifecycle.find((state) => state === candidate) ?? 'draft'

  return {
    id: notionPage.id,
    slug: extractPlainText(props.Slug),
    title: extractPlainText(props.Title),
    subtitle: extractPlainText(props.Subtitle),
    excerpt: extractPlainText(props.Excerpt),
    status,
    pillar: props.Pillar?.select?.name ?? '',
    type: props.Type?.select?.name ?? '',
    tags: props.Tags?.multi_select?.map((tag: any) => tag.name) ?? [],
    author: extractPlainText(props.Author),
    date: props.Date?.date?.start ?? '',
    wordCount: props.WordCount?.number ?? 0,
    readTime: props.ReadTime?.number ?? 0,
    content: [], // populated by block extraction
    metadata: {},
  }
}
The status field deserves attention. Content moves through four lifecycle states: draft, review, approved, and published. Only content with a status of "approved" or "published" enters the Creative stage. Drafts and content under review are visible in the pipeline dashboard but are excluded from downstream processing. This gate prevents incomplete content from reaching the public web.
The status lifecycle is a quality gate, not a workflow preference. Content that has not been explicitly approved does not advance beyond the Source stage into downstream processing. There are no exceptions and no overrides.