From d70582411ebbe5bd6e44fa8cf4adf8cff034e3ac Mon Sep 17 00:00:00 2001
From: Arik Chakma
Date: Wed, 20 Aug 2025 13:57:24 +0600
Subject: [PATCH] chore: sync repo to database

---
Review notes (this section is not part of the commit message):

- Line breaks were restored, together with the generic type arguments that
  were missing from this copy of the patch (Map<...>, Promise<...>, Omit<...>).
- .github/workflows/sync-repo-to-database.yml
  - Deleted files are no longer passed to the script (--diff-filter=ACMR);
    the script reads every listed file from disk.
  - The changed-file list and the secret reach the shell through `env`
    instead of being interpolated into the script text.
- scripts/sync-repo-to-database.ts
  - --files / --secret are read by name; a missing --files is an explicit
    error instead of a TypeError on `undefined.split`.
  - Empty entries in the file list are ignored.
  - Resource types without an explicit position sort last instead of first.
  - 'roadmap' is accepted as a resource type, matching the list in
    scripts/sync-content-to-repo.ts.
  - A file without an <h1> falls back to the node label instead of
    producing "# undefined".
  - The leftover debug console.log and the @ts-expect-error were removed.
- Hunk sizes and the diffstat were recomputed for the two new files. Their
  `index` blob ids still refer to the original revision; regenerate the patch
  with `git format-patch` before relying on them.

 .github/workflows/sync-content-to-repo.yml  |  16 +-
 .github/workflows/sync-repo-to-database.yml |  73 +++
 package.json                                |   1 +
 scripts/sync-content-to-repo.ts             |   7 +-
 scripts/sync-repo-to-database.ts            | 239 ++++++++++++++++++++
 src/queries/official-roadmap.ts             |   5 +-
 6 files changed, 334 insertions(+), 7 deletions(-)
 create mode 100644 .github/workflows/sync-repo-to-database.yml
 create mode 100644 scripts/sync-repo-to-database.ts

diff --git a/.github/workflows/sync-content-to-repo.yml b/.github/workflows/sync-content-to-repo.yml
index 4313d0f1f..a4372a87b 100644
--- a/.github/workflows/sync-content-to-repo.yml
+++ b/.github/workflows/sync-content-to-repo.yml
@@ -28,14 +28,26 @@ jobs:
 
       - name: Install Dependencies and Sync Content
         run: |
+          echo "Installing Dependencies"
           pnpm install
-          npm run sync:content-to-repo --roadmap-slug=${{ inputs.roadmap_slug }} --secret=${{ secrets.SYNC_CONTENT_TO_REPO_SECRET }}
+          echo "Syncing Content to Repo"
+          npm run sync:content-to-repo -- --roadmap-slug=${{ inputs.roadmap_slug }} --secret=${{ secrets.TOPIC_CONTENT_SYNC_SECRET }}
+
+      - name: Check for changes
+        id: verify-changed-files
+        run: |
+          if [ -n "$(git status --porcelain)" ]; then
+            echo "changed=true" >> $GITHUB_OUTPUT
+          else
+            echo "changed=false" >> $GITHUB_OUTPUT
+          fi
 
       - name: Create PR
+        if: steps.verify-changed-files.outputs.changed == 'true'
         uses: peter-evans/create-pull-request@v7
         with:
           delete-branch: false
-          branch: "chore/sync-content-to-repo"
+          branch: "chore/sync-content-to-repo-${{ inputs.roadmap_slug }}"
           base: "master"
           labels: |
             dependencies
diff --git a/.github/workflows/sync-repo-to-database.yml b/.github/workflows/sync-repo-to-database.yml
new file mode 100644
index 000000000..482e1aec7
--- /dev/null
+++ b/.github/workflows/sync-repo-to-database.yml
@@ -0,0 +1,73 @@
+name: Sync on Roadmap Changes
+
+on:
+  push:
+    branches:
+      - master
+    paths:
+      - 'src/data/roadmaps/**'
+
+jobs:
+  sync-on-changes:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 2 # Fetch previous commit to compare changes
+
+      - name: Setup pnpm@v9
+        uses: pnpm/action-setup@v4
+        with:
+          version: 9
+          run_install: false
+
+      - name: Setup Node.js Version 20 (LTS)
+        uses: actions/setup-node@v4
+        with:
+          node-version: 20
+          cache: 'pnpm'
+
+      - name: Get changed files
+        id: changed-files
+        run: |
+          echo "Getting changed files in /src/data/roadmaps/"
+
+          # Get files added/copied/modified/renamed between HEAD and the previous
+          # commit. Deleted files are excluded because the sync script reads each
+          # listed file from disk.
+          CHANGED_FILES=$(git diff --name-only --diff-filter=ACMR HEAD~1 HEAD -- src/data/roadmaps/)
+
+          if [ -z "$CHANGED_FILES" ]; then
+            echo "No changes found in roadmaps directory"
+            echo "has_changes=false" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          echo "Changed files:"
+          echo "$CHANGED_FILES"
+
+          # Convert to space-separated list for the script
+          CHANGED_FILES_LIST=$(echo "$CHANGED_FILES" | tr '\n' ' ')
+
+          echo "has_changes=true" >> $GITHUB_OUTPUT
+          echo "changed_files=$CHANGED_FILES_LIST" >> $GITHUB_OUTPUT
+
+      - name: Install Dependencies
+        if: steps.changed-files.outputs.has_changes == 'true'
+        run: |
+          echo "Installing Dependencies"
+          pnpm install
+
+      - name: Run sync script with changed files
+        if: steps.changed-files.outputs.has_changes == 'true'
+        env:
+          # Passed through the environment instead of being interpolated into the
+          # script text, so file names cannot be interpreted as shell syntax.
+          CHANGED_FILES: ${{ steps.changed-files.outputs.changed_files }}
+          TOPIC_CONTENT_SYNC_SECRET: ${{ secrets.TOPIC_CONTENT_SYNC_SECRET }}
+        run: |
+          echo "Running sync script for changed roadmap files"
+          echo "Changed files: $CHANGED_FILES"
+
+          # Run the sync script with the changed file paths
+          npm run sync:repo-to-database -- --files="$CHANGED_FILES" --secret="$TOPIC_CONTENT_SYNC_SECRET"
diff --git a/package.json b/package.json
index 044c518d2..69c90e7df 100644
--- a/package.json
+++ b/package.json
@@ -30,6 +30,7 @@
     "generate:roadmap-content-json": "tsx ./scripts/editor-roadmap-content-json.ts",
     "migrate:editor-roadmaps": "tsx ./scripts/migrate-editor-roadmap.ts",
     "sync:content-to-repo": "tsx ./scripts/sync-content-to-repo.ts",
+    "sync:repo-to-database": "tsx ./scripts/sync-repo-to-database.ts",
     "test:e2e": "playwright test"
   },
   "dependencies": {
diff --git a/scripts/sync-content-to-repo.ts b/scripts/sync-content-to-repo.ts
index 158f568eb..8cc61898f 100644
--- a/scripts/sync-content-to-repo.ts
+++ b/scripts/sync-content-to-repo.ts
@@ -19,6 +19,7 @@ if (!roadmapSlug || roadmapSlug === '__default__') {
 console.log(`🚀 Starting ${roadmapSlug}`);
 
 export const allowedOfficialRoadmapTopicResourceType = [
+  'roadmap',
   'official',
   'opensource',
   'article',
@@ -26,11 +27,12 @@ export const allowedOfficialRoadmapTopicResourceType = [
   'podcast',
   'video',
   'book',
+  'feed',
 ] as const;
 export type AllowedOfficialRoadmapTopicResourceType =
   (typeof allowedOfficialRoadmapTopicResourceType)[number];
 
-type OfficialRoadmapTopicResource = {
+export type OfficialRoadmapTopicResource = {
   _id?: string;
   type: AllowedOfficialRoadmapTopicResourceType;
   title: string;
@@ -97,8 +99,7 @@ for (const topic of allTopics) {
 function prepareTopicContent(topic: OfficialRoadmapTopicContentDocument) {
   const { description, resources = [] } = topic;
 
-  const content = `
-${description}
+  const content = `${description}
 
 Visit the following resources to learn more:
 
diff --git a/scripts/sync-repo-to-database.ts b/scripts/sync-repo-to-database.ts
new file mode 100644
index 000000000..f1e05a75a
--- /dev/null
+++ b/scripts/sync-repo-to-database.ts
@@ -0,0 +1,239 @@
+import fs from 'node:fs/promises';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { slugify } from '../src/lib/slugger';
+import type { OfficialRoadmapDocument } from '../src/queries/official-roadmap';
+import { parse } from 'node-html-parser';
+import type { HTMLElement } from 'node-html-parser';
+import { markdownToHtml } from '../src/lib/markdown';
+import { htmlToMarkdown } from '../src/lib/html';
+import type {
+  OfficialRoadmapTopicContentDocument,
+  OfficialRoadmapTopicResource,
+} from './sync-content-to-repo';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+/**
+ * Returns the value of a `--name=value` command line flag, or `undefined` when
+ * the flag was not passed. Flags are matched by name rather than by position.
+ */
+function readFlag(name: string): string | undefined {
+  const prefix = `--${name}=`;
+  const flag = process.argv.slice(2).find((arg) => arg.startsWith(prefix));
+  return flag?.slice(prefix.length);
+}
+
+const allFiles = readFlag('files');
+if (!allFiles?.trim()) {
+  throw new Error('Files are required');
+}
+
+// NOTE(review): the secret is validated but nothing below uses it yet.
+const secret = readFlag('secret');
+if (!secret) {
+  throw new Error('Secret is required');
+}
+
+const roadmapJsonCache = new Map<string, OfficialRoadmapDocument>();
+
+/**
+ * Fetches the official roadmap JSON for `roadmapId` from the roadmap.sh API.
+ * Successful responses are cached per id for the lifetime of the process.
+ *
+ * @throws when the request fails or the response body carries an `error`.
+ */
+export async function fetchRoadmapJson(
+  roadmapId: string,
+): Promise<OfficialRoadmapDocument> {
+  const cached = roadmapJsonCache.get(roadmapId);
+  if (cached) {
+    return cached;
+  }
+
+  const response = await fetch(
+    `https://roadmap.sh/api/v1-official-roadmap/${roadmapId}`,
+  );
+
+  if (!response.ok) {
+    throw new Error(`Failed to fetch roadmap json: ${response.statusText}`);
+  }
+
+  const data = await response.json();
+  if (data.error) {
+    throw new Error(`Failed to fetch roadmap json: ${data.error}`);
+  }
+
+  roadmapJsonCache.set(roadmapId, data);
+  return data;
+}
+
+// Kept as a local copy: importing the runtime value from
+// './sync-content-to-repo' would also run that script's top-level code.
+export const allowedOfficialRoadmapTopicResourceType = [
+  'roadmap',
+  'official',
+  'opensource',
+  'article',
+  'course',
+  'podcast',
+  'video',
+  'book',
+  'feed',
+] as const;
+export type AllowedOfficialRoadmapTopicResourceType =
+  (typeof allowedOfficialRoadmapTopicResourceType)[number];
+
+/** Matches the `@type@` marker embedded in a resource link's text. */
+const RESOURCE_TYPE_PATTERN = /@([a-z.]+)@/;
+
+/** Display order of resource types; types not listed here sort last. */
+const RESOURCE_TYPE_ORDER: readonly string[] = [
+  'official',
+  'opensource',
+  'article',
+  'video',
+  'feed',
+];
+
+/** Narrows an arbitrary string to one of the allowed resource types. */
+function isAllowedResourceType(
+  value: string,
+): value is AllowedOfficialRoadmapTopicResourceType {
+  return (
+    allowedOfficialRoadmapTopicResourceType as readonly string[]
+  ).includes(value);
+}
+
+/** Sort key for a resource type; unknown types rank after every known one. */
+function resourceTypeRank(type: string): number {
+  const index = RESOURCE_TYPE_ORDER.indexOf(type);
+  return index === -1 ? RESOURCE_TYPE_ORDER.length : index;
+}
+
+/** True when the list item's whole text is the text of its first link. */
+function isLinkOnlyItem(li: HTMLElement): boolean {
+  const link = li.querySelector('a');
+  return !!link && link.textContent?.trim() === li.textContent?.trim();
+}
+
+// The workflow joins paths with spaces and leaves a trailing one, so empty
+// entries are dropped instead of being reported as skipped files.
+const files = allFiles.split(/\s+/).filter(Boolean);
+console.log(`🚀 Starting ${files.length} files`);
+
+const ROADMAP_CONTENT_DIR = path.join(__dirname, '../src/data/roadmaps');
+
+const topics: Omit<
+  OfficialRoadmapTopicContentDocument,
+  'createdAt' | 'updatedAt' | '_id'
+>[] = [];
+
+for (const file of files) {
+  const isContentFile = file.endsWith('.md') && file.includes('content/');
+  if (!isContentFile) {
+    console.log(`🚨 Skipping ${file} because it is not a content file`);
+    continue;
+  }
+
+  // Expected shape: src/data/roadmaps/<roadmap>/content/<slug>@<nodeId>.md
+  const pathParts = file.replace('src/data/roadmaps/', '').split('/');
+  const roadmapSlug = pathParts?.[0];
+  if (!roadmapSlug) {
+    console.error(`🚨 Roadmap slug is required: ${file}`);
+    continue;
+  }
+
+  const nodeSlug = pathParts?.[2]?.replace('.md', '');
+  if (!nodeSlug) {
+    console.error(`🚨 Node id is required: ${file}`);
+    continue;
+  }
+
+  const nodeId = nodeSlug.split('@')?.[1];
+  if (!nodeId) {
+    console.error(`🚨 Node id is required: ${file}`);
+    continue;
+  }
+
+  const roadmap = await fetchRoadmapJson(roadmapSlug);
+  const node = roadmap.nodes.find((node) => node.id === nodeId);
+  if (!node) {
+    console.error(`🚨 Node not found: ${file}`);
+    continue;
+  }
+
+  const label = node.data?.label as string;
+  if (!label) {
+    console.error(`🚨 Label is required: ${file}`);
+    continue;
+  }
+
+  const filePath = path.join(
+    ROADMAP_CONTENT_DIR,
+    roadmapSlug,
+    'content',
+    `${nodeSlug}.md`,
+  );
+
+  let content: string;
+  try {
+    content = await fs.readFile(filePath, 'utf8');
+  } catch (error: unknown) {
+    // A listed path may no longer exist on disk (e.g. it was deleted).
+    const reason = error instanceof Error ? error.message : String(error);
+    console.error(`🚨 Could not read ${file}: ${reason}`);
+    continue;
+  }
+
+  const rootHtml = parse(markdownToHtml(content, false));
+
+  // When several lists qualify, the last one in the document is used.
+  const linkLists = rootHtml
+    .querySelectorAll('ul')
+    .filter((ul) => ul.querySelectorAll('li').some(isLinkOnlyItem));
+  const ulWithLinks = linkLists[linkLists.length - 1];
+
+  const listLinks = (ulWithLinks?.querySelectorAll('li > a') ?? [])
+    .map((link): Omit<OfficialRoadmapTopicResource, '_id'> => {
+      const linkText = link.textContent || '';
+      const markedType = linkText.match(RESOURCE_TYPE_PATTERN)?.[1] ?? '';
+      return {
+        title: linkText.replace(RESOURCE_TYPE_PATTERN, '').trim(),
+        url: link.getAttribute('href') || '',
+        // Missing or unrecognised markers fall back to 'article'.
+        type: isAllowedResourceType(markedType) ? markedType : 'article',
+      };
+    })
+    .sort((a, b) => resourceTypeRank(a.type) - resourceTypeRank(b.type));
+
+  const title = rootHtml.querySelector('h1');
+  ulWithLinks?.remove();
+  title?.remove();
+
+  if (listLinks.length > 0) {
+    // Presumably the lead-in sentence that precedes the resource list
+    // ("Visit the following resources to learn more:") - TODO confirm.
+    rootHtml.querySelector('p:last-child')?.remove();
+  }
+
+  const description = htmlToMarkdown(rootHtml.toString());
+
+  // Fall back to the node label so a file without an <h1> does not produce
+  // a literal "# undefined" heading.
+  const heading = title?.textContent?.trim() || label;
+  const updatedDescription = `# ${heading}
+
+${description}`.trim();
+
+  topics.push({
+    roadmapSlug,
+    nodeId,
+    title: label,
+    description: updatedDescription,
+    resources: listLinks,
+  });
+}
+
+console.log(JSON.stringify(topics, null, 2));
diff --git a/src/queries/official-roadmap.ts b/src/queries/official-roadmap.ts
index 73fd73d68..349abb946 100644
--- a/src/queries/official-roadmap.ts
+++ b/src/queries/official-roadmap.ts
@@ -1,13 +1,14 @@
 import { queryOptions } from '@tanstack/react-query';
 import { httpGet } from '../lib/query-http';
+import type { Node, Edge } from '@roadmapsh/editor';
 
 export interface OfficialRoadmapDocument {
   _id: string;
   title: string;
   description?: string;
   slug: string;
-  nodes: any[];
-  edges: any[];
+  nodes: Node[];
+  edges: Edge[];
   createdAt: Date;
   updatedAt: Date;