From 885e95399e083f8a98b72f05b5f670790700f83e Mon Sep 17 00:00:00 2001 From: Arik Chakma Date: Wed, 20 Aug 2025 14:38:44 +0600 Subject: [PATCH] fix: sync repo to db --- scripts/sync-repo-to-database.ts | 278 +++++++++++++++++-------------- 1 file changed, 150 insertions(+), 128 deletions(-) diff --git a/scripts/sync-repo-to-database.ts b/scripts/sync-repo-to-database.ts index f1e05a75a..b5c4f2eaa 100644 --- a/scripts/sync-repo-to-database.ts +++ b/scripts/sync-repo-to-database.ts @@ -6,9 +6,11 @@ import type { OfficialRoadmapDocument } from '../src/queries/official-roadmap'; import { parse } from 'node-html-parser'; import { markdownToHtml } from '../src/lib/markdown'; import { htmlToMarkdown } from '../src/lib/html'; -import type { - OfficialRoadmapTopicContentDocument, - OfficialRoadmapTopicResource, +import { + allowedOfficialRoadmapTopicResourceType, + type AllowedOfficialRoadmapTopicResourceType, + type OfficialRoadmapTopicContentDocument, + type OfficialRoadmapTopicResource, } from './sync-content-to-repo'; const __filename = fileURLToPath(import.meta.url); @@ -46,151 +48,171 @@ export async function fetchRoadmapJson( return data; } -export const allowedOfficialRoadmapTopicResourceType = [ - 'official', - 'opensource', - 'article', - 'course', - 'podcast', - 'video', - 'book', - 'feed', -] as const; -export type AllowedOfficialRoadmapTopicResourceType = - (typeof allowedOfficialRoadmapTopicResourceType)[number]; +export async function syncContentToDatabase( + topics: Omit< + OfficialRoadmapTopicContentDocument, + 'createdAt' | 'updatedAt' | '_id' + >[], +) { + const response = await fetch( + `https://roadmap.sh/api/v1-sync-official-roadmap-topics`, + { + method: 'POST', + body: JSON.stringify({ + topics, + secret, + }), + }, + ); + + if (!response.ok) { + throw new Error( + `Failed to sync content to database: ${response.statusText}`, + ); + } + + return response.json(); +} const files = allFiles.split(' '); console.log(`🚀 Starting ${files.length} files`); const ROADMAP_CONTENT_DIR = path.join(__dirname, '../src/data/roadmaps'); -const topics: Omit< - OfficialRoadmapTopicContentDocument, - 'createdAt' | 'updatedAt' | '_id' ->[] = []; +try { + const topics: Omit< + OfficialRoadmapTopicContentDocument, + 'createdAt' | 'updatedAt' | '_id' + >[] = []; -for (const file of files) { - const isContentFile = file.endsWith('.md') && file.includes('content/'); - if (!isContentFile) { - console.log(`🚨 Skipping ${file} because it is not a content file`); - continue; - } + for (const file of files) { + const isContentFile = file.endsWith('.md') && file.includes('content/'); + if (!isContentFile) { + console.log(`🚨 Skipping ${file} because it is not a content file`); + continue; + } - const pathParts = file.replace('src/data/roadmaps/', '').split('/'); - const roadmapSlug = pathParts?.[0]; - if (!roadmapSlug) { - console.error(`🚨 Roadmap slug is required: ${file}`); - continue; - } - const nodeSlug = pathParts?.[2]?.replace('.md', ''); - if (!nodeSlug) { - console.error(`🚨 Node id is required: ${file}`); - continue; - } + const pathParts = file.replace('src/data/roadmaps/', '').split('/'); + const roadmapSlug = pathParts?.[0]; + if (!roadmapSlug) { + console.error(`🚨 Roadmap slug is required: ${file}`); + continue; + } - const nodeId = nodeSlug.split('@')?.[1]; - if (!nodeId) { - console.error(`🚨 Node id is required: ${file}`); - continue; - } + const nodeSlug = pathParts?.[2]?.replace('.md', ''); + if (!nodeSlug) { + console.error(`🚨 Node id is required: ${file}`); + continue; + } - const roadmap = await fetchRoadmapJson(roadmapSlug); - const node = roadmap.nodes.find((node) => node.id === nodeId); - if (!node) { - console.error(`🚨 Node not found: ${file}`); - continue; - } + const nodeId = nodeSlug.split('@')?.[1]; + if (!nodeId) { + console.error(`🚨 Node id is required: ${file}`); + continue; + } - const filePath = path.join( - ROADMAP_CONTENT_DIR, - roadmapSlug, - 'content', - `${nodeSlug}.md`, - ); + const roadmap = await fetchRoadmapJson(roadmapSlug); + const node = roadmap.nodes.find((node) => node.id === nodeId); + if (!node) { + console.error(`🚨 Node not found: ${file}`); + continue; + } - const content = await fs.readFile(filePath, 'utf8'); - const html = markdownToHtml(content, false); - const rootHtml = parse(html); - - let ulWithLinks: HTMLElement | undefined; - rootHtml.querySelectorAll('ul').forEach((ul) => { - const listWithJustLinks = Array.from(ul.querySelectorAll('li')).filter( - (li) => { - const link = li.querySelector('a'); - return link && link.textContent?.trim() === li.textContent?.trim(); - }, + const filePath = path.join( + ROADMAP_CONTENT_DIR, + roadmapSlug, + 'content', + `${nodeSlug}.md`, ); - if (listWithJustLinks.length > 0) { - // @ts-expect-error - TODO: fix this - ulWithLinks = ul; + const content = await fs.readFile(filePath, 'utf8'); + const html = markdownToHtml(content, false); + const rootHtml = parse(html); + + let ulWithLinks: HTMLElement | undefined; + rootHtml.querySelectorAll('ul').forEach((ul) => { + const listWithJustLinks = Array.from(ul.querySelectorAll('li')).filter( + (li) => { + const link = li.querySelector('a'); + return link && link.textContent?.trim() === li.textContent?.trim(); + }, + ); + + if (listWithJustLinks.length > 0) { + // @ts-expect-error - TODO: fix this + ulWithLinks = ul; + } + }); + + const listLinks: Omit[] = + ulWithLinks !== undefined + ? Array.from(ulWithLinks.querySelectorAll('li > a')) + .map((link) => { + const typePattern = /@([a-z.]+)@/; + let linkText = link.textContent || ''; + const linkHref = link.getAttribute('href') || ''; + let linkType = linkText.match(typePattern)?.[1] || 'article'; + linkType = allowedOfficialRoadmapTopicResourceType.includes( + linkType as any, + ) + ? linkType + : 'article'; + + linkText = linkText.replace(typePattern, ''); + + return { + title: linkText, + url: linkHref, + type: linkType as AllowedOfficialRoadmapTopicResourceType, + }; + }) + .sort((a, b) => { + const order = [ + 'official', + 'opensource', + 'article', + 'video', + 'feed', + ]; + return order.indexOf(a.type) - order.indexOf(b.type); + }) + : []; + + const title = rootHtml.querySelector('h1'); + ulWithLinks?.remove(); + title?.remove(); + + const allParagraphs = rootHtml.querySelectorAll('p'); + if (listLinks.length > 0 && allParagraphs.length > 0) { + // to remove the view more see more from the description + const lastParagraph = allParagraphs[allParagraphs.length - 1]; + lastParagraph?.remove(); } - }); - const listLinks: Omit[] = - ulWithLinks !== undefined - ? Array.from(ulWithLinks.querySelectorAll('li > a')) - .map((link) => { - const typePattern = /@([a-z.]+)@/; - let linkText = link.textContent || ''; - const linkHref = link.getAttribute('href') || ''; - let linkType = linkText.match(typePattern)?.[1] || 'article'; - linkType = allowedOfficialRoadmapTopicResourceType.includes( - linkType as any, - ) - ? linkType - : 'article'; + const htmlStringWithoutLinks = rootHtml.toString(); + const description = htmlToMarkdown(htmlStringWithoutLinks); - linkText = linkText.replace(typePattern, ''); - - return { - title: linkText, - url: linkHref, - type: linkType as AllowedOfficialRoadmapTopicResourceType, - }; - }) - .sort((a, b) => { - const order = [ - 'official', - 'opensource', - 'article', - 'video', - 'feed', - ]; - return order.indexOf(a.type) - order.indexOf(b.type); - }) - : []; - - const title = rootHtml.querySelector('h1'); - ulWithLinks?.remove(); - title?.remove(); - - if (listLinks.length > 0) { - const lastParagraph = rootHtml.querySelector('p:last-child'); - console.log(lastParagraph?.textContent); - lastParagraph?.remove(); - } - - const htmlStringWithoutLinks = rootHtml.toString(); - const description = htmlToMarkdown(htmlStringWithoutLinks); - - const updatedDescription = `# ${title?.textContent} + const updatedDescription = `# ${title?.textContent} ${description}`.trim(); - const label = node?.data?.label as string; - if (!label) { - console.error(`🚨 Label is required: ${file}`); - continue; + const label = node?.data?.label as string; + if (!label) { + console.error(`🚨 Label is required: ${file}`); + continue; + } + + topics.push({ + roadmapSlug, + nodeId, + title: label, + description: updatedDescription, + resources: listLinks, + }); } - topics.push({ - roadmapSlug, - nodeId, - title: label, - description: updatedDescription, - resources: listLinks, - }); + await syncContentToDatabase(topics); +} catch (error) { + console.error(error); + process.exit(1); } - -console.log(JSON.stringify(topics, null, 2));