Press n or j to go to the next uncovered block, b, p or k for the previous block.
import { load } from 'cheerio';
import marked from 'marked';
import {
  MarkdownParsedContent,
  MarkdownMetadataContent,
  MarkdownExtractorResult,
  MarkdownExtractorOptions,
} from './interface';
import { extract } from './metadata-extractor';

/**
 * Fallback settings. `parseMarkdown` reads `metadataDelimiter` and
 * `cheerioOptions` from here whenever the caller does not supply them.
 */
const defaultOptions: MarkdownExtractorOptions = {
  selectors: [],
  metadataDelimiter: '---',
  cheerioOptions: {
    ignoreWhitespace: true,
    lowerCaseTags: true,
    lowerCaseAttributeNames: true,
    xmlMode: false,
  },
};

/**
 * Render a markdown document to HTML and pull selected fragments out of it.
 *
 * The text is first handed to `extract`, which separates the metadata section
 * from the remaining content using the configured delimiter. The remaining
 * content is rendered with `marked`, loaded into cheerio, and queried once per
 * configured selector.
 *
 * Note that for every selector the extracted value comes from the sibling
 * element(s) immediately FOLLOWING the match (`.next()`), not from the matched
 * element itself.
 *
 * @param data Markdown text as string
 * @param options Markdown extractor options
 * @param options.selectors An array of jQuery-style DOM selectors. Each entry
 * carries a `selector` string and a `parseHtml` flag: when the flag is truthy
 * the following sibling is serialized as HTML, otherwise its trimmed text is
 * used. Defaults to an empty list.
 * @param options.metadataDelimiter The delimiter demarcating the metadata
 * section of the markdown. Falls back to `---` when missing or falsy.
 * @param options.cheerioOptions Options forwarded to cheerio's `load`. They are
 * merged over the defaults, so only the keys you set are overridden.
 * @returns The `metadata` reported by `extract`, a `content` map keyed by
 * selector string, and the full rendered `html`.
 */
export function parseMarkdown(data: string, options?: MarkdownExtractorOptions): MarkdownExtractorResult {
  // Resolve the effective settings; caller-supplied values win over defaults.
  const selectors = options?.selectors || [];
  const metadataDelimiter = options?.metadataDelimiter || defaultOptions.metadataDelimiter;
  const cheerioOptions = { ...defaultOptions.cheerioOptions, ...(options?.cheerioOptions || {}) };

  const parsed: MarkdownMetadataContent = extract(data, metadataDelimiter);
  const html: string = marked(parsed.content);
  const $ = load(html, cheerioOptions);

  // One entry per selector; a repeated selector string overwrites the earlier entry.
  const content = {} as MarkdownParsedContent;
  for (const { selector, parseHtml } of selectors) {
    const following = $(selector).next();
    const value = parseHtml ? $.html(following) : following.text().trim();
    Object.assign(content, { [selector]: value });
  }

  return { metadata: parsed.metadata, content, html };
}