All files extractor.ts

100% Statements 18/18
100% Branches 20/20
100% Functions 3/3
100% Lines 18/18

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 771x 1x                 1x   1x                                             1x 2x           2x 2x 2x   2x   6x   3x 2x   2x           1x           3x   3x     2x            
import { load } from 'cheerio';
import marked from 'marked';
 
import {
  MarkdownParsedContent,
  MarkdownMetadataContent,
  MarkdownExtractorResult,
  MarkdownExtractorOptions,
} from './interface';
 
import { extract } from './metadata-extractor';
 
/**
 * Fallback settings for `parseMarkdown`.
 *
 * - `metadataDelimiter` is used when the caller passes no (or a falsy) delimiter.
 * - `selectors` is empty, i.e. nothing is extracted unless the caller asks for it.
 * - `cheerioOptions` is the base object that caller-supplied cheerio options are
 *   spread on top of, so any key the caller sets overrides the value here.
 */
const defaultOptions: MarkdownExtractorOptions = {
  metadataDelimiter: '---',
  selectors: [],
  cheerioOptions: {
    xmlMode: false,
    ignoreWhitespace: true,
    lowerCaseAttributeNames: true,
    lowerCaseTags: true,
  },
};
 
/**
 * Render a markdown document to HTML and pull selected fragments out of it.
 *
 * Processing steps:
 *  1. `extract` splits the raw text into a metadata part and the remaining markdown body.
 *  2. The body is converted to HTML with `marked`.
 *  3. The HTML is loaded into cheerio and every configured selector is evaluated.
 *
 * Note that for each selector the value extracted is taken from the sibling that
 * directly follows the matched element(s) (`.next()`), not from the match itself.
 *
 * @param data Markdown text as string
 * @param options Markdown extractor options
 * @param options.selectors An array of entries, each with a jquery style `selector` and a `parseHtml` flag.
 *                          When `parseHtml` is truthy the following sibling is serialized as html,
 *                          otherwise its trimmed text is used. Defaults to an empty array.
 * @param options.metadataDelimiter The delimiter demarcating the metadata section of the markdown.
 *                                  Defaults to `---`
 * @param options.cheerioOptions Options handed to cheerio when loading the html.
 *                               They are merged over the built-in defaults, so only the keys
 *                               you set are overridden.
 * @returns An object holding the extracted `metadata`, the per-selector `content`
 *          (keyed by selector string) and the full rendered `html`.
 */
export function parseMarkdown(data: string, options?: MarkdownExtractorOptions): MarkdownExtractorResult {
  // Resolve the effective settings; anything missing or falsy falls back to the defaults.
  const selectors = options?.selectors || [];
  const metadataDelimiter = options?.metadataDelimiter || defaultOptions.metadataDelimiter;
  const cheerioOptions = { ...defaultOptions.cheerioOptions, ...(options?.cheerioOptions || {}) };

  const parsed: MarkdownMetadataContent = extract(data, metadataDelimiter);
  const html: string = marked(parsed.content);
  const $ = load(html, cheerioOptions);

  // Collect one entry per selector; a selector listed twice keeps its last value.
  const content = {} as MarkdownParsedContent;

  selectors.forEach(({ selector, parseHtml }) => {
    const following = $(selector).next();
    const extracted = parseHtml ? $.html(following) : following.text().trim();

    Object.assign(content, { [selector]: extracted });
  });

  return { metadata: parsed.metadata, content, html };
}