diff --git a/src/services/import/utils.spec.ts b/src/services/import/utils.spec.ts new file mode 100644 index 000000000..abd33e025 --- /dev/null +++ b/src/services/import/utils.spec.ts @@ -0,0 +1,103 @@ +import { describe, it, expect } from "vitest"; +import importUtils from "./utils.js"; + +type TestCase any> = [desc: string, fnParams: Parameters, expected: ReturnType]; + +describe("#extractHtmlTitle", () => { + const htmlWithNoTitle = ` + + +
abc
+ + `; + + const htmlWithTitle = ` + + Test Title + + +
abc
+ + `; + + const htmlWithTitleWOpeningBracket = ` + + Test < Title + + +
abc
+ + `; + + // prettier-ignore + const testCases: TestCase[] = [ + [ + "w/ existing tag, it should return the content of the title tag", + [htmlWithTitle], + "Test Title" + ], + [ + // @TriliumNextTODO: this seems more like an unwanted behaviour to me – check if this needs rather fixing + "with existing <title> tag, that includes an opening HTML tag '<', it should return null", + [htmlWithTitleWOpeningBracket], + null + ], + [ + "w/o an existing <title> tag, it should reutrn null", + [htmlWithNoTitle], + null + ], + [ + "w/ empty string content, it should return null", + [""], + null + ] + ]; + + testCases.forEach((testCase) => { + const [desc, fnParams, expected] = testCase; + return it(desc, () => { + const actual = importUtils.extractHtmlTitle(...fnParams); + expect(actual).toStrictEqual(expected); + }); + }); +}); + +describe("#handleH1", () => { + // prettier-ignore + const testCases: TestCase<typeof importUtils.handleH1>[] = [ + [ + "w/ single <h1> tag w/ identical text content as the title tag: the <h1> tag should be stripped", + ["<h1>Title</h1>", "Title"], + "" + ], + [ + "w/ multiple <h1> tags, with the fist matching the title tag: the first <h1> tag should be stripped and subsequent tags converted to <h2>", + ["<h1>Title</h1><h1>Header 1</h1><h1>Header 2</h1>", "Title"], + "<h2>Header 1</h2><h2>Header 2</h2>" + ], + [ + "w/ no <h1> tag and only <h2> tags, it should not cause any changes and return the same content", + ["<h2>Heading 1</h2><h2>Heading 2</h2>", "Title"], + "<h2>Heading 1</h2><h2>Heading 2</h2>" + ], + [ + "w/ multiple <h1> tags, and the 1st matching the title tag, it should strip ONLY the very first occurence of the <h1> tags in the returned content", + ["<h1>Topic ABC</h1><h1>Heading 1</h1><h1>Topic ABC</h1>", "Topic ABC"], + "<h2>Heading 1</h2><h2>Topic ABC</h2>" + ], + [ + "w/ multiple <h1> tags, and the 1st matching NOT the title tag, it should NOT strip any other <h1> tags", + ["<h1>Introduction</h1><h1>Topic ABC</h1><h1>Summary</h1>", "Topic ABC"], + "<h2>Introduction</h2><h2>Topic ABC</h2><h2>Summary</h2>" + ] + ]; + + testCases.forEach((testCase) => { + const [desc, fnParams, expected] = testCase; + return it(desc, () => { + const actual = importUtils.handleH1(...fnParams); + expect(actual).toStrictEqual(expected); + }); + }); +}); diff --git a/src/services/import/utils.ts b/src/services/import/utils.ts index ec4bbf35a..41c42ae22 100644 --- a/src/services/import/utils.ts +++ b/src/services/import/utils.ts @@ -1,14 +1,19 @@ "use strict"; function handleH1(content: string, title: string) { - content = content.replace(/<h1[^>]*>([^<]*)<\/h1>/gi, (match, text) => { - if (title.trim() === text.trim()) { - return ""; // remove whole H1 tag - } else { - return `<h2>${text}</h2>`; + let isFirstH1Handled = false; + + return content.replace(/<h1[^>]*>([^<]*)<\/h1>/gi, (match, text) => { + const convertedContent = `<h2>${text}</h2>`; + + // strip away very first found h1 tag, if it matches the title + if (!isFirstH1Handled) { + isFirstH1Handled = true; + return title.trim() === text.trim() ? "" : convertedContent; } + + return convertedContent; }); - return content; } function extractHtmlTitle(content: string): string | null {