diff --git a/packages/app-cli/tests/support/onenote/remove_hyperlink_on_title.zip b/packages/app-cli/tests/support/onenote/remove_hyperlink_on_title.zip new file mode 100644 index 00000000000..ce992dfd735 Binary files /dev/null and b/packages/app-cli/tests/support/onenote/remove_hyperlink_on_title.zip differ diff --git a/packages/lib/services/interop/InteropService_Importer_OneNote.test.ts b/packages/lib/services/interop/InteropService_Importer_OneNote.test.ts index ce722377b4e..3bcdb85fbef 100644 --- a/packages/lib/services/interop/InteropService_Importer_OneNote.test.ts +++ b/packages/lib/services/interop/InteropService_Importer_OneNote.test.ts @@ -176,4 +176,15 @@ describe('InteropService_Importer_OneNote', () => { BaseModel.setIdGenerator(originalIdGenerator); }); + + skipIfNotCI('should remove hyperlink from title', async () => { + let idx = 0; + const originalIdGenerator = BaseModel.setIdGenerator(() => String(idx++)); + const notes = await importNote(`${supportDir}/onenote/remove_hyperlink_on_title.zip`); + + for (const note of notes) { + expect(note.body).toMatchSnapshot(note.title); + } + BaseModel.setIdGenerator(originalIdGenerator); + }); }); diff --git a/packages/lib/services/interop/__snapshots__/InteropService_Importer_OneNote.test.js.snap b/packages/lib/services/interop/__snapshots__/InteropService_Importer_OneNote.test.js.snap index a70037a0b3e..46a176baacc 100644 --- a/packages/lib/services/interop/__snapshots__/InteropService_Importer_OneNote.test.js.snap +++ b/packages/lib/services/interop/__snapshots__/InteropService_Importer_OneNote.test.js.snap @@ -766,3 +766,122 @@ exports[`InteropService_Importer_OneNote should import a simple OneNote notebook " `; + +exports[`InteropService_Importer_OneNote should remove hyperlink from title: wikipedia link 1`] = ` +" + + + + wikipedia link + + + + +
 
+
Sunday, January 05, 2025
+
10:15 PM
+

wikipedia link

+
+ + + +" +`; + +exports[`InteropService_Importer_OneNote should remove hyperlink from title: 风景 (Web view) 1`] = ` +" + + + + 风景 (Web view) + + + + +
 
+
Sunday, January 5, 2025
+
10:13 PM
+

风景 (Web view)

+
+ + + +" +`; + +exports[`InteropService_Importer_OneNote should remove hyperlink from title: 风景 1`] = ` +" + + + + 风景 + + + + +
风景
+
Sunday, January 05, 2025
+
10:14 PM
+
+ + + +" +`; diff --git a/packages/onenote-converter/src/notebook.rs b/packages/onenote-converter/src/notebook.rs index 82ab8cf252b..460d690460b 100644 --- a/packages/onenote-converter/src/notebook.rs +++ b/packages/onenote-converter/src/notebook.rs @@ -84,7 +84,7 @@ impl Renderer { let section_path = renderer.render(section, notebook_dir)?; log!("section_path: {:?}", section_path); - let path_from_base_dir = unsafe { remove_prefix(section_path.as_str(), base_dir.as_str()) } + let path_from_base_dir = unsafe { remove_prefix(section_path, base_dir.as_str()) } .unwrap() .as_string() .unwrap(); diff --git a/packages/onenote-converter/src/page/mod.rs b/packages/onenote-converter/src/page/mod.rs index e66571e9107..10ead671d0b 100644 --- a/packages/onenote-converter/src/page/mod.rs +++ b/packages/onenote-converter/src/page/mod.rs @@ -35,7 +35,7 @@ impl<'a> Renderer<'a> { } pub(crate) fn render_page(&mut self, page: &Page) -> Result { - let title_text = page.title_text().unwrap_or("Untitled Page"); + let title_text = page.title_text().unwrap_or("Untitled Page".to_string()); let mut content = String::new(); @@ -70,7 +70,7 @@ impl<'a> Renderer<'a> { content.push_str(&page_content); - crate::templates::page::render(title_text, &content, &self.global_styles) + crate::templates::page::render(&title_text, &content, &self.global_styles) } pub(crate) fn gen_class(&mut self, prefix: &str) -> String { diff --git a/packages/onenote-converter/src/page/rich_text.rs b/packages/onenote-converter/src/page/rich_text.rs index c095b0f2552..b7b8a8fd2b4 100644 --- a/packages/onenote-converter/src/page/rich_text.rs +++ b/packages/onenote-converter/src/page/rich_text.rs @@ -74,7 +74,7 @@ impl<'a> Renderer<'a> { // all the styles to be shifted by minus one. // A better solution would be to look if there isn't anything wrong with the parser, // but I haven't found what could be causing this yet. - if text.starts_with("\u{000B}") && !indices.is_empty(){ + if text.starts_with("\u{000B}") && !indices.is_empty() { indices.remove(0); styles.pop(); } diff --git a/packages/onenote-converter/src/parser/one/property_set/page_metadata.rs b/packages/onenote-converter/src/parser/one/property_set/page_metadata.rs index 2523f7fd140..786bbb99722 100644 --- a/packages/onenote-converter/src/parser/one/property_set/page_metadata.rs +++ b/packages/onenote-converter/src/parser/one/property_set/page_metadata.rs @@ -32,10 +32,13 @@ pub(crate) fn parse(object: &Object) -> Result { let entity_guid = simple::parse_guid(PropertyType::NotebookManagementEntityGuid, object)? .ok_or_else(|| ErrorKind::MalformedOneNoteFileData("page metadata has no guid".into()))?; - let cached_title = - simple::parse_string(PropertyType::CachedTitleString, object)?.ok_or_else(|| { - ErrorKind::MalformedOneNoteFileData("page metadata has no cached title".into()) - })?; + // The page might not have a title but we can use the first Section outline from the body as the fallback later + let cached_title = simple::parse_string(PropertyType::CachedTitleString, object)? + .ok_or_else(|| { + let guid = simple::parse_guid(PropertyType::NotebookManagementEntityGuid, object); + return guid.map(|g| g.unwrap().to_string()); + }) + .unwrap_or("Untitled Page".to_string()); let schema_revision_in_order_to_read = simple::parse_u32(PropertyType::SchemaRevisionInOrderToRead, object)?; let schema_revision_in_order_to_write = diff --git a/packages/onenote-converter/src/parser/onenote/page.rs b/packages/onenote-converter/src/parser/onenote/page.rs index 1a6ae603224..485a2370a9e 100644 --- a/packages/onenote-converter/src/parser/onenote/page.rs +++ b/packages/onenote-converter/src/parser/onenote/page.rs @@ -62,16 +62,23 @@ impl Page { /// The page's title text. /// /// This is calculated using a heuristic similar to the one OneNote uses. - pub fn title_text(&self) -> Option<&str> { + pub fn title_text(&self) -> Option { self.title .as_ref() .and_then(|title| title.contents.first()) .and_then(Self::outline_text) + .and_then(|t| Some(Self::remove_hyperlink(t.to_owned()))) .or_else(|| { self.contents .iter() .filter_map(|page_content| page_content.outline()) - .filter_map(Self::outline_text) + .filter_map(|t| { + let v = Self::outline_text(t); + if v.is_none() { + return None; + } + return Some(Self::remove_hyperlink(v.unwrap().to_owned())); + }) .next() }) } @@ -85,6 +92,33 @@ impl Page { .and_then(|content| content.rich_text()) .and_then(|text| Some(&*text.text).filter(|s| !s.is_empty())) } + + fn remove_hyperlink(title: String) -> String { + const HYPERLINK_MARKER: &str = "\u{fddf}HYPERLINK \""; + + let mut title_copy = title.clone(); + + loop { + // Find the first hyperlink mark + if let Some(marker_start) = title_copy.find(HYPERLINK_MARKER) { + let hyperlink_part = &title_copy[marker_start + HYPERLINK_MARKER.len()..]; + + // Find the closing double quote of the hyperlink + if let Some(quote_end) = hyperlink_part.find('"') { + let before_hyperlink = &title_copy[..marker_start]; + let after_hyperlink = &hyperlink_part[quote_end + 1..]; + title_copy = format!("{}{}", before_hyperlink, after_hyperlink); + } else { + // Sometimes links are broken, in these cases we only consider what is before the mark + title_copy = title[..marker_start].to_string(); + } + } else { + break; + } + } + + title_copy + } } /// A page title. diff --git a/packages/onenote-converter/src/section.rs b/packages/onenote-converter/src/section.rs index 2edc29cf4a6..8c3dc22fc5a 100644 --- a/packages/onenote-converter/src/section.rs +++ b/packages/onenote-converter/src/section.rs @@ -64,7 +64,7 @@ impl Renderer { let _ = unsafe { write_file(&page_path, page_html.as_bytes()) }; let page_path_without_basedir = - unsafe { remove_prefix(page_path.as_str(), output_dir.as_str()) } + unsafe { remove_prefix(page_path, output_dir.as_str()) } .unwrap() .as_string() .unwrap(); @@ -72,7 +72,6 @@ impl Renderer { } } - log!("Section finished rendering: {:?}", section.display_name()); let toc_html = templates::section::render(section.display_name(), toc)?; let toc_file = unsafe { join_path( diff --git a/packages/onenote-converter/src/utils.rs b/packages/onenote-converter/src/utils.rs index 1f8f6553caa..5371e9f1ea7 100644 --- a/packages/onenote-converter/src/utils.rs +++ b/packages/onenote-converter/src/utils.rs @@ -82,7 +82,7 @@ extern "C" { #[wasm_bindgen(js_name = removePrefix, catch)] pub unsafe fn remove_prefix( - base_path: &str, + base_path: String, prefix: &str, ) -> std::result::Result;