Skip to content

Commit

Permalink
Desktop: Fixes #11597: OneNote Importer should only use text on fallb…
Browse files Browse the repository at this point in the history
…ack title (#11598)
  • Loading branch information
pedr authored Jan 9, 2025
1 parent 72575e3 commit a81af07
Show file tree
Hide file tree
Showing 10 changed files with 179 additions and 13 deletions.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -176,4 +176,15 @@ describe('InteropService_Importer_OneNote', () => {

BaseModel.setIdGenerator(originalIdGenerator);
});

skipIfNotCI('should remove hyperlink from title', async () => {
	// Swap in a sequential id generator ("0", "1", ...) for the duration of the import.
	let idx = 0;
	const originalIdGenerator = BaseModel.setIdGenerator(() => String(idx++));

	try {
		const notes = await importNote(`${supportDir}/onenote/remove_hyperlink_on_title.zip`);

		// Each note body is compared against a snapshot named after the note title.
		for (const note of notes) {
			expect(note.body).toMatchSnapshot(note.title);
		}
	} finally {
		// Restore the generator even when the import or an expectation throws,
		// so the stub does not stay installed for later tests.
		BaseModel.setIdGenerator(originalIdGenerator);
	}
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -766,3 +766,122 @@ exports[`InteropService_Importer_OneNote should import a simple OneNote notebook
</body>
</html>"
`;
exports[`InteropService_Importer_OneNote should remove hyperlink from title: wikipedia link 1`] = `
"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>wikipedia link</title>
<style>
* { margin: 0; padding: 0; font-weight: normal; }
table, tr, td { border-color: #A3A3A3; }
ul, ol { padding: 0; }
.title .outline-element { display: inline; }
.title .outline-element:nth-child(2) { margin-left: 10px !important; }
.container-outline { font-family: Calibri, sans-serif; font-size: 6pt; }
.ink-text, .ink-space { display: inline-block; position: relative; vertical-align: bottom; }
.ink-text { top: 0; left: 0; }
.note-tag-icon { position: relative; }
.note-tag-icon > svg { position: absolute; }
.icon-secondary > svg { position: absolute; fill: black; filter: drop-shadow(0 0 2px white); height: 12px; top: -1px; }
.icon-secondary > .content { position: absolute; color: black; filter: drop-shadow(0 0 2px white); font-size: 10px; color: black; top: -1px; user-select: none; }
</style>
</head>
<body>
<div class="title" style="left: 48px; position: absolute; top: 24px;"><div class="container-outline" style="width: 624px;"><div class="outline-element" style="margin-left: 0px;"><span style="font-family: Calibri Light; font-size: 20pt;">&nbsp;</span></div>
</div><div class="container-outline"><div class="outline-element" style="margin-left: 0px;"><span style="color: rgb(102,102,102); font-family: Calibri; font-size: 10pt;">Sunday, January 05, 2025</span></div>
<div class="outline-element" style="margin-left: 0px;"><span style="color: rgb(102,102,102); font-family: Calibri; font-size: 10pt;">10:15 PM</span></div>
</div></div><div class="container-outline" style="left: 48px; position: absolute; top: 115px; width: 624px;"><div class="outline-element" style="margin-left: 0px;"><p style="font-family: Calibri; font-size: 11pt;"><a href="https://zh.wikipedia.org/zh-hans/%E9%A3%8E%E6%99%AF" style="">wikipedia link</a></p></div>
</div>
<script>
if (window.parent !== null) {
window.parent.postMessage(window.location.href, '*');
}
</script>
</body>
</html>"
`;
exports[`InteropService_Importer_OneNote should remove hyperlink from title: 风景 (Web view) 1`] = `
"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>风景 (Web view)</title>
<style>
* { margin: 0; padding: 0; font-weight: normal; }
table, tr, td { border-color: #A3A3A3; }
ul, ol { padding: 0; }
.title .outline-element { display: inline; }
.title .outline-element:nth-child(2) { margin-left: 10px !important; }
.container-outline { font-family: Calibri, sans-serif; font-size: 6pt; }
.ink-text, .ink-space { display: inline-block; position: relative; vertical-align: bottom; }
.ink-text { top: 0; left: 0; }
.note-tag-icon { position: relative; }
.note-tag-icon > svg { position: absolute; }
.icon-secondary > svg { position: absolute; fill: black; filter: drop-shadow(0 0 2px white); height: 12px; top: -1px; }
.icon-secondary > .content { position: absolute; color: black; filter: drop-shadow(0 0 2px white); font-size: 10px; color: black; top: -1px; user-select: none; }
</style>
</head>
<body>
<div class="title" style="left: 48px; position: absolute; top: 24px;"><div class="container-outline" style="width: 624px;"><div class="outline-element" style="margin-left: 0px;"><span style="font-family: Calibri Light; font-size: 20pt;">&nbsp;</span></div>
</div><div class="container-outline" style="width: 624px;"><div class="outline-element" style="margin-left: 0px;"><span style="color: rgb(128,128,128); font-family: Calibri; font-size: 10pt;">Sunday, January 5, 2025</span></div>
<div class="outline-element" style="margin-left: 0px;"><span style="color: rgb(128,128,128); font-family: Calibri; font-size: 10pt;">10:13 PM</span></div>
</div></div><div class="container-outline" style="left: 48px; position: absolute; top: 115px; width: 624px;"><div class="outline-element" style="margin-left: 0px;"><p style="font-family: Calibri; font-size: 11pt;"><a href="onenote:#风景&section-id={75256889-9e75-4ec2-82ed-fc799557e1b9}&page-id={d099b6f3-7f5a-4c08-aed7-e8d42c59523f}&end" style="">风景</a><span style="font-family: Calibri; font-size: 11pt;"> (</span><a href="https://onedrive.live.com/edit.aspx?resid=193EE54E3252492D!s9b62db4219f740709f444bc0129de4e9&migratedtospo=true&wd=target%28Quick%20Notes.one%7C75256889-9e75-4ec2-82ed-fc799557e1b9%2F%E9%A3%8E%E6%99%AF%7Cd099b6f3-7f5a-4c08-aed7-e8d42c59523f%2F%29&wdorigin=703&wdpreservelink=1" style="">Web view</a><span style="font-family: Calibri; font-size: 11pt;">)</span></p></div>
</div>
<script>
if (window.parent !== null) {
window.parent.postMessage(window.location.href, '*');
}
</script>
</body>
</html>"
`;
exports[`InteropService_Importer_OneNote should remove hyperlink from title: 风景 1`] = `
"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>风景</title>
<style>
* { margin: 0; padding: 0; font-weight: normal; }
table, tr, td { border-color: #A3A3A3; }
ul, ol { padding: 0; }
.title .outline-element { display: inline; }
.title .outline-element:nth-child(2) { margin-left: 10px !important; }
.container-outline { font-family: Calibri, sans-serif; font-size: 6pt; }
.ink-text, .ink-space { display: inline-block; position: relative; vertical-align: bottom; }
.ink-text { top: 0; left: 0; }
.note-tag-icon { position: relative; }
.note-tag-icon > svg { position: absolute; }
.icon-secondary > svg { position: absolute; fill: black; filter: drop-shadow(0 0 2px white); height: 12px; top: -1px; }
.icon-secondary > .content { position: absolute; color: black; filter: drop-shadow(0 0 2px white); font-size: 10px; color: black; top: -1px; user-select: none; }
</style>
</head>
<body>
<div class="title" style="left: 48px; position: absolute; top: 24px;"><div class="container-outline" style="width: 624px;"><div class="outline-element" style="margin-left: 0px;"><span style="font-family: Calibri Light; font-size: 20pt;">风景</span></div>
</div><div class="container-outline"><div class="outline-element" style="margin-left: 0px;"><span style="color: rgb(102,102,102); font-family: Calibri; font-size: 10pt;">Sunday, January 05, 2025</span></div>
<div class="outline-element" style="margin-left: 0px;"><span style="color: rgb(102,102,102); font-family: Calibri; font-size: 10pt;">10:14 PM</span></div>
</div></div>
<script>
if (window.parent !== null) {
window.parent.postMessage(window.location.href, '*');
}
</script>
</body>
</html>"
`;
2 changes: 1 addition & 1 deletion packages/onenote-converter/src/notebook.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ impl Renderer {
let section_path = renderer.render(section, notebook_dir)?;
log!("section_path: {:?}", section_path);

let path_from_base_dir = unsafe { remove_prefix(section_path.as_str(), base_dir.as_str()) }
let path_from_base_dir = unsafe { remove_prefix(section_path, base_dir.as_str()) }
.unwrap()
.as_string()
.unwrap();
Expand Down
4 changes: 2 additions & 2 deletions packages/onenote-converter/src/page/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ impl<'a> Renderer<'a> {
}

pub(crate) fn render_page(&mut self, page: &Page) -> Result<String> {
let title_text = page.title_text().unwrap_or("Untitled Page");
let title_text = page.title_text().unwrap_or("Untitled Page".to_string());

let mut content = String::new();

Expand Down Expand Up @@ -70,7 +70,7 @@ impl<'a> Renderer<'a> {

content.push_str(&page_content);

crate::templates::page::render(title_text, &content, &self.global_styles)
crate::templates::page::render(&title_text, &content, &self.global_styles)
}

pub(crate) fn gen_class(&mut self, prefix: &str) -> String {
Expand Down
2 changes: 1 addition & 1 deletion packages/onenote-converter/src/page/rich_text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ impl<'a> Renderer<'a> {
// all the styles to be shifted by minus one.
// A better solution would be to check whether something is wrong with the parser,
// but I haven't found what could be causing this yet.
if text.starts_with("\u{000B}") && !indices.is_empty(){
if text.starts_with("\u{000B}") && !indices.is_empty() {
indices.remove(0);
styles.pop();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,13 @@ pub(crate) fn parse(object: &Object) -> Result<Data> {

let entity_guid = simple::parse_guid(PropertyType::NotebookManagementEntityGuid, object)?
.ok_or_else(|| ErrorKind::MalformedOneNoteFileData("page metadata has no guid".into()))?;
let cached_title =
simple::parse_string(PropertyType::CachedTitleString, object)?.ok_or_else(|| {
ErrorKind::MalformedOneNoteFileData("page metadata has no cached title".into())
})?;
// The page might not have a title but we can use the first Section outline from the body as the fallback later
let cached_title = simple::parse_string(PropertyType::CachedTitleString, object)?
.ok_or_else(|| {
let guid = simple::parse_guid(PropertyType::NotebookManagementEntityGuid, object);
return guid.map(|g| g.unwrap().to_string());
})
.unwrap_or("Untitled Page".to_string());
let schema_revision_in_order_to_read =
simple::parse_u32(PropertyType::SchemaRevisionInOrderToRead, object)?;
let schema_revision_in_order_to_write =
Expand Down
38 changes: 36 additions & 2 deletions packages/onenote-converter/src/parser/onenote/page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,16 +62,23 @@ impl Page {
/// The page's title text.
///
/// This is calculated using a heuristic similar to the one OneNote uses.
pub fn title_text(&self) -> Option<&str> {
pub fn title_text(&self) -> Option<String> {
self.title
.as_ref()
.and_then(|title| title.contents.first())
.and_then(Self::outline_text)
.and_then(|t| Some(Self::remove_hyperlink(t.to_owned())))
.or_else(|| {
self.contents
.iter()
.filter_map(|page_content| page_content.outline())
.filter_map(Self::outline_text)
.filter_map(|t| {
let v = Self::outline_text(t);
if v.is_none() {
return None;
}
return Some(Self::remove_hyperlink(v.unwrap().to_owned()));
})
.next()
})
}
Expand All @@ -85,6 +92,33 @@ impl Page {
.and_then(|content| content.rich_text())
.and_then(|text| Some(&*text.text).filter(|s| !s.is_empty()))
}

/// Removes embedded hyperlink field codes from a title, keeping only the visible text.
///
/// Every occurrence of the marker `\u{fddf}HYPERLINK "`, together with everything up to
/// and including the next `"`, is deleted. If a marker has no closing quote (a broken
/// link), the title is cut at that marker and only the text before it is kept.
///
/// A title without any marker is returned unchanged.
fn remove_hyperlink(mut title: String) -> String {
    const HYPERLINK_MARKER: &str = "\u{fddf}HYPERLINK \"";

    // Always search the current (already cleaned) string so that every offset refers to
    // the buffer being edited, never to a stale copy of the original title.
    while let Some(marker_start) = title.find(HYPERLINK_MARKER) {
        let url_start = marker_start + HYPERLINK_MARKER.len();

        match title[url_start..].find('"') {
            // Drop the marker, the URL and its closing quote (a single byte) in one go.
            Some(quote_offset) => {
                title.replace_range(marker_start..=url_start + quote_offset, "");
            }
            // Sometimes links are broken; in that case only keep what precedes the marker.
            None => title.truncate(marker_start),
        }
    }

    title
}
}

/// A page title.
Expand Down
3 changes: 1 addition & 2 deletions packages/onenote-converter/src/section.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,14 @@ impl Renderer {
let _ = unsafe { write_file(&page_path, page_html.as_bytes()) };

let page_path_without_basedir =
unsafe { remove_prefix(page_path.as_str(), output_dir.as_str()) }
unsafe { remove_prefix(page_path, output_dir.as_str()) }
.unwrap()
.as_string()
.unwrap();
toc.push((title, page_path_without_basedir, page.level()))
}
}

log!("Section finished rendering: {:?}", section.display_name());
let toc_html = templates::section::render(section.display_name(), toc)?;
let toc_file = unsafe {
join_path(
Expand Down
2 changes: 1 addition & 1 deletion packages/onenote-converter/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ extern "C" {

#[wasm_bindgen(js_name = removePrefix, catch)]
pub unsafe fn remove_prefix(
base_path: &str,
base_path: String,
prefix: &str,
) -> std::result::Result<JsValue, JsValue>;

Expand Down

0 comments on commit a81af07

Please sign in to comment.