diff --git a/DESCRIPTION b/DESCRIPTION index 53a7f3e..bb0cd52 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sentencepiece Type: Package Title: Text Tokenization using Byte Pair Encoding and Unigram Modelling -Version: 0.2 +Version: 0.2.1 Authors@R: c( person('Jan', 'Wijffels', role = c('aut', 'cre', 'cph'), email = 'jwijffels@bnosac.be', comment = "R wrapper"), person('BNOSAC', role = 'cph', comment = "R wrapper"), diff --git a/NEWS.md b/NEWS.md index 7e945c1..2d082a5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +## CHANGES IN sentencepiece VERSION 0.2.1 + +- Fix for clang-UBSAN error + ## CHANGES IN sentencepiece VERSION 0.2 - Fix wordpiece bug for 1-character words. (@jonthegeek, #4) diff --git a/src/sentencepiece/src/unicode_script.h b/src/sentencepiece/src/unicode_script.h index 917c49d..67042c0 100644 --- a/src/sentencepiece/src/unicode_script.h +++ b/src/sentencepiece/src/unicode_script.h @@ -19,7 +19,7 @@ namespace sentencepiece { namespace unicode_script { -enum ScriptType { +enum ScriptType : int32_t { U_Adlam, U_Ahom, U_Anatolian_Hieroglyphs,