[BUG] Include system prompt in Phi3 by default
joecummings committed Oct 9, 2024
1 parent 89f21c2 commit c65f4b9
Showing 2 changed files with 244 additions and 3 deletions.
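The gist of the change: `Phi3MiniTokenizer.tokenize_messages` previously dropped system messages by default (`ignore_system_prompts=True`); with this fix the system prompt is tokenized by default and the flag is renamed to `ignore_system_prompt`, defaulting to `False`. A minimal sketch of the new behavior, assuming the `phi3_mini_tokenizer` builder and an illustrative local model path (both are assumptions, not part of this diff):

```python
from torchtune.data import Message
from torchtune.models.phi3 import phi3_mini_tokenizer  # builder name assumed

# Path is illustrative; point it at a real Phi-3 SentencePiece model file.
tokenizer = phi3_mini_tokenizer(path="/tmp/phi3/tokenizer.model")

messages = [
    Message(role="system", content="You are a helpful assistant", masked=True),
    Message(role="user", content="Generate a realistic dating profile bio.", masked=True),
    Message(role="assistant", content="I'm an outgoing and friendly person ..."),
]

# New default: the system prompt is included in the tokenized output.
tokens, mask = tokenizer.tokenize_messages(messages)

# The old behavior (skip the system prompt) is still available via the renamed flag.
tokens_no_sys, mask_no_sys = tokenizer.tokenize_messages(messages, ignore_system_prompt=True)
```

With the new default the token stream begins with BOS followed by the system-turn special token (1, 32006 in the updated test below); with `ignore_system_prompt=True` it jumps straight to the user turn (1, 32010).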
241 changes: 241 additions & 0 deletions tests/torchtune/models/phi3/test_phi3_tokenizer.py
@@ -22,6 +22,7 @@ def tokenizer(self):

def test_tokenize_messages(self, tokenizer):
messages = [
Message(role="system", content="You are a helpful assistant", masked=True),
Message(
role="user",
content="Below is an instruction that describes a task. Write a response "
@@ -40,6 +41,246 @@ def test_tokenize_messages(self, tokenizer):
),
]
tokens, mask = tokenizer.tokenize_messages(messages)
expected_tokens = [
1,
32006,
272,
84,
9,
615,
454,
1974,
19,
32007,
32010,
323,
418,
202,
31,
128,
15,
120,
47,
88,
584,
23,
1665,
182,
9,
434,
295,
85,
4,
780,
47,
636,
9,
1094,
213,
23,
9,
69,
69,
164,
1153,
299,
35,
961,
132,
237,
7,
5,
761,
4,
12,
0,
313,
120,
47,
88,
584,
166,
493,
171,
54,
299,
9,
906,
244,
19,
186,
767,
303,
671,
92,
209,
24,
190,
52,
38,
4,
12,
0,
1243,
7,
69,
135,
213,
166,
32007,
32001,
6,
21,
45,
128,
71,
58,
38,
14,
10,
652,
35,
462,
101,
1306,
7,
341,
171,
20,
14,
127,
26,
652,
7,
10,
1268,
4,
6,
21,
45,
591,
9,
566,
22,
994,
913,
38,
20,
52,
24,
10,
1306,
734,
14,
71,
365,
1382,
7,
10,
801,
105,
88,
244,
985,
7,
4,
6,
21,
45,
9,
566,
126,
180,
11,
5,
1137,
7,
10,
1089,
151,
8,
1156,
213,
342,
7,
10,
384,
104,
54,
470,
4,
6,
21,
45,
287,
14,
33,
125,
135,
24,
101,
512,
66,
7,
28,
822,
15,
542,
69,
59,
110,
14,
365,
229,
7,
3,
36,
267,
36,
125,
135,
24,
101,
1503,
182,
9,
222,
1661,
191,
332,
92,
92,
24,
24,
4,
32007,
]

expected_mask = [True] * 86 + [False] * 126
assert expected_tokens == tokens
assert expected_mask == mask

def test_tokenize_messages_no_system_prompt(self, tokenizer):
messages = [
Message(role="system", content="You are a helpful assistant", masked=True),
Message(
role="user",
content="Below is an instruction that describes a task. Write a response "
"that appropriately completes the request.\n\n### Instruction:\nGenerate "
"a realistic dating profile bio.\n\n### Response:\n",
masked=True,
),
Message(
role="assistant",
content="I'm an outgoing and friendly person who loves spending time with "
"friends and family. I'm also a big-time foodie and love trying out new "
"restaurants and different cuisines. I'm a big fan of the arts and enjoy "
"going to museums and galleries. I'm looking for someone who shares my "
"interest in exploring new places, as well as someone who appreciates a "
"good conversation over coffee.",
),
]
tokens, mask = tokenizer.tokenize_messages(messages, ignore_system_prompt=True)
expected_tokens = [
1,
32010,
6 changes: 3 additions & 3 deletions torchtune/models/phi3/_tokenizer.py
@@ -125,7 +125,7 @@ def tokenize_messages(
messages: List[Message],
*,
add_eos: bool = False,
- ignore_system_prompts: bool = True,
+ ignore_system_prompt: bool = False,
) -> Tuple[List[int], List[bool]]:
r"""Tokenize a list of messages one at a time then concatenate them,
returning a list of tokens and a list of masks.
@@ -151,7 +151,7 @@
messages (List[Message]): A list of messages, each containing role, content,
and masked attributes.
add_eos (bool): Whether to append EOS after assistant message, default to False
- ignore_system_prompts (bool): Whether to ignore system prompts. This matches the HF implementation, default to True.
+ ignore_system_prompt (bool): Whether to ignore system prompt, defaults to False.
Raises:
ValueError: If the role is not "user", "assistant", or "system".
@@ -175,7 +175,7 @@

for message in templated_messages:
# Skip system prompt
- if ignore_system_prompts and message.role == "system":
+ if ignore_system_prompt and message.role == "system":
continue

# Prepend BOS on start of new turns
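For context on the returned mask, the updated test expects `[True] * 86 + [False] * 126`: `True` marks tokens from messages created with `masked=True` (the system and user turns), `False` marks the assistant turn. A short sketch of a typical downstream use (loss masking; the ignore index and this usage are assumptions, not part of this commit), continuing from the example above:

```python
import torch

CROSS_ENTROPY_IGNORE_IDX = -100  # conventional ignore index; defined here rather than imported

# `tokens` and `mask` as returned by tokenize_messages(...) in the sketch above.
labels = torch.tensor(tokens)
labels[torch.tensor(mask)] = CROSS_ENTROPY_IGNORE_IDX  # drop masked (prompt) tokens from the loss
```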
