Skip to content

Commit

Permalink
syntax: add a LangVariant parameter to Quote
Browse files Browse the repository at this point in the history
I had missed that $'' expansions are non-POSIX,
and only implemented by Bash and mksh.
So, in POSIX mode, we can't quote non-printable characters.

Moreover, fuzzing uncovered that mksh implements \x differently,
meaning that we require extra logic to follow its rules.
Keep all the fuzz crashers that we found in the process.

Finally, document what versions of Bash and mksh we develop with.
This matters, because some systems ship with very old versions,
which can implement slightly different quoting or escaping rules.
  • Loading branch information
mvdan committed Sep 29, 2021
1 parent 1ee509b commit c8af746
Show file tree
Hide file tree
Showing 11 changed files with 130 additions and 31 deletions.
4 changes: 3 additions & 1 deletion syntax/example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,9 @@ func ExampleQuote() {
"invalid-\xe2'",
"nonprint-\x0b\x1b",
} {
quoted, ok := syntax.Quote(s)
// We assume Bash syntax here.
// For general shell syntax quoting, use syntax.LangPOSIX.
quoted, ok := syntax.Quote(s, syntax.LangBash)
if !ok {
fmt.Printf("%q cannot be quoted", s)
} else {
Expand Down
55 changes: 41 additions & 14 deletions syntax/fuzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
package syntax

import (
"fmt"
"io"
"os/exec"
"strings"
Expand All @@ -19,21 +20,46 @@ func FuzzQuote(f *testing.F) {
}

// Keep in sync with ExampleQuote.
f.Add("foo")
f.Add("bar $baz")
f.Add(`"won't"`)
f.Add(`~/home`)
f.Add("#1304")
f.Add("name=value")
f.Add(`glob-*`)
f.Add("invalid-\xe2'")
f.Add("nonprint-\x0b\x1b")
f.Fuzz(func(t *testing.T, s string) {
quoted, ok := Quote(s)
f.Add("foo", uint8(LangBash))
f.Add("bar $baz", uint8(LangBash))
f.Add(`"won't"`, uint8(LangBash))
f.Add(`~/home`, uint8(LangBash))
f.Add("#1304", uint8(LangBash))
f.Add("name=value", uint8(LangBash))
f.Add(`glob-*`, uint8(LangBash))
f.Add("invalid-\xe2'", uint8(LangBash))
f.Add("nonprint-\x0b\x1b", uint8(LangBash))
f.Fuzz(func(t *testing.T, s string, langVariant uint8) {
if langVariant > 3 {
t.Skip() // lang variants are 0-3
}
lang := LangVariant(langVariant)
quoted, ok := Quote(s, lang)
if !ok {
// Contains a null byte; not interesting.
t.Skip()
}

var shellProgram string
switch lang {
case LangBash:
hasBash51(t)
shellProgram = "bash"
case LangPOSIX:
hasDash059(t)
shellProgram = "dash"
case LangMirBSDKorn:
hasMksh59(t)
shellProgram = "mksh"
case LangBats:
t.Skip() // bats has no shell and its syntax is just bash
default:
panic(fmt.Sprintf("unknown lang variant: %d", lang))
}

// TODO: Also double-check with our parser.
// That should allow us to fuzz Bats too, for instance.

// Beware that this might run arbitrary code
// if Quote is too naive and allows ';' or '$'.
//
Expand All @@ -43,13 +69,14 @@ func FuzzQuote(f *testing.F) {
//
// We could consider ways to fully sandbox the bash process,
// but for now that feels overkill.
out, err := exec.Command("bash", "-c", "printf %s "+quoted).CombinedOutput()
out, err := exec.Command(shellProgram, "-c", "printf %s "+quoted).CombinedOutput()
if err != nil {
t.Fatalf("bash error on %q quoted as %s: %v: %s", s, quoted, err, out)
t.Fatalf("%s error on %q quoted as %s: %v: %s", shellProgram, s, quoted, err, out)
}
want, got := s, string(out)
if want != got {
t.Fatalf("output mismatch on %q quoted as %s: got %q (len=%d)", want, quoted, got, len(got))
t.Fatalf("%s output mismatch on %q quoted as %s: got %q (len=%d)",
shellProgram, want, quoted, got, len(got))
}
})
}
Expand Down
77 changes: 61 additions & 16 deletions syntax/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"bytes"
"fmt"
"io"
"strconv"
"strings"
"unicode"
"unicode/utf8"
Expand Down Expand Up @@ -1149,24 +1148,25 @@ func testBinaryOp(val string) BinTestOperator {
}

// Quote returns a quoted version of the input string,
// so that the quoted version is always expanded or interpreted
// as the original string.
// so that the quoted version is expanded or interpreted
// as the original string in the given language variant.
//
// When the boolean result is false,
// the input string cannot be quoted to satisfy the rule above.
// For example, an expanded shell string can't contain a null byte.
// For instance, POSIX lacks escape sequences for non-printable characters,
// and no language variant can represent a string containing null bytes.
//
// Quoting is necessary when using arbitrary literal strings
// as words in a shell script or command.
// Without quoting, one could run into syntax errors,
// Without quoting, one can run into syntax errors,
// as well as the possibility of running unintended code.
//
// The quoting strategy is chosen on a best-effort basis,
// to minimize the amount of extra bytes necessary.
//
// Some strings do not require any quoting and are returned unchanged.
// Those strings can be directly surrounded in single quotes.
func Quote(s string) (_ string, ok bool) {
// Those strings can be directly surrounded in single quotes as well.
func Quote(s string, lang LangVariant) (_ string, ok bool) {
shellChars := false
nonPrintable := false
for _, r := range s {
Expand All @@ -1193,6 +1193,11 @@ func Quote(s string) (_ string, ok bool) {
return "", false
}
if r == utf8.RuneError || !unicode.IsPrint(r) {
if lang == LangPOSIX {
// POSIX shell lacks $'.
// Without it, we can't use escape sequences such as \x.
return "", false
}
nonPrintable = true
}
}
Expand All @@ -1209,24 +1214,58 @@ func Quote(s string) (_ string, ok bool) {
var b strings.Builder
if nonPrintable {
b.WriteString("$'")
quoteBuf := make([]byte, 0, 16)
lastRequoteIfHex := false
for rem := s; len(rem) > 0; {
nextRequoteIfHex := false
r, size := utf8.DecodeRuneInString(rem)
switch {
case r == utf8.RuneError && size == 1:
fmt.Fprintf(&b, "\\x%x", rem[0])
case !unicode.IsPrint(r):
quoteBuf = quoteBuf[:0]
quoteBuf = strconv.AppendQuoteRuneToASCII(quoteBuf, r)
// We don't want the single quotes from strconv.
b.Write(quoteBuf[1 : len(quoteBuf)-1])
case r == '\'', r == '\\':
b.WriteByte('\\')
b.WriteRune(r)
default:
case unicode.IsPrint(r) && r != utf8.RuneError:
if lastRequoteIfHex && isHex(r) {
b.WriteString("'$'")
}
b.WriteRune(r)
case r == '\a':
b.WriteString(`\a`)
case r == '\b':
b.WriteString(`\b`)
case r == '\f':
b.WriteString(`\f`)
case r == '\n':
b.WriteString(`\n`)
case r == '\r':
b.WriteString(`\r`)
case r == '\t':
b.WriteString(`\t`)
case r == '\v':
b.WriteString(`\v`)
case r < utf8.RuneSelf, r == utf8.RuneError && size == 1:
// \xXX, fixed at two hexadecimal characters.
fmt.Fprintf(&b, "\\x%02x", rem[0])
// Unfortunately, mksh allows \x to consume more hex characters.
// Ensure that we don't allow it to read more than two.
if lang == LangMirBSDKorn {
nextRequoteIfHex = true
}
case r > utf8.MaxRune:
// Not a valid Unicode code point?
return "", false
case r < 0x10000:
// \uXXXX, fixed at four hexadecimal characters.
fmt.Fprintf(&b, "\\u%04x", r)
default:
// \UXXXXXXXX, fixed at eight hexadecimal characters.
if lang == LangMirBSDKorn {
// mksh seems to lack support for codepoints above 16 bits?
// See the CAVEATS section in R59.
return "", false
}
fmt.Fprintf(&b, "\\U%08x", r)
}
rem = rem[size:]
lastRequoteIfHex = nextRequoteIfHex
}
b.WriteString("'")
return b.String(), true
Expand All @@ -1250,3 +1289,9 @@ func Quote(s string) (_ string, ok bool) {
b.WriteByte('"')
return b.String(), true
}

func isHex(r rune) bool {
return (r >= '0' && r <= '9') ||
(r >= 'a' && r <= 'f') ||
(r >= 'A' && r <= 'F')
}
4 changes: 4 additions & 0 deletions syntax/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ const (
// LangBash corresponds to the GNU Bash language, as described in its
// manual at https://www.gnu.org/software/bash/manual/bash.html.
//
// We currently follow Bash version 5.1.
//
// Its string representation is "bash".
LangBash LangVariant = iota

Expand All @@ -45,6 +47,8 @@ const (
// Note that it shares some features with Bash, due to the the shared
// ancestry that is ksh.
//
// We currently follow mksh version 59.
//
// Its string representation is "mksh".
LangMirBSDKorn

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
go test fuzz v1
string("\xb3c")
byte('\x02')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
go test fuzz v1
string("\x0fC")
byte('\x00')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
go test fuzz v1
string("\u05f5A")
byte('\x00')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
go test fuzz v1
string("\U00086199")
byte('\x02')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
go test fuzz v1
string("\xb6")
byte('\x01')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
go test fuzz v1
string("\x050")
byte('\x02')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
go test fuzz v1
string("\U000600a04")
byte('\x00')

0 comments on commit c8af746

Please sign in to comment.