Skip to content

Commit

Permalink
Merge pull request #9123 from ipfs/fix/gw-trailing-slash-unixfs-dirs
Browse files Browse the repository at this point in the history
fix(gw): directory URL normalization
  • Loading branch information
lidel authored Jul 21, 2022
2 parents bcab64d + 3182986 commit cf82ae5
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 32 deletions.
30 changes: 16 additions & 14 deletions core/corehttp/gateway_handler_unixfs_dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,28 +41,30 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
}
originalUrlPath := requestURI.Path

// Check if directory has index.html, if so, serveFile
idxPath := ipath.Join(contentPath, "index.html")
idx, err := i.api.Unixfs().Get(ctx, idxPath)
switch err.(type) {
case nil:
cpath := contentPath.String()
dirwithoutslash := cpath[len(cpath)-1] != '/'
// Ensure directory paths end with '/'
if originalUrlPath[len(originalUrlPath)-1] != '/' {
// don't redirect to trailing slash if it's go get
// https://github.com/ipfs/kubo/pull/3963
goget := r.URL.Query().Get("go-get") == "1"
if dirwithoutslash && !goget {
// See comment above where originalUrlPath is declared.
if !goget {
suffix := "/"
// preserve query parameters
if r.URL.RawQuery != "" {
// preserve query parameters
suffix = suffix + "?" + r.URL.RawQuery
}

// /ipfs/cid/foo?bar must be redirected to /ipfs/cid/foo/?bar
redirectURL := originalUrlPath + suffix
logger.Debugw("serving index.html file", "to", redirectURL, "status", http.StatusFound, "path", idxPath)
http.Redirect(w, r, redirectURL, http.StatusFound)
logger.Debugw("directory location moved permanently", "status", http.StatusMovedPermanently)
http.Redirect(w, r, redirectURL, http.StatusMovedPermanently)
return
}
}

// Check if directory has index.html, if so, serveFile
idxPath := ipath.Join(contentPath, "index.html")
idx, err := i.api.Unixfs().Get(ctx, idxPath)
switch err.(type) {
case nil:
f, ok := idx.(files.File)
if !ok {
internalWebError(w, files.ErrNotReader)
Expand Down Expand Up @@ -163,7 +165,7 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
// add the correct link depending on whether the path ends with a slash
default:
if strings.HasSuffix(backLink, "/") {
backLink += "./.."
backLink += ".."
} else {
backLink += "/.."
}
Expand Down
16 changes: 8 additions & 8 deletions core/corehttp/gateway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -380,9 +380,9 @@ func TestIPNSHostnameRedirect(t *testing.T) {
t.Fatal(err)
}

// expect 302 redirect to same path, but with trailing slash
if res.StatusCode != 302 {
t.Errorf("status is %d, expected 302", res.StatusCode)
// expect 301 redirect to same path, but with trailing slash
if res.StatusCode != 301 {
t.Errorf("status is %d, expected 301", res.StatusCode)
}
hdr := res.Header["Location"]
if len(hdr) < 1 {
Expand All @@ -403,9 +403,9 @@ func TestIPNSHostnameRedirect(t *testing.T) {
t.Fatal(err)
}

// expect 302 redirect to same path, but with prefix and trailing slash
if res.StatusCode != 302 {
t.Errorf("status is %d, expected 302", res.StatusCode)
// expect 301 redirect to same path, but with prefix and trailing slash
if res.StatusCode != 301 {
t.Errorf("status is %d, expected 301", res.StatusCode)
}
hdr = res.Header["Location"]
if len(hdr) < 1 {
Expand Down Expand Up @@ -492,7 +492,7 @@ func TestIPNSHostnameBacklinks(t *testing.T) {
if !matchPathOrBreadcrumbs(s, "/ipns/<a href=\"//example.net/\">example.net</a>/<a href=\"//example.net/foo%3F%20%23%3C%27\">foo? #&lt;&#39;</a>") {
t.Fatalf("expected a path in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/./..\">") {
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/..\">") {
t.Fatalf("expected backlink in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/file.txt\">") {
Expand Down Expand Up @@ -566,7 +566,7 @@ func TestIPNSHostnameBacklinks(t *testing.T) {
if !matchPathOrBreadcrumbs(s, "/ipns/<a href=\"//example.net/\">example.net</a>/<a href=\"//example.net/foo%3F%20%23%3C%27\">foo? #&lt;&#39;</a>/<a href=\"//example.net/foo%3F%20%23%3C%27/bar\">bar</a>") {
t.Fatalf("expected a path in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/bar/./..\">") {
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/bar/..\">") {
t.Fatalf("expected backlink in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/bar/file.txt\">") {
Expand Down
8 changes: 8 additions & 0 deletions test/sharness/t0110-gateway.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,17 @@ test_expect_success "GET IPFS directory file output looks good" '

test_expect_success "GET IPFS directory with index.html returns redirect to add trailing slash" "
curl -sI -o response_without_slash \"http://127.0.0.1:$port/ipfs/$HASH2/dirwithindex?query=to-remember\" &&
test_should_contain \"HTTP/1.1 301 Moved Permanently\" response_without_slash &&
test_should_contain \"Location: /ipfs/$HASH2/dirwithindex/?query=to-remember\" response_without_slash
"

# This enables go get to parse go-import meta tags from index.html files stored in IPFS
# https://github.com/ipfs/kubo/pull/3963
test_expect_success "GET IPFS directory with index.html and no trailing slash returns expected output when go-get is passed" "
curl -s -o response_with_slash \"http://127.0.0.1:$port/ipfs/$HASH2/dirwithindex?go-get=1\" &&
test_should_contain \"hello i am a webpage\" response_with_slash
"

test_expect_success "GET IPFS directory with index.html and trailing slash returns expected output" "
curl -s -o response_with_slash \"http://127.0.0.1:$port/ipfs/$HASH2/dirwithindex/?query=to-remember\" &&
test_should_contain \"hello i am a webpage\" response_with_slash
Expand Down
2 changes: 1 addition & 1 deletion test/sharness/t0113-gateway-symlink.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ test_expect_success "Add the test directory" '
'

test_expect_success "Test the directory listing" '
curl "$GWAY_ADDR/ipfs/$HASH" > list_response &&
curl "$GWAY_ADDR/ipfs/$HASH/" > list_response &&
test_should_contain ">foo<" list_response &&
test_should_contain ">bar<" list_response
'
Expand Down
4 changes: 2 additions & 2 deletions test/sharness/t0114-gateway-subdomains.sh
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ test_expect_success "valid file and subdirectory paths in directory listing at {

test_expect_success "valid parent directory path in directory listing at {cid}.ipfs.localhost/sub/dir" '
curl -s --resolve $DIR_HOSTNAME:127.0.0.1 "http://$DIR_HOSTNAME/ipfs/ipns/" > list_response &&
test_should_contain "<a href=\"/ipfs/ipns/./..\">..</a>" list_response &&
test_should_contain "<a href=\"/ipfs/ipns/..\">..</a>" list_response &&
test_should_contain "<a href=\"/ipfs/ipns/bar\">bar</a>" list_response
'

Expand Down Expand Up @@ -441,7 +441,7 @@ test_expect_success "valid file and directory paths in directory listing at {cid

test_expect_success "valid parent directory path in directory listing at {cid}.ipfs.example.com/sub/dir" '
curl -s -H "Host: $DIR_FQDN" http://127.0.0.1:$GWAY_PORT/ipfs/ipns/ > list_response &&
test_should_contain "<a href=\"/ipfs/ipns/./..\">..</a>" list_response &&
test_should_contain "<a href=\"/ipfs/ipns/..\">..</a>" list_response &&
test_should_contain "<a href=\"/ipfs/ipns/bar\">bar</a>" list_response
'

Expand Down
32 changes: 25 additions & 7 deletions test/sharness/t0115-gateway-dir-listing.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,14 @@ test_expect_success "path gw: backlink on root CID should be hidden" '
test_should_not_contain "<a href=\"/ipfs/$DIR_CID/\">..</a>" list_response
'

test_expect_success "path gw: Etag should be present" '
test_expect_success "path gw: redirect dir listing to URL with trailing slash" '
curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę > list_response &&
test_should_contain "HTTP/1.1 301 Moved Permanently" list_response &&
test_should_contain "Location: /ipfs/${DIR_CID}/%c4%85/%c4%99/" list_response
'

test_expect_success "path gw: Etag should be present" '
curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "Etag: \"DirIndex-" list_response
'
Expand Down Expand Up @@ -72,31 +78,37 @@ test_expect_success "path gw: hash column should be a CID link with filename par
DIR_HOSTNAME="${DIR_CID}.ipfs.localhost"
# note: we skip DNS lookup by running curl with --resolve $DIR_HOSTNAME:127.0.0.1

test_expect_success "path gw: backlink on root CID should be hidden" '
test_expect_success "subdomain gw: backlink on root CID should be hidden" '
curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_not_contain "<a href=\"/\">..</a>" list_response
'

test_expect_success "path gw: Etag should be present" '
test_expect_success "subdomain gw: redirect dir listing to URL with trailing slash" '
curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ą/ę > list_response &&
test_should_contain "HTTP/1.1 301 Moved Permanently" list_response &&
test_should_contain "Location: /%c4%85/%c4%99/" list_response
'

test_expect_success "subdomain gw: Etag should be present" '
curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ą/ę/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "Etag: \"DirIndex-" list_response
'

test_expect_success "path gw: backlink on subdirectory should point at parent directory" '
test_expect_success "subdomain gw: backlink on subdirectory should point at parent directory" '
test_should_contain "<a href=\"/%C4%85/%C4%99/..\">..</a>" list_response
'

test_expect_success "subdomain gw: breadcrumbs should leverage path-based router mounted on the parent domain" '
test_should_contain "/ipfs/<a href=\"//localhost:$GWAY_PORT/ipfs/$DIR_CID\">$DIR_CID</a>/<a href=\"//localhost:$GWAY_PORT/ipfs/$DIR_CID/%C4%85\">ą</a>/<a href=\"//localhost:$GWAY_PORT/ipfs/$DIR_CID/%C4%85/%C4%99\">ę</a>" list_response
'

test_expect_success "path gw: name column should be a link to content root mounted at subdomain origin" '
test_expect_success "subdomain gw: name column should be a link to content root mounted at subdomain origin" '
test_should_contain "<a href=\"/%C4%85/%C4%99/file-%C5%BA%C5%82.txt\">file-źł.txt</a>" list_response
'

test_expect_success "path gw: hash column should be a CID link to path router with filename param" '
test_expect_success "subdomain gw: hash column should be a CID link to path router with filename param" '
test_should_contain "<a class=\"ipfs-hash\" translate=\"no\" href=\"//localhost:$GWAY_PORT/ipfs/$FILE_CID?filename=file-%25C5%25BA%25C5%2582.txt\">" list_response
'

Expand All @@ -121,8 +133,14 @@ test_expect_success "dnslink gw: backlink on root CID should be hidden" '
test_should_not_contain "<a href=\"/\">..</a>" list_response
'

test_expect_success "dnslink gw: Etag should be present" '
test_expect_success "dnslink gw: redirect dir listing to URL with trailing slash" '
curl -sD - --resolve $DNSLINK_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DNSLINK_HOSTNAME:$GWAY_PORT/ą/ę > list_response &&
test_should_contain "HTTP/1.1 301 Moved Permanently" list_response &&
test_should_contain "Location: /%c4%85/%c4%99/" list_response
'

test_expect_success "dnslink gw: Etag should be present" '
curl -sD - --resolve $DNSLINK_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DNSLINK_HOSTNAME:$GWAY_PORT/ą/ę/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "Etag: \"DirIndex-" list_response
'
Expand Down

0 comments on commit cf82ae5

Please sign in to comment.