Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure NGINX reload occurs #1033

Merged
merged 7 commits into from
Sep 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions docs/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ API to update the handled resources' statuses and emit events.
- Read: *NKG* reads the PID file `nginx.pid` from the `nginx-run` volume, located at `/var/run/nginx`. *NKG*
extracts the PID of the nginx process from this file in order to send reload signals to *NGINX master*.
4. (File I/O) *NKG* writes logs to its *stdout* and *stderr*, which are collected by the container runtime.
5. (HTTP) *NKG* fetches the NGINX metrics via the unix:/var/lib/nginx/nginx-status.sock UNIX socket and converts it to
5. (HTTP) *NKG* fetches the NGINX metrics via the unix:/var/run/nginx/nginx-status.sock UNIX socket and converts it to
*Prometheus* format used in #2.
6. (Signal) To reload NGINX, *NKG* sends the [reload signal][reload] to the **NGINX master**.
7. (File I/O)
Expand All @@ -124,9 +124,12 @@ API to update the handled resources' statuses and emit events.
9. (File I/O) The *NGINX master* sends logs to its *stdout* and *stderr*, which are collected by the container runtime.
10. (File I/O) An *NGINX worker* writes logs to its *stdout* and *stderr*, which are collected by the container runtime.
11. (Signal) The *NGINX master* controls the [lifecycle of *NGINX workers*][lifecycle] it creates workers with the new
configuration and shutdowns workers with the old configuration.
12. (HTTP,HTTPS) A *client* sends traffic to and receives traffic from any of the *NGINX workers* on ports 80 and 443.
13. (HTTP,HTTPS) An *NGINX worker* sends traffic to and receives traffic from the *backends*.
configuration and shutdowns workers with the old configuration.
12. (HTTP) To consider a configuration reload a success, *NKG* ensures that at least one NGINX worker has the new
configuration. To do that, *NKG* checks a particular endpoint via the unix:/var/run/nginx/nginx-config-version.sock
UNIX socket.
13. (HTTP,HTTPS) A *client* sends traffic to and receives traffic from any of the *NGINX workers* on ports 80 and 443.
14. (HTTP,HTTPS) An *NGINX worker* sends traffic to and receives traffic from the *backends*.

[controller]: https://kubernetes.io/docs/concepts/architecture/controller/

Expand Down
Binary file modified docs/images/nkg-pod.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 8 additions & 2 deletions internal/mode/static/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ type eventHandlerConfig struct {
logger logr.Logger
// controlConfigNSName is the NamespacedName of the NginxGateway config for this controller.
controlConfigNSName types.NamespacedName
// version is the current version number of the nginx config.
version int
}

// eventHandlerImpl implements EventHandler.
Expand Down Expand Up @@ -90,7 +92,11 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, batch events.Ev
}

var nginxReloadRes nginxReloadResult
if err := h.updateNginx(ctx, dataplane.BuildConfiguration(ctx, graph, h.cfg.serviceResolver)); err != nil {
h.cfg.version++
if err := h.updateNginx(
ctx,
dataplane.BuildConfiguration(ctx, graph, h.cfg.serviceResolver, h.cfg.version),
); err != nil {
h.cfg.logger.Error(err, "Failed to update NGINX configuration")
nginxReloadRes.error = err
} else {
Expand All @@ -107,7 +113,7 @@ func (h *eventHandlerImpl) updateNginx(ctx context.Context, conf dataplane.Confi
return fmt.Errorf("failed to replace NGINX configuration files: %w", err)
}

if err := h.cfg.nginxRuntimeMgr.Reload(ctx); err != nil {
if err := h.cfg.nginxRuntimeMgr.Reload(ctx, conf.Version); err != nil {
return fmt.Errorf("failed to reload NGINX: %w", err)
}

Expand Down
4 changes: 2 additions & 2 deletions internal/mode/static/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ var _ = Describe("eventHandler", func() {
handler.HandleEventBatch(context.Background(), batch)

checkUpsertEventExpectations(e)
expectReconfig(dataplane.Configuration{}, fakeCfgFiles)
expectReconfig(dataplane.Configuration{Version: 1}, fakeCfgFiles)
})

It("should process Delete", func() {
Expand All @@ -124,7 +124,7 @@ var _ = Describe("eventHandler", func() {
handler.HandleEventBatch(context.Background(), batch)

checkDeleteEventExpectations(e)
expectReconfig(dataplane.Configuration{}, fakeCfgFiles)
expectReconfig(dataplane.Configuration{Version: 1}, fakeCfgFiles)
})
})

Expand Down
16 changes: 16 additions & 0 deletions internal/mode/static/nginx/config/generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ const (

// httpConfigFile is the path to the configuration file with HTTP configuration.
httpConfigFile = httpFolder + "/http.conf"

// configVersionFile is the path to the config version configuration file.
configVersionFile = httpFolder + "/config-version.conf"
)

// ConfigFolders is a list of folders where NGINX configuration files are stored.
Expand Down Expand Up @@ -63,6 +66,8 @@ func (g GeneratorImpl) Generate(conf dataplane.Configuration) []file.File {

files = append(files, generateHTTPConfig(conf))

files = append(files, generateConfigVersion(conf.Version))

return files
}

Expand Down Expand Up @@ -104,3 +109,14 @@ func getExecuteFuncs() []executeFunc {
executeMaps,
}
}

// generateConfigVersion writes the config version file.
func generateConfigVersion(configVersion int) file.File {
c := executeVersion(configVersion)

return file.File{
Content: c,
Path: configVersionFile,
Type: file.TypeRegular,
}
}
8 changes: 7 additions & 1 deletion internal/mode/static/nginx/config/generator_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package config_test

import (
"fmt"
"testing"

. "github.com/onsi/gomega"
Expand Down Expand Up @@ -65,7 +66,7 @@ func TestGenerate(t *testing.T) {

files := generator.Generate(conf)

g.Expect(files).To(HaveLen(2))
g.Expect(files).To(HaveLen(3))

g.Expect(files[0]).To(Equal(file.File{
Type: file.TypeSecret,
Expand All @@ -82,4 +83,9 @@ func TestGenerate(t *testing.T) {
g.Expect(httpCfg).To(ContainSubstring("listen 443"))
g.Expect(httpCfg).To(ContainSubstring("upstream"))
g.Expect(httpCfg).To(ContainSubstring("split_clients"))

g.Expect(files[2].Type).To(Equal(file.TypeRegular))
g.Expect(files[2].Path).To(Equal("/etc/nginx/conf.d/config-version.conf"))
configVersion := string(files[2].Content)
g.Expect(configVersion).To(ContainSubstring(fmt.Sprintf("return 200 %d", conf.Version)))
}
2 changes: 1 addition & 1 deletion internal/mode/static/nginx/config/servers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ func TestExecuteServers(t *testing.T) {
func TestExecuteForDefaultServers(t *testing.T) {
testcases := []struct {
msg string
conf dataplane.Configuration
httpPorts []int
sslPorts []int
conf dataplane.Configuration
}{
{
conf: dataplane.Configuration{},
Expand Down
11 changes: 11 additions & 0 deletions internal/mode/static/nginx/config/version.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package config

import (
gotemplate "text/template"
)

var versionTemplate = gotemplate.Must(gotemplate.New("version").Parse(versionTemplateText))

func executeVersion(version int) []byte {
return execute(versionTemplate, version)
}
12 changes: 12 additions & 0 deletions internal/mode/static/nginx/config/version_template.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package config

var versionTemplateText = `
server {
listen unix:/var/run/nginx/nginx-config-version.sock;
access_log off;

location /version {
return 200 {{.}};
}
}
`
20 changes: 20 additions & 0 deletions internal/mode/static/nginx/config/version_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package config

import (
"strings"
"testing"

. "github.com/onsi/gomega"
)

func TestExecuteVersion(t *testing.T) {
g := NewWithT(t)
expSubStrings := map[string]int{
"return 200 42;": 1,
}

maps := string(executeVersion(42))
for expSubStr, expCount := range expSubStrings {
g.Expect(expCount).To(Equal(strings.Count(maps, expSubStr)))
}
}
79 changes: 61 additions & 18 deletions internal/mode/static/nginx/runtime/manager.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package runtime

import (
"bytes"
"context"
"errors"
"fmt"
Expand All @@ -15,58 +16,73 @@ import (
)

const (
pidFile = "/var/run/nginx/nginx.pid"
pidFileTimeout = 10 * time.Second
pidFile = "/var/run/nginx/nginx.pid"
pidFileTimeout = 10000 * time.Millisecond
childProcsTimeout = 1000 * time.Millisecond
nginxReloadTimeout = 60000 * time.Millisecond
)

type (
readFileFunc func(string) ([]byte, error)
checkFileFunc func(string) (fs.FileInfo, error)
)

var (
noNewWorkersErrFmt = "reload unsuccessful: no new NGINX worker processes started for config version %d." +
" Please check the NGINX container logs for possible configuration issues: %w"
childProcPathFmt = "/proc/%[1]v/task/%[1]v/children"
)

//go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . Manager

// Manager manages the runtime of NGINX.
type Manager interface {
// Reload reloads NGINX configuration. It is a blocking operation.
Reload(ctx context.Context) error
Reload(ctx context.Context, configVersion int) error
}

// ManagerImpl implements Manager.
type ManagerImpl struct{}
type ManagerImpl struct {
verifyClient *verifyClient
}

// NewManagerImpl creates a new ManagerImpl.
func NewManagerImpl() *ManagerImpl {
return &ManagerImpl{}
return &ManagerImpl{
verifyClient: newVerifyClient(nginxReloadTimeout),
}
}

func (m *ManagerImpl) Reload(ctx context.Context) error {
func (m *ManagerImpl) Reload(ctx context.Context, configVersion int) error {
// We find the main NGINX PID on every reload because it will change if the NGINX container is restarted.
pid, err := findMainProcess(ctx, os.Stat, os.ReadFile, pidFileTimeout)
if err != nil {
return fmt.Errorf("failed to find NGINX main process: %w", err)
}

childProcFile := fmt.Sprintf(childProcPathFmt, pid)
previousChildProcesses, err := os.ReadFile(childProcFile)
if err != nil {
return err
}

// send HUP signal to the NGINX main process reload configuration
// See https://nginx.org/en/docs/control.html
if err := syscall.Kill(pid, syscall.SIGHUP); err != nil {
kate-osborn marked this conversation as resolved.
Show resolved Hide resolved
return fmt.Errorf("failed to send the HUP signal to NGINX main: %w", err)
}

// FIXME(pleshakov)
// (1) ensure the reload actually happens.
// https://github.com/nginxinc/nginx-kubernetes-gateway/issues/664

// for now, to prevent a subsequent reload starting before the in-flight reload finishes, we simply sleep.
// Fixing (1) will make the sleep unnecessary.

select {
case <-ctx.Done():
return nil
case <-time.After(1 * time.Second):
if err := ensureNewNginxWorkers(
ctx,
childProcFile,
previousChildProcesses,
os.ReadFile,
childProcsTimeout,
); err != nil {
return fmt.Errorf(noNewWorkersErrFmt, configVersion, err)
}

return nil
return m.verifyClient.waitForCorrectVersion(ctx, configVersion)
}

// EnsureNginxRunning ensures NGINX is running by locating the main process.
Expand Down Expand Up @@ -116,3 +132,30 @@ func findMainProcess(

return pid, nil
}

func ensureNewNginxWorkers(
ctx context.Context,
childProcFile string,
previousContents []byte,
ciarams87 marked this conversation as resolved.
Show resolved Hide resolved
readFile readFileFunc,
timeout time.Duration,
) error {
ctx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()

return wait.PollUntilContextCancel(
ctx,
25*time.Millisecond,
true, /* poll immediately */
func(ctx context.Context) (bool, error) {
content, err := readFile(childProcFile)
if err != nil {
return false, err
}
if !bytes.Equal(previousContents, content) {
return true, nil
}
return false, nil
},
)
}
Loading