grace/grace.go
2013-10-06 13:01:58 -07:00

250 lines
5.9 KiB
Go

// Package grace allows for gracefully waiting for a listener to
// finish serving its active requests.
package grace
import (
"errors"
"fmt"
"net"
"os"
"os/exec"
"os/signal"
"strconv"
"strings"
"sync"
"syscall"
"time"
)
var (
	// ErrNotInheriting is returned by Inherit when the process has no
	// listener count in its environment, i.e. there is nothing to inherit.
	ErrNotInheriting = errors.New("no inherited listeners")

	// ErrAlreadyClosed is returned by Listener.Accept once Close has been
	// started on that listener.
	ErrAlreadyClosed = errors.New("already closed")

	// timeInPast is an arbitrary instant in the past. Using it as a deadline
	// makes the deadline expire immediately.
	timeInPast = time.Date(1983, time.November, 6, 0, 0, 0, 0, time.UTC)
)
const (
	// envCountKey is the environment variable through which a restarted
	// process learns how many listeners it has been handed.
	envCountKey = "LISTEN_FDS"

	// envCountKeyPrefix is the "KEY=" form of envCountKey, used to recognise
	// and rebuild the corresponding entry of an environment list.
	envCountKeyPrefix = envCountKey + "="

	// errClosed is the message suffix of the error the standard library
	// returns when operating on a socket that has been closed.
	errClosed = "use of closed network connection"
)
// Listener is a net.Listener that can additionally hand out the file backing
// it. That file is what allows the listener to be passed to another process.
type Listener interface {
	net.Listener

	// File returns the underlying file representing this Listener.
	File() (f *os.File, err error)
}
// listener decorates a Listener with the bookkeeping needed for a graceful
// Close: a closed flag and a count of connections still in flight.
type listener struct {
	Listener

	// closedMutex guards closed.
	closedMutex sync.RWMutex
	// closed is set once Close has been called.
	closed bool

	// wg counts pending Accept calls and accepted connections that have not
	// been closed yet; Close waits on it.
	wg sync.WaitGroup
}
// deadliner is implemented by listeners whose blocking calls can be
// interrupted by setting a deadline.
type deadliner interface {
	SetDeadline(deadline time.Time) error
}
// conn wraps an accepted net.Conn so the listener that produced it can notice
// when the connection is closed.
type conn struct {
	net.Conn
	// wg is the owning listener's WaitGroup; this connection holds one count
	// on it until it is closed.
	wg *sync.WaitGroup
	// once ensures that count is released exactly once, however many times
	// Close is called.
	once sync.Once
}

// Close closes the underlying connection and releases this connection's count
// on the listener's WaitGroup. Calling Close more than once is safe: the count
// is only released by the first call, so the WaitGroup counter can never go
// negative. The error returned is whatever the underlying Close returns.
func (c *conn) Close() error {
	defer c.once.Do(c.wg.Done)
	return c.Conn.Close()
}

// NewListener wraps an existing File listener to provide a graceful Close()
// process.
func NewListener(l Listener) Listener {
	return &listener{Listener: l}
}

// Close marks the listener as closed, makes the underlying listener stop
// accepting, and then blocks until every pending Accept has returned and every
// accepted connection has been closed. It returns the error from stopping the
// underlying listener, if any.
func (l *listener) Close() error {
	l.closedMutex.Lock()
	l.closed = true
	l.closedMutex.Unlock()

	var err error
	// Init provided sockets dont actually close so we trigger Accept to return
	// by setting the deadline.
	if os.Getppid() == 1 {
		if ld, ok := l.Listener.(deadliner); ok {
			err = ld.SetDeadline(timeInPast)
		} else {
			fmt.Fprintln(os.Stderr, "init activated server did not have SetDeadline")
		}
	} else {
		err = l.Listener.Close()
	}
	l.wg.Wait()
	return err
}

// Accept waits for the next connection and returns it wrapped so that closing
// it is tracked by the listener. It returns ErrAlreadyClosed if Close has been
// called, whether that is noticed before accepting or surfaces as a "closed"
// or timeout error from the underlying listener. Any other error from the
// underlying Accept is returned unchanged.
func (l *listener) Accept() (net.Conn, error) {
	// Reserve a count on the WaitGroup before accepting rather than after:
	// otherwise Close could finish waiting between a successful accept and
	// the increment. The reservation is made under the read lock, after the
	// closed check. Close sets closed under the write lock before calling
	// wg.Wait, so every Add either happens before that Wait or is skipped,
	// and Add never races with Wait on a zero counter.
	l.closedMutex.RLock()
	if l.closed {
		l.closedMutex.RUnlock()
		return nil, ErrAlreadyClosed
	}
	l.wg.Add(1)
	l.closedMutex.RUnlock()

	c, err := l.Listener.Accept()
	if err != nil {
		// Nothing was accepted, so give the reserved count back.
		l.wg.Done()

		if strings.HasSuffix(err.Error(), errClosed) {
			return nil, ErrAlreadyClosed
		}
		// Close uses SetDeadline to force Accept to return when the socket
		// itself is not closed. In that scenario the timeout is treated the
		// same as a Close.
		if nerr, ok := err.(net.Error); ok && nerr.Timeout() {
			l.closedMutex.RLock()
			closed := l.closed
			l.closedMutex.RUnlock()
			if closed {
				return nil, ErrAlreadyClosed
			}
		}
		return nil, err
	}
	// A pointer is returned because conn carries a sync.Once, which must not
	// be copied.
	return &conn{Conn: c, wg: &l.wg}, nil
}
// Wait blocks handling signals until the process should stop.
//
// On SIGTERM every listener is closed concurrently; once all Close calls have
// returned, Wait returns the first non-nil Close error in listener order, or
// nil if all of them succeeded. On SIGUSR2 Restart is called with the
// listeners; if it fails Wait returns that error, otherwise Wait keeps
// waiting for further signals.
func Wait(listeners []Listener) (err error) {
	ch := make(chan os.Signal, 2)
	signal.Notify(ch, syscall.SIGTERM, syscall.SIGUSR2)
	for {
		sig := <-ch
		switch sig {
		case syscall.SIGTERM:
			// Each goroutine writes only its own slot, so the results need no
			// further synchronization beyond the WaitGroup.
			closeErrs := make([]error, len(listeners))
			var wg sync.WaitGroup
			wg.Add(len(listeners))
			for i, l := range listeners {
				go func(i int, l Listener) {
					defer wg.Done()
					closeErrs[i] = l.Close()
				}(i, l)
			}
			wg.Wait()
			for _, cErr := range closeErrs {
				if cErr != nil {
					return cErr
				}
			}
			return nil
		case syscall.SIGUSR2:
			if rErr := Restart(listeners); rErr != nil {
				return rErr
			}
		}
	}
}
// Inherit tries to inherit listeners from the parent process.
//
// The number of listeners is read from the LISTEN_FDS environment variable.
// If it is unset or empty, ErrNotInheriting is returned. If it is not a valid
// integer, the parse error is returned. Otherwise one listener is built per
// file descriptor, starting at fd 3, and each is wrapped with NewListener.
// Any failure to turn a descriptor into a listener aborts the whole call and
// returns nil listeners together with the error.
func Inherit() (listeners []Listener, err error) {
	countStr := os.Getenv(envCountKey)
	if countStr == "" {
		return nil, ErrNotInheriting
	}
	count, err := strconv.Atoi(countStr)
	if err != nil {
		return nil, err
	}
	// If we are inheriting, the listeners will begin at fd 3.
	for fd := 3; fd < 3+count; fd++ {
		file := os.NewFile(uintptr(fd), "listener")
		tmp, err := net.FileListener(file)
		// The listener returned by FileListener is independent of file, so
		// our handle on the descriptor is released right away. Best effort:
		// a failure to close it does not affect the listener.
		file.Close()
		if err != nil {
			return nil, err
		}
		l, ok := tmp.(Listener)
		if !ok {
			// Report the problem instead of panicking on a failed assertion,
			// and do not leak the listener we cannot use.
			tmp.Close()
			return nil, fmt.Errorf("inherited listener on fd %d (%T) does not provide File()", fd, tmp)
		}
		listeners = append(listeners, NewListener(l))
	}
	return listeners, nil
}
// CloseParent asks the parent process to begin its Close sequence by sending
// it SIGTERM. It only sends the signal and does not wait for the parent to
// finish. When the parent is pid 1 the sockets were provided by init (for
// example systemd), so nothing is signalled and nil is returned.
func CloseParent() error {
	if parent := os.Getppid(); parent != 1 {
		return syscall.Kill(parent, syscall.SIGTERM)
	}
	return nil
}
// Restart starts a new copy of the current program and hands it the given
// listeners. The child is started with the same arguments and working
// directory, receives stdin/stdout/stderr followed by one file per listener,
// and gets the current environment with LISTEN_FDS set to the number of
// listeners. An error is returned if no listeners are given, if a listener's
// file cannot be obtained, if the binary or working directory cannot be
// resolved, or if the process cannot be started.
func Restart(listeners []Listener) (err error) {
	if len(listeners) == 0 {
		return errors.New("restart must be given listeners.")
	}

	// Obtain the file behind every listener. The files are kept open until
	// this function returns, i.e. until after the child has been started.
	inherited := make([]*os.File, len(listeners))
	for idx, ln := range listeners {
		f, fileErr := ln.File()
		if fileErr != nil {
			return fileErr
		}
		inherited[idx] = f
		defer f.Close()
		syscall.CloseOnExec(int(f.Fd()))
	}

	// Resolve the binary from the original argv[0]. If that is a symlink
	// whose target has been replaced, the new target is what gets started.
	binary, err := exec.LookPath(os.Args[0])
	if err != nil {
		return err
	}

	// The child runs in the directory we are currently in.
	workDir, err := os.Getwd()
	if err != nil {
		return err
	}

	// Copy the environment, dropping any existing count entry, then append
	// the count for the listeners being passed on.
	var childEnv []string
	for _, entry := range os.Environ() {
		if strings.HasPrefix(entry, envCountKeyPrefix) {
			continue
		}
		childEnv = append(childEnv, entry)
	}
	childEnv = append(childEnv, fmt.Sprintf("%s%d", envCountKeyPrefix, len(listeners)))

	// The standard streams come first, followed by the listener files.
	childFiles := []*os.File{os.Stdin, os.Stdout, os.Stderr}
	childFiles = append(childFiles, inherited...)

	attr := &os.ProcAttr{
		Dir:   workDir,
		Env:   childEnv,
		Files: childFiles,
	}
	_, err = os.StartProcess(binary, os.Args, attr)
	return err
}