forkjo/modules/graceful/manager_unix.go
wxiaoguang 4647660776
Rewrite logger system (#24726)
## ⚠️ Breaking

The `log.<mode>.<logger>` style config has been dropped. If you used it,
please check the new config manual & app.example.ini to make your
instance output logs as expected.

Although many legacy options still work, it's encouraged to upgrade to
the new options.

The SMTP logger is deleted because SMTP is not suitable to collect logs.

If you have manually configured Gitea log options, please confirm the
logger system works as expected after upgrading.

## Description

Close #12082 and maybe more log-related issues, resolve some related
FIXMEs in old code (which seems unfixable before)

Just like rewriting queue #24505 : make code maintainable, clear legacy
bugs, and add the ability to support more writers (eg: JSON, structured
log)

There is a new document (with examples): `logging-config.en-us.md`

This PR is safer than the queue rewriting, because it's just for
logging, it won't break other logic.

## The old problems

The logging system is quite old and difficult to maintain:
* Unclear concepts: Logger, NamedLogger, MultiChannelledLogger,
SubLogger, EventLogger, WriterLogger etc
* Some code is diffuclt to konw whether it is right:
`log.DelNamedLogger("console")` vs `log.DelNamedLogger(log.DEFAULT)` vs
`log.DelLogger("console")`
* The old system heavily depends on ini config system, it's difficult to
create new logger for different purpose, and it's very fragile.
* The "color" trick is difficult to use and read, many colors are
unnecessary, and in the future structured log could help
* It's difficult to add other log formats, eg: JSON format
* The log outputer doesn't have full control of its goroutine, it's
difficult to make outputer have advanced behaviors
* The logs could be lost in some cases: eg: no Fatal error when using
CLI.
* Config options are passed by JSON, which is quite fragile.
* INI package makes the KEY in `[log]` section visible in `[log.sub1]`
and `[log.sub1.subA]`, this behavior is quite fragile and would cause
more unclear problems, and there is no strong requirement to support
`log.<mode>.<logger>` syntax.


## The new design

See `logger.go` for documents.


## Screenshot

<details>


![image](https://github.com/go-gitea/gitea/assets/2114189/4462d713-ba39-41f5-bb08-de912e67e1ff)


![image](https://github.com/go-gitea/gitea/assets/2114189/b188035e-f691-428b-8b2d-ff7b2199b2f9)


![image](https://github.com/go-gitea/gitea/assets/2114189/132e9745-1c3b-4e00-9e0d-15eaea495dee)

</details>

## TODO

* [x] add some new tests
* [x] fix some tests
* [x] test some sub-commands (manually ....)

---------

Co-authored-by: Jason Song <i@wolfogre.com>
Co-authored-by: delvh <dev.lh@web.de>
Co-authored-by: Giteabot <teabot@gitea.io>
2023-05-21 22:35:11 +00:00

278 lines
7.6 KiB
Go

// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
//go:build !windows
package graceful
import (
"context"
"errors"
"os"
"os/signal"
"runtime/pprof"
"strconv"
"sync"
"syscall"
"time"
"code.gitea.io/gitea/modules/graceful/releasereopen"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/setting"
)
// Manager manages the graceful shutdown process
type Manager struct {
isChild bool
forked bool
lock *sync.RWMutex
state state
shutdownCtx context.Context
hammerCtx context.Context
terminateCtx context.Context
managerCtx context.Context
shutdownCtxCancel context.CancelFunc
hammerCtxCancel context.CancelFunc
terminateCtxCancel context.CancelFunc
managerCtxCancel context.CancelFunc
runningServerWaitGroup sync.WaitGroup
createServerWaitGroup sync.WaitGroup
terminateWaitGroup sync.WaitGroup
toRunAtShutdown []func()
toRunAtHammer []func()
toRunAtTerminate []func()
}
func newGracefulManager(ctx context.Context) *Manager {
manager := &Manager{
isChild: len(os.Getenv(listenFDsEnv)) > 0 && os.Getppid() > 1,
lock: &sync.RWMutex{},
}
manager.createServerWaitGroup.Add(numberOfServersToCreate)
manager.start(ctx)
return manager
}
type systemdNotifyMsg string
const (
readyMsg systemdNotifyMsg = "READY=1"
stoppingMsg systemdNotifyMsg = "STOPPING=1"
reloadingMsg systemdNotifyMsg = "RELOADING=1"
watchdogMsg systemdNotifyMsg = "WATCHDOG=1"
)
func statusMsg(msg string) systemdNotifyMsg {
return systemdNotifyMsg("STATUS=" + msg)
}
func pidMsg() systemdNotifyMsg {
return systemdNotifyMsg("MAINPID=" + strconv.Itoa(os.Getpid()))
}
// Notify systemd of status via the notify protocol
func (g *Manager) notify(msg systemdNotifyMsg) {
conn, err := getNotifySocket()
if err != nil {
// the err is logged in getNotifySocket
return
}
if conn == nil {
return
}
defer conn.Close()
if _, err = conn.Write([]byte(msg)); err != nil {
log.Warn("Failed to notify NOTIFY_SOCKET: %v", err)
return
}
}
func (g *Manager) start(ctx context.Context) {
// Make contexts
g.terminateCtx, g.terminateCtxCancel = context.WithCancel(ctx)
g.shutdownCtx, g.shutdownCtxCancel = context.WithCancel(ctx)
g.hammerCtx, g.hammerCtxCancel = context.WithCancel(ctx)
g.managerCtx, g.managerCtxCancel = context.WithCancel(ctx)
// Next add pprof labels to these contexts
g.terminateCtx = pprof.WithLabels(g.terminateCtx, pprof.Labels("graceful-lifecycle", "with-terminate"))
g.shutdownCtx = pprof.WithLabels(g.shutdownCtx, pprof.Labels("graceful-lifecycle", "with-shutdown"))
g.hammerCtx = pprof.WithLabels(g.hammerCtx, pprof.Labels("graceful-lifecycle", "with-hammer"))
g.managerCtx = pprof.WithLabels(g.managerCtx, pprof.Labels("graceful-lifecycle", "with-manager"))
// Now label this and all goroutines created by this goroutine with the graceful-lifecycle manager
pprof.SetGoroutineLabels(g.managerCtx)
defer pprof.SetGoroutineLabels(ctx)
// Set the running state & handle signals
g.setState(stateRunning)
g.notify(statusMsg("Starting Gitea"))
g.notify(pidMsg())
go g.handleSignals(g.managerCtx)
// Handle clean up of unused provided listeners and delayed start-up
startupDone := make(chan struct{})
go func() {
defer close(startupDone)
// Wait till we're done getting all of the listeners and then close
// the unused ones
g.createServerWaitGroup.Wait()
// Ignore the error here there's not much we can do with it
// They're logged in the CloseProvidedListeners function
_ = CloseProvidedListeners()
g.notify(readyMsg)
}()
if setting.StartupTimeout > 0 {
go func() {
select {
case <-startupDone:
return
case <-g.IsShutdown():
func() {
// When waitgroup counter goes negative it will panic - we don't care about this so we can just ignore it.
defer func() {
_ = recover()
}()
// Ensure that the createServerWaitGroup stops waiting
for {
g.createServerWaitGroup.Done()
}
}()
return
case <-time.After(setting.StartupTimeout):
log.Error("Startup took too long! Shutting down")
g.notify(statusMsg("Startup took too long! Shutting down"))
g.notify(stoppingMsg)
g.doShutdown()
}
}()
}
}
func (g *Manager) handleSignals(ctx context.Context) {
ctx, _, finished := process.GetManager().AddTypedContext(ctx, "Graceful: HandleSignals", process.SystemProcessType, true)
defer finished()
signalChannel := make(chan os.Signal, 1)
signal.Notify(
signalChannel,
syscall.SIGHUP,
syscall.SIGUSR1,
syscall.SIGUSR2,
syscall.SIGINT,
syscall.SIGTERM,
syscall.SIGTSTP,
)
watchdogTimeout := getWatchdogTimeout()
t := &time.Ticker{}
if watchdogTimeout != 0 {
g.notify(watchdogMsg)
t = time.NewTicker(watchdogTimeout / 2)
}
pid := syscall.Getpid()
for {
select {
case sig := <-signalChannel:
switch sig {
case syscall.SIGHUP:
log.Info("PID: %d. Received SIGHUP. Attempting GracefulRestart...", pid)
g.DoGracefulRestart()
case syscall.SIGUSR1:
log.Warn("PID %d. Received SIGUSR1. Releasing and reopening logs", pid)
g.notify(statusMsg("Releasing and reopening logs"))
if err := releasereopen.GetManager().ReleaseReopen(); err != nil {
log.Error("Error whilst releasing and reopening logs: %v", err)
}
case syscall.SIGUSR2:
log.Warn("PID %d. Received SIGUSR2. Hammering...", pid)
g.DoImmediateHammer()
case syscall.SIGINT:
log.Warn("PID %d. Received SIGINT. Shutting down...", pid)
g.DoGracefulShutdown()
case syscall.SIGTERM:
log.Warn("PID %d. Received SIGTERM. Shutting down...", pid)
g.DoGracefulShutdown()
case syscall.SIGTSTP:
log.Info("PID %d. Received SIGTSTP.", pid)
default:
log.Info("PID %d. Received %v.", pid, sig)
}
case <-t.C:
g.notify(watchdogMsg)
case <-ctx.Done():
log.Warn("PID: %d. Background context for manager closed - %v - Shutting down...", pid, ctx.Err())
g.DoGracefulShutdown()
return
}
}
}
func (g *Manager) doFork() error {
g.lock.Lock()
if g.forked {
g.lock.Unlock()
return errors.New("another process already forked. Ignoring this one")
}
g.forked = true
g.lock.Unlock()
g.notify(reloadingMsg)
// We need to move the file logs to append pids
setting.RestartLogsWithPIDSuffix()
_, err := RestartProcess()
return err
}
// DoGracefulRestart causes a graceful restart
func (g *Manager) DoGracefulRestart() {
if setting.GracefulRestartable {
log.Info("PID: %d. Forking...", os.Getpid())
err := g.doFork()
if err != nil {
if err.Error() == "another process already forked. Ignoring this one" {
g.DoImmediateHammer()
} else {
log.Error("Error whilst forking from PID: %d : %v", os.Getpid(), err)
}
}
} else {
log.Info("PID: %d. Not set restartable. Shutting down...", os.Getpid())
g.notify(stoppingMsg)
g.doShutdown()
}
}
// DoImmediateHammer causes an immediate hammer
func (g *Manager) DoImmediateHammer() {
g.notify(statusMsg("Sending immediate hammer"))
g.doHammerTime(0 * time.Second)
}
// DoGracefulShutdown causes a graceful shutdown
func (g *Manager) DoGracefulShutdown() {
g.lock.Lock()
if !g.forked {
g.lock.Unlock()
g.notify(stoppingMsg)
} else {
g.lock.Unlock()
g.notify(statusMsg("Shutting down after fork"))
}
g.doShutdown()
}
// RegisterServer registers the running of a listening server, in the case of unix this means that the parent process can now die.
// Any call to RegisterServer must be matched by a call to ServerDone
func (g *Manager) RegisterServer() {
KillParent()
g.runningServerWaitGroup.Add(1)
}