kubectl-persistent-logger/logs/watcher.go

204 lines
5.3 KiB
Go

package logs
import (
"bufio"
"context"
"errors"
"fmt"
"io"
"log"
"strings"
"sync"
"time"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/kubernetes"
)
// KubernetesClient wraps a Kubernetes clientset. It embeds
// kubernetes.Interface, so the clientset's methods (for example AppsV1 and
// CoreV1, both used in this file) are promoted and can be called directly on
// a KubernetesClient value.
type KubernetesClient struct {
kubernetes.Interface
}
// concurrentWriter is an io.Writer that forwards every write to an underlying
// writer while holding a mutex, so that calls to Write never overlap.
type concurrentWriter struct {
	mu sync.Mutex // held for the duration of each call to w.Write
	w  io.Writer  // destination that receives the serialized writes
}

// NewConcurrentWriter wraps w in an io.Writer whose Write method may be called
// from several goroutines at once without any additional locking by the
// caller.
func NewConcurrentWriter(w io.Writer) io.Writer {
	cw := new(concurrentWriter)
	cw.w = w
	return cw
}

// Write implements io.Writer. It takes the mutex, passes p to the wrapped
// writer and returns that writer's result unchanged.
func (cw *concurrentWriter) Write(p []byte) (int, error) {
	cw.mu.Lock()
	defer cw.mu.Unlock()

	n, err := cw.w.Write(p)
	return n, err
}
// nl is the line terminator appended to each log line that copyPodLogs writes
// to its destination.
const nl = "\n"
// streamError describes a failure that occurred while streaming the logs of a
// single pod.
type streamError struct {
	err         error  // underlying cause; expected to be non-nil
	podName     string // name of the pod whose log stream failed
	recoverable bool   // true when the stream should simply be retried
}

// Error implements error by returning the message of the underlying cause.
func (re *streamError) Error() string {
	return re.err.Error()
}

// Unwrap returns the underlying cause so that errors.Is and errors.As can
// inspect the error chain behind a streamError.
func (re *streamError) Unwrap() error {
	return re.err
}
// Watcher watches a deployment and tails the logs for its currently active
// pods.
type Watcher struct {
// deployName is the name of the deployment whose pods are tailed.
deployName string
// container is passed as the Container log option when streaming pod logs.
container string
// clientset is the Kubernetes API client used to look up the deployment,
// watch its pods and stream their logs.
clientset KubernetesClient
// spec is the desired state: pods that were observed in the Running phase,
// keyed by pod name. Watch updates it in response to pod watch events.
spec map[string]*corev1.Pod
// status is the actual state: names of pods for which a log-streaming
// goroutine has been launched and not yet removed via removePod.
status map[string]bool
// streamResults is allocated by NewWatcher.
// NOTE(review): nothing in this file sends to or receives from this
// channel; confirm whether it is still needed.
streamResults chan error
// dst receives the prefixed log lines of every tailed pod.
dst io.Writer
}
// NewWatcher returns a Watcher for the named deployment. Logs of the given
// container are read through clientset and written to dst. The internal pod
// maps and the result channel are allocated here, so the returned value is
// ready for Watch to be called on it.
func NewWatcher(deployName string, container string, clientset KubernetesClient, dst io.Writer) *Watcher {
	watcher := new(Watcher)

	// Caller-supplied configuration.
	watcher.deployName = deployName
	watcher.container = container
	watcher.clientset = clientset
	watcher.dst = dst

	// Internal bookkeeping, initially empty.
	watcher.spec = map[string]*corev1.Pod{}
	watcher.status = map[string]bool{}
	watcher.streamResults = make(chan error)

	return watcher
}
// Watch looks up the deployment in the default namespace, watches the pods
// matching its label selector and copies the logs of every running pod to the
// Watcher's destination writer.
//
// It blocks until ctx is cancelled (returning ctx.Err()), a log stream fails
// with an unrecoverable error, or the pod watch channel is closed. Errors from
// the initial deployment lookup, selector conversion and watch creation are
// returned unchanged.
func (w *Watcher) Watch(ctx context.Context) error {
	// Derive a cancellable context so that the log-streaming goroutines are
	// stopped whenever Watch returns, not only when the caller cancels.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	dst := NewConcurrentWriter(w.dst)

	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()

	deploymentsClient := w.clientset.AppsV1().Deployments(corev1.NamespaceDefault)
	deployment, err := deploymentsClient.Get(ctx, w.deployName, metav1.GetOptions{})
	if err != nil {
		return err
	}

	podsClient := w.clientset.CoreV1().Pods(corev1.NamespaceDefault)
	labelsMap, err := metav1.LabelSelectorAsMap(deployment.Spec.Selector)
	if err != nil {
		return err
	}
	watcher, err := podsClient.Watch(ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(labelsMap).String()})
	if err != nil {
		return err
	}
	defer watcher.Stop()

	// streamErrors is never closed; senders give up once ctx is done.
	streamErrors := make(chan error)

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()

		case <-ticker.C:
			// Iterate through the desired state (w.spec) and launch goroutines to
			// process the logs of any missing pods.
			for podName, pod := range w.spec {
				pod := pod
				if _, ok := w.status[podName]; ok {
					continue
				}
				log.Printf("adding pod, name = %s", pod.Name)
				w.status[pod.Name] = true
				go func() {
					err := copyPodLogs(ctx, w.clientset, pod, w.container, dst)
					if err == nil {
						return
					}
					// Do not block forever if Watch has already returned and
					// nobody is receiving from streamErrors any more.
					select {
					case streamErrors <- err:
					case <-ctx.Done():
					}
				}()
			}
			// For any pods which no longer exist, remove the pod.
			// TODO: check this is needed when a pod's labels change to no longer
			// match the deployment's selector.
			for podName := range w.status {
				if _, ok := w.spec[podName]; !ok {
					w.removePod(podName)
				}
			}

		case err := <-streamErrors:
			var streamErr *streamError
			if errors.As(err, &streamErr) && streamErr.recoverable {
				// If the error is recoverable, we just remove the pod from the
				// status map. It will be recreated and retried on the next tick.
				w.removePod(streamErr.podName)
				continue
			}
			// Return the received error itself. Returning streamErr here would
			// hand the caller a typed-nil *streamError whenever errors.As fails.
			return err

		case evt, ok := <-watcher.ResultChan():
			if !ok {
				// A closed channel would otherwise deliver zero-value events in
				// a tight loop while no further pod changes are ever observed.
				return errors.New("pod watch channel closed")
			}
			switch evt.Type {
			case watch.Added, watch.Modified:
				pod, isPod := evt.Object.(*corev1.Pod)
				if !isPod {
					continue
				}
				log.Printf("event rcvd, type = %s, pod name = %s, phase = %s", evt.Type, pod.Name, pod.Status.Phase)
				if pod.Status.Phase == corev1.PodRunning {
					w.spec[pod.Name] = pod
				}
			case watch.Deleted:
				pod, isPod := evt.Object.(*corev1.Pod)
				if !isPod {
					continue
				}
				delete(w.spec, pod.Name)
				log.Printf("event rcvd, type = DELETED, pod name = %s", pod.Name)
			}
		}
	}
}
// copyPodLogs follows the log stream of the given container in pod p and
// writes each line to dst, prefixed with "[<pod name>] " and terminated by a
// newline.
//
// It returns nil when the stream ends without a scan error. Every failure is
// returned as a *streamError carrying the pod name. A failure to open the
// stream whose message contains "is waiting to start" is marked recoverable;
// all other failures are not.
func copyPodLogs(ctx context.Context, clientset KubernetesClient, p *corev1.Pod, container string, dst io.Writer) error {
	// The default bufio.Scanner limit (bufio.MaxScanTokenSize) turns a single
	// long log line into bufio.ErrTooLong, which is reported as unrecoverable.
	// Allow considerably longer lines before giving up.
	const maxLogLineBytes = 1024 * 1024

	podLogOpts := corev1.PodLogOptions{
		Follow:    true,
		Container: container,
	}
	req := clientset.CoreV1().Pods(p.Namespace).GetLogs(p.Name, &podLogOpts)
	logs, err := req.Stream(ctx)
	if err != nil {
		// If one pod or container is in a non-running state, we don't want to
		// quit. Checking the response string avoids the risk of a race
		// condition but obviously feels a bit brittle too.
		recoverable := strings.Contains(err.Error(), "is waiting to start")
		return &streamError{err: err, podName: p.Name, recoverable: recoverable}
	}
	// The stream is only read from, so an error from Close is not actionable.
	defer func() { _ = logs.Close() }()

	scanner := bufio.NewScanner(logs)
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLogLineBytes)

	prefix := "[" + p.Name + "] "
	for scanner.Scan() {
		if _, err := dst.Write([]byte(prefix + scanner.Text() + nl)); err != nil {
			return &streamError{err: fmt.Errorf("error writing: %w", err), podName: p.Name}
		}
	}
	if err := scanner.Err(); err != nil {
		return &streamError{err: fmt.Errorf("error scanning: %w", err), podName: p.Name}
	}
	return nil
}
// removePod logs the removal and deletes podName from w.status. It does not
// touch w.spec and does not stop any log-streaming goroutine itself; a pod
// that is still present in w.spec is launched again on the next tick of
// Watch.
func (w *Watcher) removePod(podName string) {
log.Printf("removing pod, name = %s", podName)
delete(w.status, podName)
}