
204 lines
5.3 KiB
Raw Normal View History

2022-05-29 19:04:02 +00:00
package logs
import (
2022-05-31 04:11:44 +00:00
2022-05-29 19:04:02 +00:00
2022-05-31 04:11:44 +00:00
2022-05-29 19:04:02 +00:00
2022-05-31 04:11:44 +00:00
2022-05-29 19:04:02 +00:00
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2022-05-31 04:11:44 +00:00
// KubernetesClient wraps a Kubernetes clientset. Code in this file reaches the
// cluster through its AppsV1() and CoreV1() accessors.
2022-05-30 10:10:58 +00:00
type KubernetesClient struct {
2022-05-31 04:11:44 +00:00
// concurrentWriter is an io.Writer that serializes calls to an underlying
// writer so that it can be shared between goroutines.
type concurrentWriter struct {
	mu sync.Mutex // guards every call to w.Write
	w  io.Writer
}

// NewConcurrentWriter returns an io.Writer which can be safely written to from
// multiple goroutines without further synchronization.
func NewConcurrentWriter(w io.Writer) io.Writer {
	return &concurrentWriter{w: w}
}

// Write implements io.Writer. It holds the mutex for the duration of the
// underlying Write, so each call's bytes reach the wrapped writer without
// being interleaved with another goroutine's call.
func (cw *concurrentWriter) Write(p []byte) (int, error) {
	// The lock must be taken before the deferred unlock runs; unlocking a
	// mutex that is not held is a fatal runtime error.
	cw.mu.Lock()
	defer cw.mu.Unlock()

	return cw.w.Write(p)
}
// nl is the line terminator appended to each log line that copyPodLogs writes.
const nl = "\n"
// streamError describes a failure while streaming the logs of a single pod.
type streamError struct {
	// err is the underlying cause.
	err error
	// podName is the name of the pod whose log stream failed.
	podName string
	// recoverable reports whether the stream may simply be retried later
	// rather than aborting the watch.
	recoverable bool
}

// Error implements error by returning the message of the underlying cause.
func (e *streamError) Error() string {
	return e.err.Error()
}

// Unwrap returns the underlying cause so that errors.Is and errors.As can
// inspect the error chain through a streamError.
func (e *streamError) Unwrap() error {
	return e.err
}
2022-05-29 19:04:02 +00:00
// Watcher watches a deployment and tails the logs for its currently active
// pods.
type Watcher struct {
2022-05-31 04:11:44 +00:00
deployName string
container string
clientset KubernetesClient
spec map[string]*corev1.Pod
status map[string]bool
streamResults chan error
dst io.Writer
2022-05-29 19:04:02 +00:00
// NewWatcher creates a new Watcher.
2022-05-31 04:11:44 +00:00
func NewWatcher(deployName string, container string, clientset KubernetesClient, dst io.Writer) *Watcher {
2022-05-29 19:04:02 +00:00
return &Watcher{
2022-05-31 04:11:44 +00:00
deployName: deployName,
container: container,
clientset: clientset,
spec: make(map[string]*corev1.Pod),
status: make(map[string]bool),
streamResults: make(chan error),
dst: dst,
2022-05-29 19:04:02 +00:00
// Watch watches a deployment.
func (w *Watcher) Watch(ctx context.Context) error {
2022-05-30 10:10:58 +00:00
dst := NewConcurrentWriter(w.dst)
2022-05-29 19:04:02 +00:00
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
deploymentsClient := w.clientset.AppsV1().Deployments(corev1.NamespaceDefault)
var opts metav1.GetOptions
deployment, err := deploymentsClient.Get(ctx, w.deployName, opts)
if err != nil {
return err
podsClient := w.clientset.CoreV1().Pods(corev1.NamespaceDefault)
labelsMap, err := metav1.LabelSelectorAsMap(deployment.Spec.Selector)
if err != nil {
return err
watcher, err := podsClient.Watch(ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(labelsMap).String()})
if err != nil {
return err
defer watcher.Stop()
2022-05-31 04:11:44 +00:00
// streamErrors is never closed.
streamErrors := make(chan error)
2022-05-29 19:04:02 +00:00
for {
select {
2022-05-30 10:10:58 +00:00
case <-ctx.Done():
return ctx.Err()
2022-05-29 19:04:02 +00:00
case <-ticker.C:
// Iterate through the desired state (w.spec) and launch goroutines to
// process the logs of any missing pods.
for podName, pod := range w.spec {
2022-05-31 04:11:44 +00:00
pod := pod
2022-05-29 19:04:02 +00:00
if _, ok := w.status[podName]; !ok {
2022-05-31 04:11:44 +00:00
log.Printf("adding pod, name = %s", pod.Name)
w.status[pod.Name] = true
go func() {
if err := copyPodLogs(ctx, w.clientset, pod, w.container, dst); err != nil {
streamErrors <- err
2022-05-29 19:04:02 +00:00
// For any pods which no longer exist, remove the pod.
// TODO: check this is needed when a pod's labels change to no longer
// match the deployment's selector.
for podName := range w.status {
if _, ok := w.spec[podName]; !ok {
2022-05-31 04:11:44 +00:00
case err := <-streamErrors:
var streamErr *streamError
if errors.As(err, &streamErr) && streamErr.recoverable {
// if the error is recoverable, we just remove the pod from the status
// map. It will be recreated and retried on the next iteration.
} else {
return streamErr
2022-05-29 19:04:02 +00:00
case evt := <-watcher.ResultChan():
switch evt.Type {
case watch.Added, watch.Modified:
pod := evt.Object.(*corev1.Pod)
log.Printf("event rcvd, type = %s, pod name = %s, phase = %s", evt.Type, pod.Name, pod.Status.Phase)
if pod.Status.Phase == corev1.PodRunning {
w.spec[pod.Name] = pod
case watch.Deleted:
pod := evt.Object.(*corev1.Pod)
delete(w.spec, pod.Name)
log.Printf("event rcvd, type = DELETED, pod name = %s", pod.Name)
2022-05-31 04:11:44 +00:00
func copyPodLogs(ctx context.Context, clientset KubernetesClient, p *corev1.Pod, container string, dst io.Writer) error {
podLogOpts := corev1.PodLogOptions{
Follow: true,
Container: container,
req := clientset.CoreV1().Pods(p.Namespace).GetLogs(p.Name, &podLogOpts)
logs, err := req.Stream(ctx)
// If one pod or container is in a non-running state, we don't want to quit.
// Checking the response string avoids the risk of a race condition but
// obviously feels a bit brittle too.
if err != nil && strings.Contains(err.Error(), "is waiting to start") {
return &streamError{err: err, podName: p.Name, recoverable: true}
} else if err != nil {
return &streamError{err: err, podName: p.Name}
defer func() { _ = logs.Close() }()
scanner := bufio.NewScanner(logs)
for scanner.Scan() {
if _, err = dst.Write([]byte("[" + p.Name + "] " + scanner.Text() + nl)); err != nil {
return &streamError{err: fmt.Errorf("error writing: %v", err), podName: p.Name}
2022-05-29 19:04:02 +00:00
2022-05-31 04:11:44 +00:00
if err := scanner.Err(); err != nil {
return &streamError{err: fmt.Errorf("error scanning: %v", err), podName: p.Name}
return nil
2022-05-29 19:04:02 +00:00
func (w *Watcher) removePod(podName string) {
log.Printf("removing pod, name = %s", podName)
delete(w.status, podName)