Add video and thumbnail display

This commit is contained in:
Rob Watson 2021-09-14 22:26:46 +02:00
parent ce3817dab8
commit b64ce1d424
6 changed files with 484 additions and 145 deletions

View File

@ -1,6 +1,7 @@
package main
import (
"context"
"encoding/json"
"log"
"net/http"
@ -30,7 +31,7 @@ func handleRequest(w http.ResponseWriter, r *http.Request) {
w.Header().Add("Content-Type", ContentTypeApplicationJSON)
w.Header().Add("Access-Control-Allow-Origin", "*")
if strings.HasPrefix(r.URL.Path, "/api/download") {
if strings.HasPrefix(r.URL.Path, "/api/media_sets") {
videoID := r.URL.Query().Get("video_id")
if videoID == "" {
w.WriteHeader(http.StatusBadRequest)
@ -54,7 +55,8 @@ func handleRequest(w http.ResponseWriter, r *http.Request) {
var err error
var youtubeClient youtubev2.Client
downloader := youtube.NewDownloader(&youtubeClient)
mediaSet, err = downloader.Download(videoID)
log.Printf("background context = %p, req context = %p", context.Background(), r.Context())
mediaSet, err = downloader.Download(r.Context(), videoID)
if err != nil {
log.Printf("error downloading MediaSet: %v", err)
w.WriteHeader(http.StatusInternalServerError)
@ -75,12 +77,6 @@ func handleRequest(w http.ResponseWriter, r *http.Request) {
if strings.HasPrefix(r.URL.Path, "/api/audio") {
log.Printf("got headers for audio request: %+v", r.Header)
videoID := r.URL.Query().Get("video_id")
if videoID == "" {
w.WriteHeader(http.StatusBadRequest)
w.Write([]byte(`{"error": "no video ID provided"}`))
return
}
mediaSet := media.MediaSet{ID: videoID}
if err := mediaSet.Load(); err != nil {
log.Printf("error loading MediaSet: %v", err)
@ -89,11 +85,44 @@ func handleRequest(w http.ResponseWriter, r *http.Request) {
return
}
// TODO: ensure content-type matches the actual downloaded format.
w.Header().Set("Content-Type", "audio/webm")
http.ServeFile(w, r, mediaSet.EncodedAudioPath())
return
}
if strings.HasPrefix(r.URL.Path, "/api/video") {
videoID := r.URL.Query().Get("video_id")
mediaSet := media.MediaSet{ID: videoID}
if err := mediaSet.Load(); err != nil {
log.Printf("error loading MediaSet: %v", err)
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte(`{"error": "could not fetch media"}`))
return
}
http.ServeFile(w, r, mediaSet.VideoPath())
return
}
if strings.HasPrefix(r.URL.Path, "/api/thumbnails") {
videoID := r.URL.Query().Get("video_id")
mediaSet := media.MediaSet{ID: videoID}
if err := mediaSet.Load(); err != nil {
log.Printf("error loading MediaSet: %v", err)
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte(`{"error": "could not fetch media"}`))
return
}
w.Header().Set("Content-Type", "image/jpeg")
http.ServeFile(w, r, mediaSet.ThumbnailPath())
return
}
if strings.HasPrefix(r.URL.Path, "/api/peaks") {
videoID := r.URL.Query().Get("video_id")
if videoID == "" {

View File

@ -6,30 +6,50 @@ import (
"errors"
"fmt"
"io"
"log"
"os"
"time"
)
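// SizeOfInt16 is the size in bytes of an int16 sample, used to convert
// between frame counts and byte offsets in the raw audio file.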
const SizeOfInt16 = 2
type Audio struct {
Bytes int64 `json:"bytes"`
Channels int `json:"channels"`
Frames int64 `json:"frames"`
SampleRate int `json:"sample_rate"`
}
type Video struct {
Bytes int64 `json:"bytes"`
Duration time.Duration `json:"duration"`
ThumbnailWidth int `json:"thumbnail_width"`
ThumbnailHeight int `json:"thumbnail_height"`
}
// MediaSet represents the media and metadata associated with a single media
// resource (for example, a YouTube video).
type MediaSet struct {
ID string `json:"id"`
Source string `json:"source"`
Bytes int64 `json:"bytes"`
Channels int `json:"channels"`
Frames int64 `json:"frames"`
SampleRate int `json:"sample_rate"`
Audio Audio `json:"audio"`
Video Video `json:"video"`
ID string `json:"id"`
Source string `json:"source"`
exists bool
}
// TODO: pass io.Readers/Writers instead of strings.
func (m *MediaSet) RawAudioPath() string { return fmt.Sprintf("cache/%s.raw", m.ID) }
func (m *MediaSet) EncodedAudioPath() string { return fmt.Sprintf("cache/%s.m4a", m.ID) }
func (m *MediaSet) VideoPath() string { return fmt.Sprintf("cache/%s.mp4", m.ID) }
func (m *MediaSet) VideoPath() string { return fmt.Sprintf("cache/%s.webm", m.ID) }
func (m *MediaSet) ThumbnailPath() string { return fmt.Sprintf("cache/%s.jpg", m.ID) }
func (m *MediaSet) MetadataPath() string { return fmt.Sprintf("cache/%s.json", m.ID) }
func (m *MediaSet) Exists() bool {
if m.ID == "" {
return false
}
if m.exists {
return true
}
@ -41,6 +61,10 @@ func (m *MediaSet) Exists() bool {
}
func (m *MediaSet) Load() error {
if m.ID == "" {
return errors.New("error opening mediaset with blank ID")
}
metadataFile, err := os.Open(m.MetadataPath())
if err != nil {
return fmt.Errorf("error opening metadata file: %v", err)
@ -65,7 +89,7 @@ func (m *MediaSet) Peaks(start, end int64, numBins int) ([][]int16, error) {
}
defer fptr.Close()
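// One frame holds one int16 sample per channel, so a frame index maps to a
// byte offset of frame * channels * SizeOfInt16.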
startByte := start * int64(m.Channels) * SizeOfInt16
startByte := start * int64(m.Audio.Channels) * SizeOfInt16
if _, err = fptr.Seek(startByte, io.SeekStart); err != nil {
return nil, fmt.Errorf("audio seek error: %v", err)
}
@ -73,12 +97,12 @@ func (m *MediaSet) Peaks(start, end int64, numBins int) ([][]int16, error) {
numFrames := end - start
framesPerBin := numFrames / int64(numBins)
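// Each bin stores the maximum absolute sample value per channel over
// framesPerBin frames.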
peaks := make([][]int16, m.Channels)
for i := 0; i < m.Channels; i++ {
peaks := make([][]int16, m.Audio.Channels)
for i := 0; i < m.Audio.Channels; i++ {
peaks[i] = make([]int16, numBins)
}
samples := make([]int16, framesPerBin*int64(m.Channels))
samples := make([]int16, framesPerBin*int64(m.Audio.Channels))
for binNum := 0; binNum < numBins; binNum++ {
if err := binary.Read(fptr, binary.LittleEndian, samples); err != nil {
@ -88,12 +112,13 @@ func (m *MediaSet) Peaks(start, end int64, numBins int) ([][]int16, error) {
if samp < 0 {
samp = -samp
}
chanIndex := i % m.Channels
chanIndex := i % m.Audio.Channels
if samp > peaks[chanIndex][binNum] {
peaks[chanIndex][binNum] = samp
}
}
}
log.Println("finished generating peaks")
return peaks, nil
}

View File

@ -2,14 +2,18 @@ package youtube
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"log"
"math"
"os"
"os/exec"
"strconv"
"sync"
"time"
"git.netflux.io/rob/clipper/media"
@ -22,12 +26,16 @@ const (
EncodedAudioCodec = "pcm_s16le"
EncodedAudioFormat = "s16le"
EncodedAudioSampleRate = 48000
thumbnailWidth = 30
thumbnailHeight = 100
videoItag = 18
)
// YoutubeClient wraps the youtube.Client client.
type YoutubeClient interface {
GetVideo(string) (*youtubev2.Video, error)
GetStream(*youtubev2.Video, *youtubev2.Format) (io.ReadCloser, int64, error)
GetVideoContext(context.Context, string) (*youtubev2.Video, error)
GetStreamContext(context.Context, *youtubev2.Video, *youtubev2.Format) (io.ReadCloser, int64, error)
}
// Downloader downloads a set of Youtube media for a given video ID, including
@ -41,85 +49,67 @@ func NewDownloader(youtubeClient YoutubeClient) *Downloader {
return &Downloader{youtubeClient: youtubeClient}
}
type audioResult struct {
*media.Audio
err error
}
// videoMediaSet represents the video part of a media.MediaSet:
type videoMediaSet struct {
bytes int64
}
type videoResult struct {
*media.Video
err error
}
// Download downloads the relevant audio and video files for the provided
// Youtube video ID. If successful, a *media.MediaSet struct containing
// metadata about the downloaded items is returned.
func (d *Downloader) Download(videoID string) (*media.MediaSet, error) {
video, err := d.youtubeClient.GetVideo(videoID)
func (d *Downloader) Download(ctx context.Context, videoID string) (*media.MediaSet, error) {
var video *youtubev2.Video
video, err := d.youtubeClient.GetVideoContext(ctx, videoID)
if err != nil {
return nil, fmt.Errorf("error fetching video: %v", err)
}
// TODO: improve selection of audio and video format.
// Perhaps download both separately?
var format *youtubev2.Format
for i := range video.Formats {
candidate := video.Formats[i]
if candidate.FPS == 0 || candidate.AudioChannels == 0 {
continue
}
if format == nil || (candidate.ContentLength > 0 && candidate.ContentLength < format.ContentLength) {
format = &candidate
}
}
if format == nil {
return nil, errors.New("error selecting format: no format available")
}
log.Printf("selected format: %+v", format)
stream, _, err := d.youtubeClient.GetStream(video, format)
if err != nil {
return nil, fmt.Errorf("error fetching stream: %v", err)
}
mediaSet := media.MediaSet{ID: videoID, Source: "youtube"}
rawAudioFile, err := os.Create(mediaSet.RawAudioPath())
if err != nil {
return nil, fmt.Errorf("error creating raw audio file: %v", err)
audioResultChan := make(chan audioResult, 1)
videoResultChan := make(chan videoResult, 1)
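// Fetch audio and video concurrently. The buffered channels let each
// goroutine deliver its result without blocking before wg.Wait() returns.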
var wg sync.WaitGroup
wg.Add(2)
go func() {
defer close(audioResultChan)
audio, audioErr := d.downloadAudio(ctx, video, mediaSet.EncodedAudioPath(), mediaSet.RawAudioPath())
result := audioResult{audio, audioErr}
audioResultChan <- result
wg.Done()
}()
go func() {
defer close(videoResultChan)
video, videoErr := d.downloadVideo(ctx, video, mediaSet.VideoPath(), mediaSet.ThumbnailPath())
result := videoResult{video, videoErr}
videoResultChan <- result
wg.Done()
}()
wg.Wait()
audioResult := <-audioResultChan
videoResult := <-videoResultChan
if err = audioResult.err; err != nil {
return nil, fmt.Errorf("error downloading audio: %v", err)
}
if err = videoResult.err; err != nil {
return nil, fmt.Errorf("error downloading video: %v", err)
}
encodedAudioFile, err := os.Create(mediaSet.EncodedAudioPath())
if err != nil {
return nil, fmt.Errorf("error creating encoded audio file: %v", err)
}
streamReader := io.TeeReader(stream, encodedAudioFile)
var errOut bytes.Buffer
cmd := exec.Command("ffmpeg", "-i", "-", "-f", EncodedAudioFormat, "-ar", strconv.Itoa(EncodedAudioSampleRate), "-acodec", EncodedAudioCodec, "-")
cmd.Stdin = streamReader
cmd.Stdout = rawAudioFile
cmd.Stderr = &errOut
if err = cmd.Run(); err != nil {
log.Println(errOut.String())
return nil, fmt.Errorf("error processing audio: %v", err)
}
if err = rawAudioFile.Close(); err != nil {
return nil, fmt.Errorf("error writing raw audio file: %v", err)
}
rawAudioFile, err = os.Open(mediaSet.RawAudioPath())
if err != nil {
return nil, fmt.Errorf("error reading file: %v", err)
}
fi, err := rawAudioFile.Stat()
if err != nil {
return nil, fmt.Errorf("error reading file info: %v", err)
}
numFrames := fi.Size() / int64(SizeOfInt16) / int64(format.AudioChannels)
sampleRate, err := strconv.Atoi(format.AudioSampleRate)
if err != nil {
return nil, fmt.Errorf("invalid samplerate: %s", format.AudioSampleRate)
}
mediaSet.Bytes = fi.Size()
mediaSet.Channels = format.AudioChannels
mediaSet.Frames = numFrames
mediaSet.SampleRate = sampleRate
mediaSet.Audio = *audioResult.Audio
mediaSet.Video = *videoResult.Video
metadataFile, err := os.Create(mediaSet.MetadataPath())
if err != nil {
@ -134,5 +124,126 @@ func (d *Downloader) Download(videoID string) (*media.MediaSet, error) {
return nil, fmt.Errorf("error writing metadata file: %v", err)
}
log.Println("finished downloading mediaset")
return &mediaSet, nil
}
func (d *Downloader) downloadAudio(ctx context.Context, video *youtubev2.Video, outPath, rawOutPath string) (*media.Audio, error) {
var format *youtubev2.Format
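// Pick the smallest format that includes audio, preferring candidates with a
// known content length.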
for _, candidate := range video.Formats.WithAudioChannels() {
if format == nil || (candidate.ContentLength > 0 && candidate.ContentLength < format.ContentLength) {
candidate := candidate
format = &candidate
}
}
if format == nil {
return nil, errors.New("error selecting audio format: no format available")
}
log.Printf("selected audio format: %+v", format)
stream, _, err := d.youtubeClient.GetStreamContext(ctx, video, format)
if err != nil {
return nil, fmt.Errorf("error fetching audio stream: %v", err)
}
rawAudioFile, err := os.Create(rawOutPath)
if err != nil {
return nil, fmt.Errorf("error creating raw audio file: %v", err)
}
encodedAudioFile, err := os.Create(outPath)
if err != nil {
return nil, fmt.Errorf("error creating encoded audio file: %v", err)
}
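// Tee the stream so the encoded audio is saved to disk while ffmpeg decodes
// the same bytes to raw PCM for peak generation.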
streamReader := io.TeeReader(stream, encodedAudioFile)
var errOut bytes.Buffer
cmd := exec.CommandContext(ctx, "ffmpeg", "-i", "-", "-f", EncodedAudioFormat, "-ar", strconv.Itoa(EncodedAudioSampleRate), "-acodec", EncodedAudioCodec, "-")
cmd.Stdin = streamReader
cmd.Stdout = rawAudioFile
cmd.Stderr = &errOut
if err = cmd.Run(); err != nil {
log.Println(errOut.String())
return nil, fmt.Errorf("error processing audio: %v", err)
}
if err = rawAudioFile.Close(); err != nil {
return nil, fmt.Errorf("error writing raw audio file: %v", err)
}
rawAudioFile, err = os.Open(rawOutPath)
if err != nil {
return nil, fmt.Errorf("error opening raw audio file: %v", err)
}
fi, err := rawAudioFile.Stat()
if err != nil {
return nil, fmt.Errorf("error reading raw audio file info: %v", err)
}
numFrames := fi.Size() / int64(SizeOfInt16) / int64(format.AudioChannels)
sampleRate, err := strconv.Atoi(format.AudioSampleRate)
if err != nil {
return nil, fmt.Errorf("invalid samplerate: %s", format.AudioSampleRate)
}
return &media.Audio{
Bytes: fi.Size(),
Channels: format.AudioChannels,
Frames: numFrames,
SampleRate: sampleRate,
}, nil
}
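// thumbnailGridSize returns the (columns, rows) of a tile grid large enough
// to hold one thumbnail per second of video, e.g. 90 seconds -> 10x9.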
func thumbnailGridSize(seconds int) (int, int) {
if seconds <= 0 {
return 1, 1
}
x := int(math.Ceil(math.Sqrt(float64(seconds))))
y := int(math.Ceil(float64(seconds) / float64(x)))
return x, y
}
func (d *Downloader) downloadVideo(ctx context.Context, video *youtubev2.Video, outPath, thumbnailOutPath string) (*media.Video, error) {
// TODO: check if iTag 18 always exists, and works in a good variety of browsers.
format := video.Formats.FindByItag(videoItag)
if format == nil {
return nil, errors.New("error selecting video format: no format available")
}
log.Printf("selected video format: %+v", format)
stream, _, err := d.youtubeClient.GetStreamContext(ctx, video, format)
if err != nil {
return nil, fmt.Errorf("error fetching video stream: %v", err)
}
videoFile, err := os.Create(outPath)
if err != nil {
return nil, fmt.Errorf("error creating video file: %v", err)
}
streamReader := io.TeeReader(stream, videoFile)
durationMsecs, err := strconv.Atoi(format.ApproxDurationMs)
if err != nil {
return nil, fmt.Errorf("could not parse video duration: %s", err)
}
durationSecs := durationMsecs / 1000
gridSizeX, gridSizeY := thumbnailGridSize(durationSecs)
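// Extract one frame per second, scale and crop each to the thumbnail size,
// then tile them into a single sprite image.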
var errOut bytes.Buffer
cmd := exec.CommandContext(ctx, "ffmpeg", "-i", "-", "-vf", fmt.Sprintf("fps=1,scale=-1:110,crop=%d:%d,tile=%dx%d", thumbnailWidth, thumbnailHeight, gridSizeX, gridSizeY), "-f", "image2pipe", "-vsync", "0", thumbnailOutPath)
cmd.Stdin = streamReader
cmd.Stderr = &errOut
if err = cmd.Run(); err != nil {
log.Println(errOut.String())
return nil, fmt.Errorf("error processing video: %v", err)
}
duration := time.Duration(durationMsecs) * time.Millisecond
return &media.Video{
Bytes: format.ContentLength,
ThumbnailWidth: thumbnailWidth,
ThumbnailHeight: thumbnailHeight,
Duration: duration,
}, nil
}

View File

@ -1,5 +1,6 @@
import { useEffect, useState, useRef, MouseEvent } from 'react';
import { Waveform as WaveformOverview } from './Waveform/Overview';
import { Thumbnails } from './Waveform/Thumbnails';
import { Canvas as WaveformCanvas } from './Waveform/Canvas';
import {
secsToCanvasX,
@ -11,13 +12,30 @@ type Props = {
audioContext: AudioContext;
};
export type AudioFile = {
// Audio corresponds to media.Audio.
export type Audio = {
bytes: number;
channels: number;
frames: number;
sampleRate: number;
};
// Video corresponds to media.Video.
export type Video = {
bytes: number;
thumbnailWidth: number;
thumbnailHeight: number;
durationMillis: number;
};
// MediaSet corresponds to media.MediaSet.
export type MediaSet = {
id: string;
source: string;
audio: Audio;
video: Video;
};
export type Selection = {
x1: number;
x2: number;
@ -34,14 +52,15 @@ export const CanvasLogicalWidth = 2000;
export const CanvasLogicalHeight = 500;
export const Waveform: React.FC<Props> = ({ audioContext }: Props) => {
const [audioFile, setAudioFile] = useState<AudioFile | null>(null);
const [mediaSet, setMediaSet] = useState<MediaSet | null>(null);
const [currentTime, setCurrentTime] = useState(0);
// TODO: fix linter error
// TODO: extract to player component.
const [audio, setAudio] = useState(new Audio());
const [zoomSettings, setZoomSettings] = useState(defaultZoomSettings);
const [waveformPeaks, setWaveformPeaks] = useState(null);
const [overviewPeaks, setOverviewPeaks] = useState(null);
const hudCanvasRef = useRef<HTMLCanvasElement>(null);
const videoRef = useRef<HTMLVideoElement>(null);
// TODO: error handling
const videoID = new URLSearchParams(window.location.search).get('video_id');
@ -51,60 +70,88 @@ export const Waveform: React.FC<Props> = ({ audioContext }: Props) => {
// setup player on page load:
useEffect(() => {
(async function () {
audio.addEventListener('timeupdate', () => {
setCurrentTime(audio.currentTime);
const video = videoRef.current;
if (video == null) {
return;
}
video.addEventListener('timeupdate', () => {
setCurrentTime(video.currentTime);
});
})();
}, [audio]);
});
// fetch audio data on page load:
// fetch mediaset on page load:
useEffect(() => {
(async function () {
console.log('fetching audio data...');
console.log('fetching media...');
const resp = await fetch(
`http://localhost:8888/api/download?video_id=${videoID}`
`http://localhost:8888/api/media_sets?video_id=${videoID}`
);
const respBody = await resp.json();
if (respBody.error) {
console.log('error fetching audio data:', respBody.error);
console.log('error fetching media set:', respBody.error);
return;
}
// TODO: safer deserialization?
const audioFile: AudioFile = {
bytes: respBody.bytes,
channels: respBody.channels,
frames: respBody.frames,
sampleRate: respBody.sample_rate,
const mediaSet: MediaSet = {
id: respBody.id,
source: respBody.source,
audio: {
sampleRate: respBody.audio.sample_rate,
bytes: respBody.audio.bytes,
frames: respBody.audio.frames,
channels: respBody.audio.channels,
},
video: {
bytes: respBody.video.bytes,
thumbnailWidth: respBody.video.thumbnail_width,
thumbnailHeight: respBody.video.thumbnail_height,
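// Go serializes time.Duration as nanoseconds; convert to milliseconds.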
durationMillis: Math.floor(respBody.video.duration / 1000 / 1000),
},
};
setAudioFile(audioFile);
setZoomSettings({ startFrame: 0, endFrame: audioFile.frames });
setMediaSet(mediaSet);
setZoomSettings({ startFrame: 0, endFrame: mediaSet.audio.frames });
})();
}, [audioContext]);
// render overview waveform to canvas when the audio file is loaded:
// load video when MediaSet is loaded:
useEffect(() => {
if (mediaSet == null) {
return;
}
const video = videoRef.current;
if (video == null) {
return;
}
const url = `http://localhost:8888/api/video?video_id=${videoID}`;
video.src = url;
video.muted = false;
video.volume = 1;
video.controls = true;
}, [mediaSet]);
// fetch new waveform peaks when zoom settings are updated:
useEffect(() => {
(async function () {
if (audioFile == null) {
if (mediaSet == null) {
return;
}
let endFrame = zoomSettings.endFrame;
if (endFrame <= zoomSettings.startFrame) {
endFrame = audioFile.frames;
endFrame = mediaSet.audio.frames;
}
const resp = await fetch(
`http://localhost:8888/api/peaks?video_id=${videoID}&start=${zoomSettings.startFrame}&end=${endFrame}&bins=${CanvasLogicalWidth}`
);
const peaks = await resp.json();
console.log('respBody from peaks =', peaks);
setWaveformPeaks(peaks);
if (overviewPeaks == null) {
@ -118,7 +165,6 @@ export const Waveform: React.FC<Props> = ({ audioContext }: Props) => {
(async function () {
const canvas = hudCanvasRef.current;
if (canvas == null) {
console.error('no hud canvas ref available');
return;
}
@ -130,14 +176,14 @@ export const Waveform: React.FC<Props> = ({ audioContext }: Props) => {
ctx.clearRect(0, 0, canvas.width, canvas.height);
if (audioFile == null) {
if (mediaSet == null) {
return;
}
const x = secsToCanvasX(
currentTime,
audioFile.sampleRate,
audioFile.frames
mediaSet.audio.sampleRate,
mediaSet.audio.frames
);
ctx.strokeStyle = 'red';
@ -148,10 +194,16 @@ export const Waveform: React.FC<Props> = ({ audioContext }: Props) => {
})();
}, [currentTime]);
// end of hook configuration.
// TODO: render loading page here.
if (mediaSet == null) {
return null;
}
// callbacks
const handleMouseMove = (evt: MouseEvent<HTMLCanvasElement>) => {
if (audioFile == null) {
if (mediaSet == null) {
return;
}
const canvasX = mouseEventToCanvasX(evt);
@ -159,7 +211,7 @@ export const Waveform: React.FC<Props> = ({ audioContext }: Props) => {
'mousemove, x =',
canvasX,
'frame =',
canvasXToFrame(canvasX, numFrames)
canvasXToFrame(canvasX, mediaSet.audio.frames)
);
};
@ -172,19 +224,25 @@ export const Waveform: React.FC<Props> = ({ audioContext }: Props) => {
};
const handlePlay = async () => {
const url = `http://localhost:8888/api/audio?video_id=${videoID}`;
audio.src = url;
await audio.play();
console.log('playing audio from', url);
const video = videoRef.current;
if (video == null) {
return;
}
await video.play();
};
const handlePause = () => {
audio.pause();
console.log('paused audio');
const video = videoRef.current;
if (video == null) {
return;
}
video.pause();
console.log('paused video');
};
const handleZoomIn = () => {
if (audioFile == null) {
if (mediaSet == null) {
return;
}
console.log('zoom in');
@ -195,7 +253,7 @@ export const Waveform: React.FC<Props> = ({ audioContext }: Props) => {
};
const handleZoomOut = () => {
if (audioFile == null) {
if (mediaSet == null) {
return;
}
console.log('zoom out');
@ -203,19 +261,19 @@ export const Waveform: React.FC<Props> = ({ audioContext }: Props) => {
const newDiff = diff * 2;
const endFrame = Math.min(
zoomSettings.endFrame + newDiff,
audioFile.frames
mediaSet.audio.frames
);
const settings = { ...zoomSettings, endFrame: endFrame };
setZoomSettings(settings);
};
const handleSelectionChange = (selection: Selection) => {
if (audioFile == null) {
if (mediaSet == null) {
return;
}
const settings: ZoomSettings = {
startFrame: canvasXToFrame(selection.x1, audioFile.frames),
endFrame: canvasXToFrame(selection.x2, audioFile.frames),
startFrame: canvasXToFrame(selection.x1, mediaSet.audio.frames),
endFrame: canvasXToFrame(selection.x2, mediaSet.audio.frames),
};
setZoomSettings(settings);
};
@ -224,7 +282,7 @@ export const Waveform: React.FC<Props> = ({ audioContext }: Props) => {
const wrapperProps = {
width: '90%',
height: '350px',
height: '250px',
position: 'relative',
margin: '0 auto',
} as React.CSSProperties;
@ -251,20 +309,34 @@ export const Waveform: React.FC<Props> = ({ audioContext }: Props) => {
zIndex: 1,
} as React.CSSProperties;
const overviewStyles = { ...wrapperProps, height: '90px' };
const overviewStyles = { ...wrapperProps, height: '60px' };
// TODO: why is the margin needed?
const controlPanelStyles = { margin: '1em' } as React.CSSProperties;
const clockTextAreaProps = { color: '#999', width: '400px' };
let numFrames = 0;
if (audioFile != null) {
numFrames = audioFile.frames;
}
const videoStyles = {
width: '30%',
height: 'auto',
margin: '10px auto 0 auto',
zIndex: 2,
} as React.CSSProperties;
const thumbnailStyles = {
width: '90%',
height: '35px',
margin: '10px auto 0 auto',
display: 'block',
};
return (
<>
<h1>clipper</h1>
<video ref={videoRef} style={videoStyles}></video>
<Thumbnails mediaSet={mediaSet} style={thumbnailStyles} />
<WaveformOverview
peaks={overviewPeaks}
numFrames={mediaSet.audio.frames}
style={overviewStyles}
onSelectionChange={handleSelectionChange}
></WaveformOverview>
<div style={wrapperProps}>
<WaveformCanvas
peaks={waveformPeaks}
@ -282,12 +354,6 @@ export const Waveform: React.FC<Props> = ({ audioContext }: Props) => {
height={CanvasLogicalHeight}
></canvas>
</div>
<WaveformOverview
peaks={overviewPeaks}
numFrames={numFrames}
style={overviewStyles}
onSelectionChange={handleSelectionChange}
></WaveformOverview>
<div style={controlPanelStyles}>
<button onClick={handlePlay}>Play</button>
<button onClick={handlePause}>Pause</button>

View File

@ -16,6 +16,12 @@ export const canvasXToFrame = (x: number, numFrames: number): number => {
return Math.floor((x / CanvasLogicalWidth) * numFrames);
};
// // TODO: add tests
// export const canvasXToSecs = (x: number, numFrames: number): number => {
//   const frame = canvasXToFrame(x, numFrames);
// }
// TODO: add tests
export const secsToCanvasX = (
secs: number,

View File

@ -0,0 +1,102 @@
import { useState, useEffect, useRef } from 'react';
import { CanvasLogicalWidth, CanvasLogicalHeight, MediaSet } from '../Waveform';
interface Props {
mediaSet: MediaSet;
style: React.CSSProperties;
}
enum State {
Loading,
Ready,
Error,
}
export const Thumbnails: React.FC<Props> = ({ mediaSet, style }: Props) => {
const [image, setImage] = useState(new Image());
const [state, setState] = useState(State.Loading);
const canvasRef = useRef<HTMLCanvasElement>(null);
// load thumbnail image when available:
useEffect(() => {
if (mediaSet == null) return;
image.src = `http://localhost:8888/api/thumbnails?video_id=${mediaSet.id}`;
image.onload = () => {
setState(State.Ready);
};
image.onerror = () => setState(State.Error);
}, []);
// render canvas if image has been loaded successfully:
useEffect(() => {
if (state != State.Ready) return;
if (mediaSet == null) return;
const canvas = canvasRef.current;
if (canvas == null) {
console.error('no canvas available');
return;
}
const ctx = canvas.getContext('2d');
if (ctx == null) {
console.error('no thumbnail 2d context available');
return;
}
const tw = mediaSet.video.thumbnailWidth;
const th = mediaSet.video.thumbnailHeight;
const iw = image.width;
const ih = image.height;
const { width: pw, height: ph } = canvas.getBoundingClientRect();
// set canvas logical width to suit the aspect ratio:
// TODO: confirm this is needed.
const ar = tw / th;
const par = pw / ph;
canvas.width = tw * (par / ar);
const durationSecs = mediaSet.video.durationMillis / 1000;
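// Walk the canvas one thumbnail at a time, mapping each x position to a
// timestamp and copying the matching tile from the sprite grid.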
for (let dx = 0; dx < canvas.width; dx += tw) {
const secs = Math.floor((dx / canvas.width) * durationSecs);
const sx = (secs * tw) % iw;
const sy = Math.floor(secs / (iw / tw)) * th;
ctx.drawImage(image, sx, sy, tw, th, dx, 0, tw, th);
}
}, [state]);
// rendering
if (mediaSet == null || mediaSet.video == null) {
console.error('unexpected null video');
return null;
}
if (state == State.Loading) {
return (
<>
<div>Loading...</div>
</>
);
}
if (state == State.Error) {
return (
<>
<span>Something went wrong</span>
</>
);
}
return (
<>
<canvas
ref={canvasRef}
style={style}
width={CanvasLogicalWidth}
height={100}
></canvas>
</>
);
};