From 4e99f5ac0ca659f3e5c083d0613b012849844607 Mon Sep 17 00:00:00 2001 From: Rob Watson Date: Sat, 11 Sep 2021 11:00:27 +0200 Subject: [PATCH] Calculate peaks, draw basic waveform to canvas --- backend/main.go | 400 +++++++++++++++++++++++++++++--------- frontend/src/Waveform.tsx | 223 ++++++++++++++++++--- 2 files changed, 508 insertions(+), 115 deletions(-) diff --git a/backend/main.go b/backend/main.go index 38367b8..ef19398 100644 --- a/backend/main.go +++ b/backend/main.go @@ -1,11 +1,17 @@ package main import ( + "bytes" + "encoding/binary" + "encoding/json" + "errors" "fmt" "io" "log" "net/http" "os" + "os/exec" + "strconv" "strings" "time" @@ -13,78 +19,202 @@ import ( ) const ( - ContentTypeAudioM4A = "audio/m4a" - ItagM4AAudio = 140 + SizeOfInt16 = 2 + ContentTypeAudioM4A = "audio/m4a" + ContentTypeApplicationJSON = "application/json" + ItagM4AAudio = 140 + + DefaultFormat = "s16le" + DefaultAudioCodec = "pcm_s16le" + DefaultSampleRate = 48000 DefaultHTTPBindAddr = "0.0.0.0:8888" - DefaultTimeout = 5 * time.Second + DefaultTimeout = 30 * time.Second ) -type AudioDownloader struct { - videoID string - ytClient youtube.Client - reader io.Reader +type AudioFileMetadata struct { + Bytes int64 `json:"bytes"` + Channels int `json:"channels"` + Frames int64 `json:"frames"` + SampleRate int `json:"sample_rate"` } -func NewAudioDownloader(videoID string) (*AudioDownloader, error) { - var ( - reader io.Reader - ytClient youtube.Client - ) - - cachePath := fmt.Sprintf("cache/%s.m4a", videoID) - fetch := true - - if _, err := os.Stat(cachePath); err == nil { - if fptr, err := os.Open(cachePath); err == nil { - reader = fptr - fetch = false - } else { - log.Printf("warning: error opening cache file: %v", err) - } - } - - if fetch { - log.Println("fetching audio stream from youtube...") - - video, err := ytClient.GetVideo(videoID) - if err != nil { - return nil, fmt.Errorf("error fetching video: %v", err) - } - - format := video.Formats.FindByItag(ItagM4AAudio) - 
log.Printf("M4A format expected to contain %d bytes", format.ContentLength) - stream, _, err := ytClient.GetStream(video, format) - if err != nil { - return nil, fmt.Errorf("error fetching stream: %v", err) - } - - // TODO: only allow the cached file to be accessed after it has been - // successfully downloaded. - cacheFile, err := os.Create(cachePath) - if err != nil { - return nil, fmt.Errorf("error creating cache file: %v", err) - } - reader = io.TeeReader(stream, cacheFile) - } - - return &AudioDownloader{ - videoID: videoID, - ytClient: ytClient, - reader: reader, - }, nil +type AudioFile struct { + AudioFileMetadata + videoID string + exists bool } -func (d *AudioDownloader) Read(p []byte) (int, error) { - return d.reader.Read(p) +func NewAudioFile(videoID string) (*AudioFile, error) { + f := &AudioFile{videoID: videoID} + + if f.Exists() { + metadataFile, err := os.Open(f.metadataPath()) + if err != nil { + return nil, fmt.Errorf("error opening metadata file: %v", err) + } + defer func() { _ = metadataFile.Close() }() + + if err := json.NewDecoder(metadataFile).Decode(f); err != nil { + return nil, fmt.Errorf("error decoding metadata: %v", err) + } + + f.exists = true + } + + return f, nil } -func (d *AudioDownloader) Close() error { - if rc, ok := d.reader.(io.ReadCloser); ok { - return rc.Close() +func (f *AudioFile) rawAudioPath() string { return fmt.Sprintf("cache/%s.raw", f.videoID) } +func (f *AudioFile) encodedAudioPath() string { return fmt.Sprintf("cache/%s.m4a", f.videoID) } +func (f *AudioFile) metadataPath() string { return fmt.Sprintf("cache/%s.json", f.videoID) } + +func (f *AudioFile) Exists() bool { + if f.exists { + return true } + if _, err := os.Stat(f.metadataPath()); err == nil { + f.exists = true + return true + } + return false +} + +func (f *AudioFile) Download() error { + log.Println("fetching audio stream from youtube...") + var ytClient youtube.Client + + video, err := ytClient.GetVideo(f.videoID) + if err != nil { + return 
fmt.Errorf("error fetching video: %v", err) + } + + var format *youtube.Format + for _, candidate := range video.Formats.WithAudioChannels() { + candidate := candidate + if format == nil || (candidate.ContentLength > 0 && candidate.ContentLength < format.ContentLength) { + format = &candidate + } + } + if format == nil { + return errors.New("error selecting format: no format available") + } + + stream, _, err := ytClient.GetStream(video, format) + if err != nil { + return fmt.Errorf("error fetching stream: %v", err) + } + + rawAudioFile, err := os.Create(f.rawAudioPath()) + if err != nil { + return fmt.Errorf("error creating raw audio file: %v", err) + } + + encodedAudioFile, err := os.Create(f.encodedAudioPath()) + if err != nil { + return fmt.Errorf("error creating encoded audio file: %v", err) + } + streamReader := io.TeeReader(stream, encodedAudioFile) + + var errOut bytes.Buffer + cmd := exec.Command("ffmpeg", "-i", "-", "-f", DefaultFormat, "-ar", strconv.Itoa(DefaultSampleRate), "-acodec", DefaultAudioCodec, "-") + cmd.Stdin = streamReader + cmd.Stdout = rawAudioFile + cmd.Stderr = &errOut + + if err = cmd.Run(); err != nil { + log.Println(errOut.String()) + return fmt.Errorf("error processing audio: %v", err) + } + + if err = encodedAudioFile.Close(); err != nil { + return fmt.Errorf("error writing encoded file: %v", err) + } + + if err = rawAudioFile.Close(); err != nil { + return fmt.Errorf("error writing file: %v", err) + } + + rawAudioFile, err = os.Open(f.rawAudioPath()) + if err != nil { + return fmt.Errorf("error reading file: %v", err) + } + + fi, err := rawAudioFile.Stat() + if err != nil { + return fmt.Errorf("error reading file info: %v", err) + } + + numFrames := fi.Size() / int64(SizeOfInt16) / int64(format.AudioChannels) + sampleRate, err := strconv.Atoi(format.AudioSampleRate) + if err != nil { + return fmt.Errorf("invalid samplerate: %s", format.AudioSampleRate) + } + + f.AudioFileMetadata = AudioFileMetadata{ + Bytes: fi.Size(), + Channels: format.AudioChannels, + Frames: numFrames, + SampleRate: sampleRate, + } + + metadataFile, err := os.Create(f.metadataPath()) + if err != 
nil { + return fmt.Errorf("error opening metadata file: %v", err) + } + + if err = json.NewEncoder(metadataFile).Encode(f.AudioFileMetadata); err != nil { + return fmt.Errorf("error encoding metadata: %v", err) + } + + if err = metadataFile.Close(); err != nil { + return fmt.Errorf("error writing metadata file: %v", err) + } + return nil } +func (f *AudioFile) Peaks(start, end int64, numBins int) ([][]int16, error) { + if !f.Exists() { + return nil, errors.New("cannot compute peaks for non-existent file") + } + + var err error + fptr, err := os.Open(f.rawAudioPath()) + if err != nil { + return nil, fmt.Errorf("audio open error: %v", err) + } + defer fptr.Close() + + numChannels := f.Channels + + startByte := start * int64(numChannels) * SizeOfInt16 + if _, err = fptr.Seek(startByte, io.SeekStart); err != nil { + return nil, fmt.Errorf("audio seek error: %v", err) + } + + numFrames := end - start + framesPerBin := numFrames / int64(numBins) + + peaks := make([][]int16, numChannels) + for i := 0; i < numChannels; i++ { + peaks[i] = make([]int16, numBins) + } + + samples := make([]int16, framesPerBin*int64(numChannels)) + + for binNum := 0; binNum < numBins; binNum++ { + if err := binary.Read(fptr, binary.LittleEndian, samples); err != nil { + return nil, fmt.Errorf("error reading samples: %v", err) + } + for i, samp := range samples { + if samp < 0 { + samp = -samp + } + chanIndex := i % numChannels + if samp > peaks[chanIndex][binNum] { + peaks[chanIndex][binNum] = samp + } + } + } + + return peaks, nil +} + func handleRequest(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodGet { w.WriteHeader(http.StatusMethodNotAllowed) @@ -92,39 +222,135 @@ func handleRequest(w http.ResponseWriter, r *http.Request) { return } - if !strings.HasPrefix(r.URL.Path, "/api/audio") { - w.WriteHeader(http.StatusNotFound) - w.Write([]byte("page not found")) - return - } - - videoID := r.URL.Query().Get("video_id") - if videoID == "" { - 
w.WriteHeader(http.StatusBadRequest) - w.Write([]byte("no video ID provided")) - return - } - - downloader, err := NewAudioDownloader(videoID) - if err != nil { - log.Printf("downloader error: %v", err) - w.WriteHeader(http.StatusInternalServerError) - w.Write([]byte("could not download video")) - return - } - defer downloader.Close() - - w.Header().Add("Content-Type", ContentTypeAudioM4A) + w.Header().Add("Content-Type", ContentTypeApplicationJSON) w.Header().Add("Access-Control-Allow-Origin", "*") - w.WriteHeader(http.StatusOK) - n, err := io.Copy(w, downloader) - if err != nil { - log.Printf("error writing response: %v", err) + if strings.HasPrefix(r.URL.Path, "/api/download") { + videoID := r.URL.Query().Get("video_id") + if videoID == "" { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"error": "no video ID provided"}`)) + return + } + + audioFile, err := NewAudioFile(videoID) + if err != nil { + log.Printf("error building audio file: %v", err) + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"error": "could not download audio"}`)) + return + } + + if !audioFile.Exists() { + if err = audioFile.Download(); err != nil { + log.Printf("error downloading audio file: %v", err) + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"error": "could not download audio"}`)) + return + } + } + + w.WriteHeader(http.StatusOK) + + err = json.NewEncoder(w).Encode(audioFile) + if err != nil { + log.Printf("error encoding audio file: %v", err) + } + return } - log.Printf("wrote %d bytes for video ID %s", n, videoID) + if strings.HasPrefix(r.URL.Path, "/api/audio") { + log.Printf("got headers for audio request: %+v", r.Header) + videoID := r.URL.Query().Get("video_id") + if videoID == "" { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"error": "no video ID provided"}`)) + return + } + + audioFile, err := NewAudioFile(videoID) + if err != nil { + log.Printf("error building audio file: %v", err) + 
w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"error": "could not download audio"}`)) + return + } + if !audioFile.Exists() { + if err = audioFile.Download(); err != nil { + log.Printf("error downloading audio file: %v", err) + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"error": "could not download audio"}`)) + return + } + } + + http.ServeFile(w, r, audioFile.encodedAudioPath()) + + return + } + + if strings.HasPrefix(r.URL.Path, "/api/peaks") { + videoID := r.URL.Query().Get("video_id") + if videoID == "" { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"error": "no video ID provided"}`)) + return + } + + start, err := strconv.ParseInt(r.URL.Query().Get("start"), 0, 64) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"error": "invalid start parameter provided"}`)) + return + } + end, err := strconv.ParseInt(r.URL.Query().Get("end"), 0, 64) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"error": "invalid end parameter provided"}`)) + return + } + numBins, err := strconv.Atoi(r.URL.Query().Get("bins")) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"error": "invalid bins parameter provided"}`)) + return + } + + audioFile, err := NewAudioFile(videoID) + if err != nil { + log.Printf("error building audio file: %v", err) + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"error": "could not download audio"}`)) + return + } + + if !audioFile.Exists() { + log.Println("audio file does not exist, cannot compute peaks") + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"error": "audio file not available"}`)) + return + } + + peaks, err := audioFile.Peaks(start, end, numBins) + if err != nil { + log.Printf("error generating peaks: %v", err) + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"error": "could not generate peaks"}`)) + return + } + + w.WriteHeader(http.StatusOK) + + err = 
json.NewEncoder(w).Encode(peaks) + if err != nil { + log.Printf("error encoding peaks: %v", err) + } + + return + } + + w.WriteHeader(http.StatusNotFound) + w.Write([]byte("page not found")) } func main() { diff --git a/frontend/src/Waveform.tsx b/frontend/src/Waveform.tsx index aebac03..c22c02d 100644 --- a/frontend/src/Waveform.tsx +++ b/frontend/src/Waveform.tsx @@ -1,61 +1,228 @@ -import { useEffect, useState, useRef } from "react"; +import { useEffect, useState, useRef, MouseEvent } from "react"; type WaveformProps = { audioContext: AudioContext; }; +type AudioFile = { + bytes: number; + channels: number; + frames: number; + sampleRate: number; +}; + export const Waveform: React.FC = ({ audioContext }: WaveformProps) => { - const [audioData, setAudioData] = useState(null); - const canvasRef = useRef(null); + const [audioFile, setAudioFile] = useState(null); + const [currentTime, setCurrentTime] = useState(0); + const [audio, setAudio] = useState(new Audio()); + + const waveformCanvasRef = useRef(null); + const hudCanvasRef = useRef(null); + const canvasLogicalWidth = 2000; + const canvasLogicalHeight = 500; + const videoID = new URLSearchParams(window.location.search).get("video_id") + + // helpers + + const mouseEventToCanvasX = (evt: MouseEvent): number => { + // TODO: use offsetX/offsetY? 
+ const rect = evt.currentTarget.getBoundingClientRect(); + const elementX = evt.clientX - rect.left; + const canvas = evt.target as HTMLCanvasElement; + return elementX * canvas.width / rect.width; + }; + + const canvasXToFrame = (x: number): number => { + if (audioFile == null) { + return 0; + } + return Math.floor((x / canvasLogicalWidth) * audioFile.frames); + } + + const canvasXToSecs = (x: number): number => { + if (audioFile == null) { + return 0; + } + const duration = audioFile.frames / audioFile.sampleRate; + return (canvasXToFrame(x) / audioFile.frames) * duration; + } + + const secsToCanvasX = (canvasWidth: number, secs: number): number => { + if (audioFile == null) { + return 0; + } + const duration = audioFile.frames / audioFile.sampleRate; + return Math.floor(canvasWidth * (secs / duration)); + }; + + // effects + + // setup player on page load: + useEffect(() => { + (async function() { + audio.addEventListener("timeupdate", () => { setCurrentTime(audio.currentTime); }); + })() + }, [audio]); // load audio data on page load: useEffect(() => { (async function() { console.log("fetching audio data..."); - const videoID = "s_oJYdRlrv0"; + const resp = await fetch(`http://localhost:8888/api/download?video_id=${videoID}`); + const respBody = await resp.json(); - const resp = await fetch(`http://localhost:8888/api/audio?video_id=${videoID}`) - console.log("resp =", resp) + if (respBody.error) { + console.log("error fetching audio data:", respBody.error) + return; + } - const body = await resp.arrayBuffer(); - console.log("body =", body) + // TODO: safer deserialization? 
+ const audioFile: AudioFile = { + bytes: respBody.bytes, + channels: respBody.channels, + frames: respBody.frames, + sampleRate: respBody.sample_rate, + }; - const data = await audioContext.decodeAudioData(body); - - console.log("decodedAudio =", data, "len =", data.length); - setAudioData(data); + setAudioFile(audioFile); })(); }, [audioContext]); // render waveform to canvas when audioData is updated: useEffect(() => { - const canvas = canvasRef.current; - if (canvas == null) { - console.error("no canvas ref available"); - return - } + (async function() { + if (audioFile == null) { + return; + } - const ctx = canvas.getContext("2d"); - if (ctx == null) { + console.log("audiofile is", audioFile); + + const canvas = waveformCanvasRef.current; + if (canvas == null) { + console.error("no canvas ref available"); + return; + } + + const ctx = canvas.getContext("2d"); + if (ctx == null) { console.error("no 2d context available"); return; - } + } - ctx.fillStyle = 'black'; - ctx.fillRect(0, 0, canvas.width, canvas.height); + ctx.strokeStyle = '#00aa00'; + ctx.fillStyle = 'black'; + ctx.fillRect(0, 0, canvas.width, canvas.height); - if (audioData == null) { + const resp = await fetch(`http://localhost:8888/api/peaks?video_id=${videoID}&start=0&end=${Math.round(audioFile.frames)}&bins=${canvas.width}`); + const peaks = await resp.json(); + console.log("respBody from peaks =", peaks) + + const numChannels = peaks.length; + const chanHeight = canvas.height / numChannels; + for (let c = 0; c < numChannels; c++) { + const yOffset = chanHeight * c; + for (let i = 0; i < peaks[c].length; i++) { + const val = peaks[c][i]; + const height = Math.floor((val / 32768) * chanHeight); + const y1 = ((chanHeight - height)/2)+yOffset; + const y2 = y1 + height; + ctx.beginPath(); + ctx.moveTo(i, y1) + ctx.lineTo(i, y2) + ctx.stroke() + } + } + })(); + }, [audioFile]); + + // redraw HUD + useEffect(() => { + (async function() { + + const canvas = hudCanvasRef.current; + if (canvas == null) 
{ + console.error("no hud canvas ref available"); + return; + } + + const ctx = canvas.getContext("2d"); + if (ctx == null) { + console.error("no hud 2d context available"); + return; + } + + ctx.clearRect(0, 0, canvas.width, canvas.height); + + const x = secsToCanvasX(canvas.width, currentTime); + + ctx.strokeStyle = "red"; + ctx.beginPath(); + ctx.moveTo(x, 0); + ctx.lineTo(x, canvas.height); + ctx.stroke(); + + })() + }, [currentTime]); + + // callbacks + + const handleMouseMove = (evt: MouseEvent) => { + const canvasX = mouseEventToCanvasX(evt); + console.log("mousemove, x =", canvasX, "frame =", canvasXToFrame(canvasX)); + } + + const handleMouseDown = (evt: MouseEvent) => { + if (audioFile == null) { return; } - console.log("rendering audio") - }, [audioData]); + const canvasX = mouseEventToCanvasX(evt); + audio.currentTime = canvasXToSecs(canvasX); + console.log("currentTime now", canvasXToSecs(canvasX)); + }; + + const handleMouseUp = () => { + return null; + }; + + const handlePlay = async () => { + const url = `http://localhost:8888/api/audio?video_id=${videoID}`; + audio.src = url; + await audio.play(); + console.log("playing audio from", url); + } + + const handlePause = () => { + audio.pause(); + console.log("paused audio") + } + + const handleZoomIn = () => { + console.log("zoom in"); + }; + + const handleZoomOut = () => { + console.log("zoom out"); + }; // render component: - console.log("rendering, audioData =", audioData); + const wrapperProps = {width: "90%", height: "500px", position: "relative", margin: "0 auto"} as React.CSSProperties; + const waveformCanvasProps = {width: "100%", position: "absolute", top: 0, left: 0, right: 0, bottom: 0, zIndex: 0} as React.CSSProperties; + const hudCanvasProps = {width: "100%", position: "absolute", top: 0, left: 0, right: 0, bottom: 0, zIndex: 1} as React.CSSProperties; + const clockTextAreaProps = {color: "#999", width: "400px"}; - const canvasProps = {width: "100%", height: "500px"}; - return + return <> +

clipper

+
+ + +
+ + + + + + }