From 4e99f5ac0ca659f3e5c083d0613b012849844607 Mon Sep 17 00:00:00 2001 From: Rob Watson Date: Sat, 11 Sep 2021 11:00:27 +0200 Subject: [PATCH] Calculate peaks, draw basic waveform to canvas --- backend/main.go | 400 +++++++++++++++++++++++++++++--------- frontend/src/Waveform.tsx | 223 ++++++++++++++++++--- 2 files changed, 508 insertions(+), 115 deletions(-) diff --git a/backend/main.go b/backend/main.go index 38367b8..ef19398 100644 --- a/backend/main.go +++ b/backend/main.go @@ -1,11 +1,17 @@ package main import ( + "bytes" + "encoding/binary" + "encoding/json" + "errors" "fmt" "io" "log" "net/http" "os" + "os/exec" + "strconv" "strings" "time" @@ -13,78 +19,202 @@ import ( ) const ( - ContentTypeAudioM4A = "audio/m4a" - ItagM4AAudio = 140 + SizeOfInt16 = 2 + ContentTypeAudioM4A = "audio/m4a" + ContentTypeApplicationJSON = "application/json" + ItagM4AAudio = 140 + + DefaultFormat = "s16le" + DefaultAudioCodec = "pcm_s16le" + DefaultSampleRate = 48000 DefaultHTTPBindAddr = "0.0.0.0:8888" - DefaultTimeout = 5 * time.Second + DefaultTimeout = 30 * time.Second ) -type AudioDownloader struct { - videoID string - ytClient youtube.Client - reader io.Reader +type AudioFileMetadata struct { + Bytes int64 `json:"bytes"` + Channels int `json:"channels"` + Frames int64 `json:"frames"` + SampleRate int `json:"sample_rate"` } -func NewAudioDownloader(videoID string) (*AudioDownloader, error) { - var ( - reader io.Reader - ytClient youtube.Client - ) - - cachePath := fmt.Sprintf("cache/%s.m4a", videoID) - fetch := true - - if _, err := os.Stat(cachePath); err == nil { - if fptr, err := os.Open(cachePath); err == nil { - reader = fptr - fetch = false - } else { - log.Printf("warning: error opening cache file: %v", err) - } - } - - if fetch { - log.Println("fetching audio stream from youtube...") - - video, err := ytClient.GetVideo(videoID) - if err != nil { - return nil, fmt.Errorf("error fetching video: %v", err) - } - - format := video.Formats.FindByItag(ItagM4AAudio) - 
log.Printf("M4A format expected to contain %d bytes", format.ContentLength) - stream, _, err := ytClient.GetStream(video, format) - if err != nil { - return nil, fmt.Errorf("error fetching stream: %v", err) - } - - // TODO: only allow the cached file to be accessed after it has been - // successfully downloaded. - cacheFile, err := os.Create(cachePath) - if err != nil { - return nil, fmt.Errorf("error creating cache file: %v", err) - } - reader = io.TeeReader(stream, cacheFile) - } - - return &AudioDownloader{ - videoID: videoID, - ytClient: ytClient, - reader: reader, - }, nil +type AudioFile struct { + AudioFileMetadata + videoID string + exists bool } -func (d *AudioDownloader) Read(p []byte) (int, error) { - return d.reader.Read(p) +func NewAudioFile(videoID string) (*AudioFile, error) { + f := &AudioFile{videoID: videoID} + + if f.Exists() { + metadataFile, err := os.Open(f.metadataPath()) + if err != nil { + return nil, fmt.Errorf("error opening metadata file: %v", err) + } + defer func() { _ = metadataFile.Close() }() + + if err := json.NewDecoder(metadataFile).Decode(f); err != nil { + return nil, fmt.Errorf("error decoding metadata: %v", err) + } + + f.exists = true + } + + return f, nil } -func (d *AudioDownloader) Close() error { - if rc, ok := d.reader.(io.ReadCloser); ok { - return rc.Close() +func (f *AudioFile) rawAudioPath() string { return fmt.Sprintf("cache/%s.raw", f.videoID) } +func (f *AudioFile) encodedAudioPath() string { return fmt.Sprintf("cache/%s.m4a", f.videoID) } +func (f *AudioFile) metadataPath() string { return fmt.Sprintf("cache/%s.json", f.videoID) } + +func (f *AudioFile) Exists() bool { + if f.exists { + return true } + if _, err := os.Stat(f.metadataPath()); err == nil { + f.exists = true + return true + } + return false +} + +func (f *AudioFile) Download() error { + log.Println("fetching audio stream from youtube...") + var ytClient youtube.Client + + video, err := ytClient.GetVideo(f.videoID) + if err != nil { + return 
fmt.Errorf("error fetching video: %v", err) + } + + var format *youtube.Format + for _, candidate := range video.Formats.WithAudioChannels() { + candidate := candidate + if format == nil || (candidate.ContentLength > 0 && candidate.ContentLength < format.ContentLength) { + format = &candidate + } + } + if format == nil { + return errors.New("error selecting format: no format available") + } + + stream, _, err := ytClient.GetStream(video, format) + if err != nil { + return fmt.Errorf("error fetching stream: %v", err) + } + + rawAudioFile, err := os.Create(f.rawAudioPath()) + if err != nil { + return fmt.Errorf("error creating raw audio file: %v", err) + } + + encodedAudioFile, err := os.Create(f.encodedAudioPath()) + if err != nil { + return fmt.Errorf("error creating encoded audio file: %v", err) + } + streamReader := io.TeeReader(stream, encodedAudioFile) + + var errOut bytes.Buffer + cmd := exec.Command("ffmpeg", "-i", "-", "-f", DefaultFormat, "-ar", strconv.Itoa(DefaultSampleRate), "-acodec", DefaultAudioCodec, "-") + cmd.Stdin = streamReader + cmd.Stdout = rawAudioFile + cmd.Stderr = &errOut + + if err = cmd.Run(); err != nil { + log.Println(errOut.String()) + return fmt.Errorf("error processing audio: %v", err) + } + + if err = encodedAudioFile.Close(); err != nil { + return fmt.Errorf("error writing encoded file: %v", err) + } + + if err = rawAudioFile.Close(); err != nil { + return fmt.Errorf("error writing file: %v", err) + } + + rawAudioFile, err = os.Open(f.rawAudioPath()) + if err != nil { + return fmt.Errorf("error reading file: %v", err) + } + + fi, err := rawAudioFile.Stat() + if err != nil { + return fmt.Errorf("error reading file info: %v", err) + } + + numFrames := fi.Size() / int64(SizeOfInt16) / int64(format.AudioChannels) + sampleRate, err := strconv.Atoi(format.AudioSampleRate) + if err != nil { + return fmt.Errorf("invalid samplerate: %s", format.AudioSampleRate) + } + + f.AudioFileMetadata = AudioFileMetadata{ + Bytes: fi.Size(), + Channels: format.AudioChannels, + Frames: numFrames, + SampleRate: sampleRate, + } + + metadataFile, err := os.Create(f.metadataPath()) + if err != 
nil { + return fmt.Errorf("error opening metadata file: %v", err) + } + + if err = json.NewEncoder(metadataFile).Encode(f.AudioFileMetadata); err != nil { + return fmt.Errorf("error encoding metadata: %v", err) + } + + if err = metadataFile.Close(); err != nil { + return fmt.Errorf("error writing metadata file: %v", err) + } + return nil } +func (f *AudioFile) Peaks(start, end int64, numBins int) ([][]int16, error) { + if !f.Exists() { + return nil, errors.New("cannot compute peaks for non-existent file") + } + + var err error + fptr, err := os.Open(f.rawAudioPath()) + if err != nil { + return nil, fmt.Errorf("audio open error: %v", err) + } + defer fptr.Close() + + numChannels := f.Channels + + startByte := start * int64(numChannels) * SizeOfInt16 + if _, err = fptr.Seek(startByte, io.SeekStart); err != nil { + return nil, fmt.Errorf("audio seek error: %v", err) + } + + numFrames := end - start + framesPerBin := numFrames / int64(numBins) + + peaks := make([][]int16, numChannels) + for i := 0; i < numChannels; i++ { + peaks[i] = make([]int16, numBins) + } + + samples := make([]int16, framesPerBin*int64(numChannels)) + + for binNum := 0; binNum < numBins; binNum++ { + if err := binary.Read(fptr, binary.LittleEndian, samples); err != nil { + return nil, fmt.Errorf("error reading samples: %v", err) + } + for i, samp := range samples { + if samp < 0 { + samp = -samp + } + chanIndex := i % numChannels + if samp > peaks[chanIndex][binNum] { + peaks[chanIndex][binNum] = samp + } + } + } + + return peaks, nil +} + func handleRequest(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodGet { w.WriteHeader(http.StatusMethodNotAllowed) @@ -92,39 +222,135 @@ func handleRequest(w http.ResponseWriter, r *http.Request) { return } - if !strings.HasPrefix(r.URL.Path, "/api/audio") { - w.WriteHeader(http.StatusNotFound) - w.Write([]byte("page not found")) - return - } - - videoID := r.URL.Query().Get("video_id") - if videoID == "" { - 
w.WriteHeader(http.StatusBadRequest) - w.Write([]byte("no video ID provided")) - return - } - - downloader, err := NewAudioDownloader(videoID) - if err != nil { - log.Printf("downloader error: %v", err) - w.WriteHeader(http.StatusInternalServerError) - w.Write([]byte("could not download video")) - return - } - defer downloader.Close() - - w.Header().Add("Content-Type", ContentTypeAudioM4A) + w.Header().Add("Content-Type", ContentTypeApplicationJSON) w.Header().Add("Access-Control-Allow-Origin", "*") - w.WriteHeader(http.StatusOK) - n, err := io.Copy(w, downloader) - if err != nil { - log.Printf("error writing response: %v", err) + if strings.HasPrefix(r.URL.Path, "/api/download") { + videoID := r.URL.Query().Get("video_id") + if videoID == "" { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"error": "no video ID provided"}`)) + return + } + + audioFile, err := NewAudioFile(videoID) + if err != nil { + log.Printf("error building audio file: %v", err) + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"error": "could not download audio"}`)) + return + } + + if !audioFile.Exists() { + if err = audioFile.Download(); err != nil { + log.Printf("error downloading audio file: %v", err) + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"error": "could not download audio"}`)) + return + } + } + + w.WriteHeader(http.StatusOK) + + err = json.NewEncoder(w).Encode(audioFile) + if err != nil { + log.Printf("error encoding audio file: %v", err) + } + return } - log.Printf("wrote %d bytes for video ID %s", n, videoID) + if strings.HasPrefix(r.URL.Path, "/api/audio") { + log.Printf("got headers for audio request: %+v", r.Header) + videoID := r.URL.Query().Get("video_id") + if videoID == "" { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"error": "no video ID provided"}`)) + return + } + + audioFile, err := NewAudioFile(videoID) + if err != nil { + log.Printf("error building audio file: %v", err) + 
w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"error": "could not download audio"}`)) + return + } + if !audioFile.Exists() { + if err = audioFile.Download(); err != nil { + log.Printf("error downloading audio file: %v", err) + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"error": "could not download audio"}`)) + return + } + } + + http.ServeFile(w, r, audioFile.encodedAudioPath()) + + return + } + + if strings.HasPrefix(r.URL.Path, "/api/peaks") { + videoID := r.URL.Query().Get("video_id") + if videoID == "" { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"error": "no video ID provided"}`)) + return + } + + start, err := strconv.ParseInt(r.URL.Query().Get("start"), 0, 64) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"error": "invalid start parameter provided"}`)) + return + } + end, err := strconv.ParseInt(r.URL.Query().Get("end"), 0, 64) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"error": "invalid end parameter provided"}`)) + return + } + numBins, err := strconv.Atoi(r.URL.Query().Get("bins")) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"error": "invalid bins parameter provided"}`)) + return + } + + audioFile, err := NewAudioFile(videoID) + if err != nil { + log.Printf("error building audio file: %v", err) + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"error": "could not download audio"}`)) + return + } + + if !audioFile.Exists() { + log.Println("audio file does not exist, cannot compute peaks") + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"error": "audio file not available"}`)) + return + } + + peaks, err := audioFile.Peaks(start, end, numBins) + if err != nil { + log.Printf("error generating peaks: %v", err) + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"error": "could not generate peaks"}`)) + return + } + + w.WriteHeader(http.StatusOK) + + err = 
json.NewEncoder(w).Encode(peaks) + if err != nil { + log.Printf("error encoding peaks: %v", err) + } + + return + } + + w.WriteHeader(http.StatusNotFound) + w.Write([]byte("page not found")) } func main() { diff --git a/frontend/src/Waveform.tsx b/frontend/src/Waveform.tsx index aebac03..c22c02d 100644 --- a/frontend/src/Waveform.tsx +++ b/frontend/src/Waveform.tsx @@ -1,61 +1,228 @@ -import { useEffect, useState, useRef } from "react"; +import { useEffect, useState, useRef, MouseEvent } from "react"; type WaveformProps = { audioContext: AudioContext; }; +type AudioFile = { + bytes: number; + channels: number; + frames: number; + sampleRate: number; +}; + export const Waveform: React.FC = ({ audioContext }: WaveformProps) => { - const [audioData, setAudioData] = useState(null); - const canvasRef = useRef(null); + const [audioFile, setAudioFile] = useState(null); + const [currentTime, setCurrentTime] = useState(0); + const [audio, setAudio] = useState(new Audio()); + + const waveformCanvasRef = useRef(null); + const hudCanvasRef = useRef(null); + const canvasLogicalWidth = 2000; + const canvasLogicalHeight = 500; + const videoID = new URLSearchParams(window.location.search).get("video_id") + + // helpers + + const mouseEventToCanvasX = (evt: MouseEvent): number => { + // TODO: use offsetX/offsetY? 
+ const rect = evt.currentTarget.getBoundingClientRect(); + const elementX = evt.clientX - rect.left; + const canvas = evt.target as HTMLCanvasElement; + return elementX * canvas.width / rect.width; + }; + + const canvasXToFrame = (x: number): number => { + if (audioFile == null) { + return 0; + } + return Math.floor((x / canvasLogicalWidth) * audioFile.frames); + } + + const canvasXToSecs = (x: number): number => { + if (audioFile == null) { + return 0; + } + const duration = audioFile.frames / audioFile.sampleRate; + return (canvasXToFrame(x) / audioFile.frames) * duration; + } + + const secsToCanvasX = (canvasWidth: number, secs: number): number => { + if (audioFile == null) { + return 0; + } + const duration = audioFile.frames / audioFile.sampleRate; + return Math.floor(canvasWidth * (secs / duration)); + }; + + // effects + + // setup player on page load: + useEffect(() => { + (async function() { + audio.addEventListener("timeupdate", () => { setCurrentTime(audio.currentTime); }); + })() + }, [audio]); // load audio data on page load: useEffect(() => { (async function() { console.log("fetching audio data..."); - const videoID = "s_oJYdRlrv0"; + const resp = await fetch(`http://localhost:8888/api/download?video_id=${videoID}`); + const respBody = await resp.json(); - const resp = await fetch(`http://localhost:8888/api/audio?video_id=${videoID}`) - console.log("resp =", resp) + if (respBody.error) { + console.log("error fetching audio data:", respBody.error) + return; + } - const body = await resp.arrayBuffer(); - console.log("body =", body) + // TODO: safer deserialization? 
+ const audioFile: AudioFile = { + bytes: respBody.bytes, + channels: respBody.channels, + frames: respBody.frames, + sampleRate: respBody.sample_rate, + }; - const data = await audioContext.decodeAudioData(body); - - console.log("decodedAudio =", data, "len =", data.length); - setAudioData(data); + setAudioFile(audioFile); })(); }, [audioContext]); // render waveform to canvas when audioData is updated: useEffect(() => { - const canvas = canvasRef.current; - if (canvas == null) { - console.error("no canvas ref available"); - return - } + (async function() { + if (audioFile == null) { + return; + } - const ctx = canvas.getContext("2d"); - if (ctx == null) { + console.log("audiofile is", audioFile); + + const canvas = waveformCanvasRef.current; + if (canvas == null) { + console.error("no canvas ref available"); + return; + } + + const ctx = canvas.getContext("2d"); + if (ctx == null) { console.error("no 2d context available"); return; - } + } - ctx.fillStyle = 'black'; - ctx.fillRect(0, 0, canvas.width, canvas.height); + ctx.strokeStyle = '#00aa00'; + ctx.fillStyle = 'black'; + ctx.fillRect(0, 0, canvas.width, canvas.height); - if (audioData == null) { + const resp = await fetch(`http://localhost:8888/api/peaks?video_id=${videoID}&start=0&end=${Math.round(audioFile.frames)}&bins=${canvas.width}`); + const peaks = await resp.json(); + console.log("respBody from peaks =", peaks) + + const numChannels = peaks.length; + const chanHeight = canvas.height / numChannels; + for (let c = 0; c < numChannels; c++) { + const yOffset = chanHeight * c; + for (let i = 0; i < peaks[c].length; i++) { + const val = peaks[c][i]; + const height = Math.floor((val / 32768) * chanHeight); + const y1 = ((chanHeight - height)/2)+yOffset; + const y2 = y1 + height; + ctx.beginPath(); + ctx.moveTo(i, y1) + ctx.lineTo(i, y2) + ctx.stroke() + } + } + })(); + }, [audioFile]); + + // redraw HUD + useEffect(() => { + (async function() { + + const canvas = hudCanvasRef.current; + if (canvas == null) 
{ + console.error("no hud canvas ref available"); + return; + } + + const ctx = canvas.getContext("2d"); + if (ctx == null) { + console.error("no hud 2d context available"); + return; + } + + ctx.clearRect(0, 0, canvas.width, canvas.height); + + const x = secsToCanvasX(canvas.width, currentTime); + + ctx.strokeStyle = "red"; + ctx.beginPath(); + ctx.moveTo(x, 0); + ctx.lineTo(x, canvas.height); + ctx.stroke(); + + })() + }, [currentTime]); + + // callbacks + + const handleMouseMove = (evt: MouseEvent) => { + const canvasX = mouseEventToCanvasX(evt); + console.log("mousemove, x =", canvasX, "frame =", canvasXToFrame(canvasX)); + } + + const handleMouseDown = (evt: MouseEvent) => { + if (audioFile == null) { return; } - console.log("rendering audio") - }, [audioData]); + const canvasX = mouseEventToCanvasX(evt); + audio.currentTime = canvasXToSecs(canvasX); + console.log("currentTime now", canvasXToSecs(canvasX)); + }; + + const handleMouseUp = () => { + return null; + }; + + const handlePlay = async () => { + const url = `http://localhost:8888/api/audio?video_id=${videoID}`; + audio.src = url; + await audio.play(); + console.log("playing audio from", url); + } + + const handlePause = () => { + audio.pause(); + console.log("paused audio") + } + + const handleZoomIn = () => { + console.log("zoom in"); + }; + + const handleZoomOut = () => { + console.log("zoom out"); + }; // render component: - console.log("rendering, audioData =", audioData); + const wrapperProps = {width: "90%", height: "500px", position: "relative", margin: "0 auto"} as React.CSSProperties; + const waveformCanvasProps = {width: "100%", position: "absolute", top: 0, left: 0, right: 0, bottom: 0, zIndex: 0} as React.CSSProperties; + const hudCanvasProps = {width: "100%", position: "absolute", top: 0, left: 0, right: 0, bottom: 0, zIndex: 1} as React.CSSProperties; + const clockTextAreaProps = {color: "#999", width: "400px"}; - const canvasProps = {width: "100%", height: "500px"}; - return + return <> +

clipper

+
+ + +
+ + + + + + }