Calculate peaks, draw basic waveform to canvas

This commit is contained in:
Rob Watson 2021-09-11 11:00:27 +02:00
parent 6e987d93fc
commit 4e99f5ac0c
2 changed files with 508 additions and 115 deletions

View File

@ -1,11 +1,17 @@
package main
import (
"bytes"
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"io"
"log"
"net/http"
"os"
"os/exec"
"strconv"
"strings"
"time"
@ -13,78 +19,202 @@ import (
)
const (
ContentTypeAudioM4A = "audio/m4a"
ItagM4AAudio = 140
SizeOfInt16 = 2
ContentTypeAudioM4A = "audio/m4a"
ContentTypeApplicationJSON = "application/json"
ItagM4AAudio = 140
DefaultFormat = "s16le"
DefaultAudioCodec = "pcm_s16le"
DefaultSampleRate = 48000
DefaultHTTPBindAddr = "0.0.0.0:8888"
DefaultTimeout = 5 * time.Second
DefaultTimeout = 30 * time.Second
)
type AudioDownloader struct {
videoID string
ytClient youtube.Client
reader io.Reader
// AudioFileMetadata describes the raw (decoded) audio cached for a video.
// It is serialized as JSON, both to the on-disk metadata file and in API
// responses.
type AudioFileMetadata struct {
	// Bytes is the size of the raw audio file in bytes.
	Bytes int64 `json:"bytes"`
	// Channels is the number of audio channels.
	Channels int `json:"channels"`
	// Frames is the number of frames in the raw audio file, where one frame
	// holds one 16-bit sample per channel.
	Frames int64 `json:"frames"`
	// SampleRate is the audio sample rate, in samples per second.
	SampleRate int `json:"sample_rate"`
}
func NewAudioDownloader(videoID string) (*AudioDownloader, error) {
var (
reader io.Reader
ytClient youtube.Client
)
cachePath := fmt.Sprintf("cache/%s.m4a", videoID)
fetch := true
if _, err := os.Stat(cachePath); err == nil {
if fptr, err := os.Open(cachePath); err == nil {
reader = fptr
fetch = false
} else {
log.Printf("warning: error opening cache file: %v", err)
}
}
if fetch {
log.Println("fetching audio stream from youtube...")
video, err := ytClient.GetVideo(videoID)
if err != nil {
return nil, fmt.Errorf("error fetching video: %v", err)
}
format := video.Formats.FindByItag(ItagM4AAudio)
log.Printf("M4A format expected to contain %d bytes", format.ContentLength)
stream, _, err := ytClient.GetStream(video, format)
if err != nil {
return nil, fmt.Errorf("error fetching stream: %v", err)
}
// TODO: only allow the cached file to be accessed after it has been
// successfully downloaded.
cacheFile, err := os.Create(cachePath)
if err != nil {
return nil, fmt.Errorf("error creating cache file: %v", err)
}
reader = io.TeeReader(stream, cacheFile)
}
return &AudioDownloader{
videoID: videoID,
ytClient: ytClient,
reader: reader,
}, nil
// AudioFile represents the locally cached audio for a single video. The
// embedded AudioFileMetadata carries the exported, JSON-visible fields; the
// remaining fields are unexported and therefore never serialized.
type AudioFile struct {
	AudioFileMetadata
	// videoID identifies the video and is used to derive the cache file paths.
	videoID string
	// exists memoizes a positive result from Exists.
	exists bool
}
func (d *AudioDownloader) Read(p []byte) (int, error) {
return d.reader.Read(p)
// NewAudioFile returns an AudioFile for the given video ID. If a metadata file
// for the video is already cached, its contents are loaded into the returned
// value; otherwise the AudioFile is returned empty and Download must be called
// before the audio can be used.
//
// An error is returned if videoID is not a plausible video ID, or if an
// existing metadata file cannot be opened or decoded.
func NewAudioFile(videoID string) (*AudioFile, error) {
	// videoID arrives from the request query string and is interpolated into
	// file paths, so refuse anything that could escape the cache directory.
	if !isValidVideoID(videoID) {
		return nil, fmt.Errorf("invalid video ID: %q", videoID)
	}

	f := &AudioFile{videoID: videoID}
	if !f.Exists() {
		return f, nil
	}

	metadataFile, err := os.Open(f.metadataPath())
	if err != nil {
		return nil, fmt.Errorf("error opening metadata file: %w", err)
	}
	defer func() { _ = metadataFile.Close() }()

	// Only the exported fields of the embedded AudioFileMetadata are populated
	// by the decoder; videoID and exists are left untouched.
	if err := json.NewDecoder(metadataFile).Decode(f); err != nil {
		return nil, fmt.Errorf("error decoding metadata: %w", err)
	}

	return f, nil
}

// isValidVideoID reports whether videoID is non-empty and consists solely of
// ASCII letters, digits, '-' and '_', which makes it safe to embed in a file
// name.
func isValidVideoID(videoID string) bool {
	if videoID == "" {
		return false
	}
	for _, r := range videoID {
		switch {
		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9', r == '-', r == '_':
			// allowed
		default:
			return false
		}
	}
	return true
}
func (d *AudioDownloader) Close() error {
if rc, ok := d.reader.(io.ReadCloser); ok {
return rc.Close()
// rawAudioPath returns the cache path of the decoded (raw PCM) audio file.
func (f *AudioFile) rawAudioPath() string {
	return "cache/" + f.videoID + ".raw"
}
// encodedAudioPath returns the cache path of the encoded audio file as it was
// received from the remote stream.
func (f *AudioFile) encodedAudioPath() string {
	return "cache/" + f.videoID + ".m4a"
}
// metadataPath returns the cache path of the JSON metadata file.
func (f *AudioFile) metadataPath() string {
	return "cache/" + f.videoID + ".json"
}
// Exists reports whether the audio for this video is available in the cache.
// The presence of the metadata file is used as the indicator. A positive
// answer is remembered on the receiver so later calls skip the filesystem
// check; a negative answer is re-checked on every call.
func (f *AudioFile) Exists() bool {
	if !f.exists {
		_, statErr := os.Stat(f.metadataPath())
		f.exists = statErr == nil
	}
	return f.exists
}
// Download fetches the video's audio from YouTube and populates the cache.
//
// The encoded stream is written unmodified to the encoded audio path while
// simultaneously being piped through ffmpeg, which decodes it to raw
// little-endian signed 16-bit PCM resampled to DefaultSampleRate. Once both
// files are complete, the metadata describing the raw audio is stored on f and
// written to the metadata path. The metadata file is written last because
// Exists treats its presence as the signal that the cache entry is usable.
//
// An error is returned if the video or stream cannot be fetched, if no audio
// format is available, if ffmpeg fails, or if any cache file cannot be
// written.
func (f *AudioFile) Download() error {
	log.Println("fetching audio stream from youtube...")

	var ytClient youtube.Client
	video, err := ytClient.GetVideo(f.videoID)
	if err != nil {
		return fmt.Errorf("error fetching video: %w", err)
	}

	// Choose the audio format with the smallest known content length. The
	// slice is indexed instead of taking the address of the range variable:
	// before Go 1.22 that variable is shared by all iterations, so a pointer
	// to it would silently end up referring to the last candidate.
	formats := video.Formats.WithAudioChannels()
	var format *youtube.Format
	for i := range formats {
		candidate := &formats[i]
		if format == nil || (candidate.ContentLength > 0 && candidate.ContentLength < format.ContentLength) {
			format = candidate
		}
	}
	if format == nil {
		return errors.New("error selecting format: no format available")
	}
	if format.AudioChannels <= 0 {
		return fmt.Errorf("error selecting format: invalid channel count %d", format.AudioChannels)
	}

	stream, _, err := ytClient.GetStream(video, format)
	if err != nil {
		return fmt.Errorf("error fetching stream: %w", err)
	}
	defer func() { _ = stream.Close() }()

	// Both output files are closed explicitly on the success path so that
	// write errors are reported; the deferred calls only matter on early
	// returns, where a redundant second Close is harmless.
	rawAudioFile, err := os.Create(f.rawAudioPath())
	if err != nil {
		return fmt.Errorf("error creating raw audio file: %w", err)
	}
	defer func() { _ = rawAudioFile.Close() }()

	encodedAudioFile, err := os.Create(f.encodedAudioPath())
	if err != nil {
		return fmt.Errorf("error creating encoded audio file: %w", err)
	}
	defer func() { _ = encodedAudioFile.Close() }()

	// Everything ffmpeg reads from the stream is also copied to the encoded
	// audio file.
	streamReader := io.TeeReader(stream, encodedAudioFile)

	var errOut bytes.Buffer
	cmd := exec.Command("ffmpeg", "-i", "-", "-f", DefaultFormat, "-ar", strconv.Itoa(DefaultSampleRate), "-acodec", DefaultAudioCodec, "-")
	cmd.Stdin = streamReader
	cmd.Stdout = rawAudioFile
	cmd.Stderr = &errOut

	if err = cmd.Run(); err != nil {
		log.Println(errOut.String())
		return fmt.Errorf("error processing audio: %w", err)
	}

	if err = encodedAudioFile.Close(); err != nil {
		return fmt.Errorf("error writing encoded audio file: %w", err)
	}
	if err = rawAudioFile.Close(); err != nil {
		return fmt.Errorf("error writing file: %w", err)
	}

	fi, err := os.Stat(f.rawAudioPath())
	if err != nil {
		return fmt.Errorf("error reading file info: %w", err)
	}

	numFrames := fi.Size() / int64(SizeOfInt16) / int64(format.AudioChannels)

	f.AudioFileMetadata = AudioFileMetadata{
		Bytes:    fi.Size(),
		Channels: format.AudioChannels,
		Frames:   numFrames,
		// The raw file was resampled by ffmpeg ("-ar"), so its frames are at
		// DefaultSampleRate regardless of the source format's sample rate.
		SampleRate: DefaultSampleRate,
	}

	if err = f.writeMetadata(); err != nil {
		return err
	}
	f.exists = true

	return nil
}

// writeMetadata persists f.AudioFileMetadata as JSON to the metadata path. If
// the file cannot be written completely it is removed again, so that a
// truncated file is not later mistaken for a valid cache entry.
func (f *AudioFile) writeMetadata() error {
	path := f.metadataPath()

	metadataFile, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("error opening metadata file: %w", err)
	}

	if err := json.NewEncoder(metadataFile).Encode(f.AudioFileMetadata); err != nil {
		_ = metadataFile.Close()
		_ = os.Remove(path)
		return fmt.Errorf("error encoding metadata: %w", err)
	}

	if err := metadataFile.Close(); err != nil {
		_ = os.Remove(path)
		return fmt.Errorf("error writing metadata file: %w", err)
	}

	return nil
}
// Peaks computes per-channel peak amplitudes over the frame range [start, end)
// of the raw audio file.
//
// The range is divided into numBins consecutive bins of equal size
// ((end-start)/numBins frames each; any remainder frames at the end of the
// range are ignored). The result is indexed as peaks[channel][bin] and holds
// the largest absolute sample value seen for that channel within the bin.
//
// The raw file is read as interleaved little-endian signed 16-bit samples.
//
// An error is returned if the audio is not cached, if the arguments are out of
// range, or if the file cannot be opened, seeked or read.
func (f *AudioFile) Peaks(start, end int64, numBins int) ([][]int16, error) {
	if !f.Exists() {
		return nil, errors.New("cannot compute peaks for non-existent file")
	}

	// The arguments originate from request parameters; validate them up front
	// rather than panicking on a division by zero or a negative slice length,
	// and bound the range by the known file length so the read buffer cannot
	// be made arbitrarily large.
	numChannels := f.Channels
	switch {
	case numChannels <= 0:
		return nil, fmt.Errorf("invalid channel count: %d", numChannels)
	case numBins <= 0:
		return nil, fmt.Errorf("invalid number of bins: %d", numBins)
	case start < 0 || end < start || end > f.Frames:
		return nil, fmt.Errorf("invalid frame range %d-%d (file has %d frames)", start, end, f.Frames)
	}

	fptr, err := os.Open(f.rawAudioPath())
	if err != nil {
		return nil, fmt.Errorf("audio open error: %w", err)
	}
	defer func() { _ = fptr.Close() }()

	startByte := start * int64(numChannels) * SizeOfInt16
	if _, err = fptr.Seek(startByte, io.SeekStart); err != nil {
		return nil, fmt.Errorf("audio seek error: %w", err)
	}

	framesPerBin := (end - start) / int64(numBins)

	peaks := make([][]int16, numChannels)
	for i := range peaks {
		peaks[i] = make([]int16, numBins)
	}

	const (
		minSample = -1 << 15
		maxSample = 1<<15 - 1
	)

	// One bin's worth of interleaved samples, reused for every bin.
	samples := make([]int16, framesPerBin*int64(numChannels))

	for binNum := 0; binNum < numBins; binNum++ {
		if err := binary.Read(fptr, binary.LittleEndian, samples); err != nil {
			return nil, fmt.Errorf("error reading samples: %w", err)
		}

		for i, samp := range samples {
			switch {
			case samp == minSample:
				// Negating the minimum int16 overflows back to itself, so
				// clamp it to the largest representable magnitude instead.
				samp = maxSample
			case samp < 0:
				samp = -samp
			}

			// Samples are interleaved, so the channel is the index modulo
			// the channel count.
			chanIndex := i % numChannels
			if samp > peaks[chanIndex][binNum] {
				peaks[chanIndex][binNum] = samp
			}
		}
	}

	return peaks, nil
}
func handleRequest(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
w.WriteHeader(http.StatusMethodNotAllowed)
@ -92,39 +222,135 @@ func handleRequest(w http.ResponseWriter, r *http.Request) {
return
}
if !strings.HasPrefix(r.URL.Path, "/api/audio") {
w.WriteHeader(http.StatusNotFound)
w.Write([]byte("page not found"))
return
}
videoID := r.URL.Query().Get("video_id")
if videoID == "" {
w.WriteHeader(http.StatusBadRequest)
w.Write([]byte("no video ID provided"))
return
}
downloader, err := NewAudioDownloader(videoID)
if err != nil {
log.Printf("downloader error: %v", err)
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte("could not download video"))
return
}
defer downloader.Close()
w.Header().Add("Content-Type", ContentTypeAudioM4A)
w.Header().Add("Content-Type", ContentTypeApplicationJSON)
w.Header().Add("Access-Control-Allow-Origin", "*")
w.WriteHeader(http.StatusOK)
n, err := io.Copy(w, downloader)
if err != nil {
log.Printf("error writing response: %v", err)
if strings.HasPrefix(r.URL.Path, "/api/download") {
videoID := r.URL.Query().Get("video_id")
if videoID == "" {
w.WriteHeader(http.StatusBadRequest)
w.Write([]byte(`{"error": "no video ID provided"}`))
return
}
audioFile, err := NewAudioFile(videoID)
if err != nil {
log.Printf("error building audio file: %v", err)
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte(`{"error": "could not download audio"}`))
return
}
if !audioFile.Exists() {
if err = audioFile.Download(); err != nil {
log.Printf("error downloading audio file: %v", err)
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte(`{"error": "could not download audio"}`))
return
}
}
w.WriteHeader(http.StatusOK)
err = json.NewEncoder(w).Encode(audioFile)
if err != nil {
log.Printf("error encoding audio file: %v", err)
}
return
}
log.Printf("wrote %d bytes for video ID %s", n, videoID)
if strings.HasPrefix(r.URL.Path, "/api/audio") {
log.Printf("got headers for audio request: %+v", r.Header)
videoID := r.URL.Query().Get("video_id")
if videoID == "" {
w.WriteHeader(http.StatusBadRequest)
w.Write([]byte(`{"error": "no video ID provided"}`))
return
}
audioFile, err := NewAudioFile(videoID)
if err != nil {
log.Printf("error building audio file: %v", err)
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte(`{"error": "could not download audio"}`))
return
}
if !audioFile.Exists() {
if err = audioFile.Download(); err != nil {
log.Printf("error downloading audio file: %v", err)
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte(`{"error": "could not download audio"}`))
return
}
}
http.ServeFile(w, r, audioFile.encodedAudioPath())
return
}
// GET /api/peaks?video_id=…&start=…&end=…&bins=…
//
// Responds with the JSON-encoded peaks for the requested frame range of an
// already-downloaded audio file. This route never triggers a download.
if strings.HasPrefix(r.URL.Path, "/api/peaks") {
	videoID := r.URL.Query().Get("video_id")
	if videoID == "" {
		w.WriteHeader(http.StatusBadRequest)
		w.Write([]byte(`{"error": "no video ID provided"}`))
		return
	}

	start, err := strconv.ParseInt(r.URL.Query().Get("start"), 0, 64)
	if err != nil {
		w.WriteHeader(http.StatusBadRequest)
		w.Write([]byte(`{"error": "invalid start parameter provided"}`))
		return
	}

	end, err := strconv.ParseInt(r.URL.Query().Get("end"), 0, 64)
	if err != nil {
		w.WriteHeader(http.StatusBadRequest)
		w.Write([]byte(`{"error": "invalid end parameter provided"}`))
		return
	}

	numBins, err := strconv.Atoi(r.URL.Query().Get("bins"))
	if err != nil {
		w.WriteHeader(http.StatusBadRequest)
		w.Write([]byte(`{"error": "invalid bins parameter provided"}`))
		return
	}

	audioFile, err := NewAudioFile(videoID)
	if err != nil {
		log.Printf("error building audio file: %v", err)
		w.WriteHeader(http.StatusInternalServerError)
		w.Write([]byte(`{"error": "could not download audio"}`))
		return
	}

	if !audioFile.Exists() {
		log.Println("audio file does not exists, cannot compute peaks")
		w.WriteHeader(http.StatusInternalServerError)
		w.Write([]byte(`{"error": "audio file not available"}`))
		return
	}

	peaks, err := audioFile.Peaks(start, end, numBins)
	if err != nil {
		log.Printf("error generating peaks: %v", err)
		w.WriteHeader(http.StatusInternalServerError)
		w.Write([]byte(`{"error": "could not generate peaks"}`))
		// Stop here: falling through would attempt a second WriteHeader and
		// append a JSON "null" after the error body.
		return
	}

	w.WriteHeader(http.StatusOK)
	err = json.NewEncoder(w).Encode(peaks)
	if err != nil {
		log.Printf("error encoding peaks: %v", err)
	}
	return
}
w.WriteHeader(http.StatusNotFound)
w.Write([]byte("page not found"))
}
func main() {

View File

@ -1,61 +1,228 @@
import { useEffect, useState, useRef } from "react";
import { useEffect, useState, useRef, MouseEvent } from "react";
// Props accepted by the Waveform component.
type WaveformProps = {
  // Web Audio context supplied by the parent component.
  audioContext: AudioContext;
};
// Client-side view of the audio metadata returned by the /api/download
// endpoint. Note that sampleRate is mapped from the snake_case `sample_rate`
// field of the response.
type AudioFile = {
  // Size in bytes, as reported by the server.
  bytes: number;
  // Number of audio channels.
  channels: number;
  // Total number of frames in the audio.
  frames: number;
  // Frames per second; frames / sampleRate gives the duration in seconds.
  sampleRate: number;
};
export const Waveform: React.FC<WaveformProps> = ({ audioContext }: WaveformProps) => {
const [audioData, setAudioData] = useState<AudioBuffer | null>(null);
const canvasRef = useRef<HTMLCanvasElement>(null);
const [audioFile, setAudioFile] = useState<AudioFile | null>(null);
const [currentTime, setCurrentTime] = useState(0);
const [audio, setAudio] = useState(new Audio());
const waveformCanvasRef = useRef<HTMLCanvasElement>(null);
const hudCanvasRef = useRef<HTMLCanvasElement>(null);
const canvasLogicalWidth = 2000;
const canvasLogicalHeight = 500;
const videoID = new URLSearchParams(window.location.search).get("video_id")
// helpers
// Converts the viewport X position of a mouse event into an X coordinate in
// the canvas's logical pixel space (0..canvas.width), compensating for the
// canvas being scaled by CSS.
const mouseEventToCanvasX = (evt: MouseEvent<HTMLCanvasElement>): number => {
  // TODO: use offsetX/offsetY?
  // currentTarget is the element the handler is bound to and is already typed
  // as HTMLCanvasElement, so use it for both the bounding rect and the logical
  // width instead of casting evt.target.
  const canvas = evt.currentTarget;
  const rect = canvas.getBoundingClientRect();
  const elementX = evt.clientX - rect.left;
  return elementX * canvas.width / rect.width;
};
// Maps a logical canvas X coordinate to the corresponding audio frame index.
// Returns 0 while no audio file metadata has been loaded.
const canvasXToFrame = (x: number): number =>
  audioFile == null
    ? 0
    : Math.floor((x / canvasLogicalWidth) * audioFile.frames);
// Maps a logical canvas X coordinate to a playback position in seconds.
// Returns 0 while no audio file metadata has been loaded.
const canvasXToSecs = (x: number): number => {
  if (audioFile == null) {
    return 0;
  }

  const { frames, sampleRate } = audioFile;
  const totalSecs = frames / sampleRate;
  const fraction = canvasXToFrame(x) / frames;
  return fraction * totalSecs;
};
// Maps a playback position in seconds to an X coordinate on a canvas of the
// given width. Returns 0 while no audio file metadata has been loaded.
const secsToCanvasX = (canvasWidth: number, secs: number): number => {
  if (audioFile == null) {
    return 0;
  }

  const { frames, sampleRate } = audioFile;
  const totalSecs = frames / sampleRate;
  const fraction = secs / totalSecs;
  return Math.floor(canvasWidth * fraction);
};
// effects
// setup player on page load:
useEffect(() => {
  // Mirror the player's position into component state so the HUD can follow it.
  const handleTimeUpdate = () => { setCurrentTime(audio.currentTime); };
  audio.addEventListener("timeupdate", handleTimeUpdate);

  // Detach the listener when the effect is torn down (unmount, or a different
  // audio element), otherwise listeners accumulate and keep calling setState.
  return () => {
    audio.removeEventListener("timeupdate", handleTimeUpdate);
  };
}, [audio]);
// load audio data on page load:
useEffect(() => {
(async function() {
console.log("fetching audio data...");
const videoID = "s_oJYdRlrv0";
const resp = await fetch(`http://localhost:8888/api/download?video_id=${videoID}`);
const respBody = await resp.json();
const resp = await fetch(`http://localhost:8888/api/audio?video_id=${videoID}`)
console.log("resp =", resp)
if (respBody.error) {
console.log("error fetching audio data:", respBody.error)
return;
}
const body = await resp.arrayBuffer();
console.log("body =", body)
// TODO: safer deserialization?
const audioFile: AudioFile = {
bytes: respBody.bytes,
channels: respBody.channels,
frames: respBody.frames,
sampleRate: respBody.sample_rate,
};
const data = await audioContext.decodeAudioData(body);
console.log("decodedAudio =", data, "len =", data.length);
setAudioData(data);
setAudioFile(audioFile);
})();
}, [audioContext]);
// render waveform to canvas when audioData is updated:
useEffect(() => {
const canvas = canvasRef.current;
if (canvas == null) {
console.error("no canvas ref available");
return
}
(async function() {
if (audioFile == null) {
return;
}
const ctx = canvas.getContext("2d");
if (ctx == null) {
console.log("audiofile is", audioFile);
const canvas = waveformCanvasRef.current;
if (canvas == null) {
console.error("no canvas ref available");
return;
}
const ctx = canvas.getContext("2d");
if (ctx == null) {
console.error("no 2d context available");
return;
}
}
ctx.fillStyle = 'black';
ctx.fillRect(0, 0, canvas.width, canvas.height);
ctx.strokeStyle = '#00aa00';
ctx.fillStyle = 'black';
ctx.fillRect(0, 0, canvas.width, canvas.height);
if (audioData == null) {
const resp = await fetch(`http://localhost:8888/api/peaks?video_id=${videoID}&start=0&end=${Math.round(audioFile.frames)}&bins=${canvas.width}`);
const peaks = await resp.json();
console.log("respBody from peaks =", peaks)
const numChannels = peaks.length;
const chanHeight = canvas.height / numChannels;
for (let c = 0; c < numChannels; c++) {
const yOffset = chanHeight * c;
for (let i = 0; i < peaks[c].length; i++) {
const val = peaks[c][i];
const height = Math.floor((val / 32768) * chanHeight);
const y1 = ((chanHeight - height)/2)+yOffset;
const y2 = y1 + height;
ctx.beginPath();
ctx.moveTo(i, y1)
ctx.lineTo(i, y2)
ctx.stroke()
}
}
})();
}, [audioFile]);
// redraw HUD
useEffect(() => {
  // Nothing here is asynchronous, so the work runs directly in the effect
  // rather than inside an async wrapper that would turn any exception into an
  // unhandled promise rejection.
  const canvas = hudCanvasRef.current;
  if (canvas == null) {
    console.error("no hud canvas ref available");
    return;
  }

  const ctx = canvas.getContext("2d");
  if (ctx == null) {
    console.error("no hud 2d context available");
    return;
  }

  ctx.clearRect(0, 0, canvas.width, canvas.height);

  // Draw the playhead as a vertical red line at the current position.
  const x = secsToCanvasX(canvas.width, currentTime);

  ctx.strokeStyle = "red";
  ctx.beginPath();
  ctx.moveTo(x, 0);
  ctx.lineTo(x, canvas.height);
  ctx.stroke();
  // audioFile is a dependency because secsToCanvasX reads it to compute the
  // duration; the HUD is redrawn once the metadata arrives.
}, [currentTime, audioFile]);
// callbacks
// Logs the logical canvas position and audio frame under the pointer.
const handleMouseMove = (evt: MouseEvent<HTMLCanvasElement>) => {
  const x = mouseEventToCanvasX(evt);
  const frame = canvasXToFrame(x);
  console.log("mousemove, x =", x, "frame =", frame);
};
const handleMouseDown = (evt: MouseEvent<HTMLCanvasElement>) => {
if (audioFile == null) {
return;
}
console.log("rendering audio")
}, [audioData]);
const canvasX = mouseEventToCanvasX(evt);
audio.currentTime = canvasXToSecs(canvasX);
console.log("currentTime now", canvasXToSecs(canvasX));
};
// Placeholder mouse-up handler; currently does nothing.
const handleMouseUp = () => {
  return null;
};
// Starts (or resumes) playback of the audio for the current video.
const handlePlay = async () => {
  const url = `http://localhost:8888/api/audio?video_id=${videoID}`;

  // Assigning src, even to its current value, makes the media element reload
  // and rewind to the start. Only assign it when it actually changes so that
  // resuming after a pause, or playing after a seek, keeps the position.
  if (audio.src !== url) {
    audio.src = url;
  }

  await audio.play();
  console.log("playing audio from", url);
}
// Pauses playback; the current position is left where it is.
const handlePause = () => {
  audio.pause();
  console.log("paused audio")
}
// Placeholder zoom-in handler; currently only logs.
const handleZoomIn = () => {
  console.log("zoom in");
};
// Placeholder zoom-out handler; currently only logs.
const handleZoomOut = () => {
  console.log("zoom out");
};
// render component:
console.log("rendering, audioData =", audioData);
const wrapperProps = {width: "90%", height: "500px", position: "relative", margin: "0 auto"} as React.CSSProperties;
const waveformCanvasProps = {width: "100%", position: "absolute", top: 0, left: 0, right: 0, bottom: 0, zIndex: 0} as React.CSSProperties;
const hudCanvasProps = {width: "100%", position: "absolute", top: 0, left: 0, right: 0, bottom: 0, zIndex: 1} as React.CSSProperties;
const clockTextAreaProps = {color: "#999", width: "400px"};
const canvasProps = {width: "100%", height: "500px"};
return <canvas ref={canvasRef} style={canvasProps}></canvas>
return <>
<h1>clipper</h1>
<div style={wrapperProps}>
<canvas ref={waveformCanvasRef} width={canvasLogicalWidth} height={canvasLogicalHeight} style={waveformCanvasProps}></canvas>
<canvas ref={hudCanvasRef} width={canvasLogicalWidth} height={canvasLogicalHeight} onMouseMove={handleMouseMove} onMouseDown={handleMouseDown} onMouseUp={handleMouseUp} style={hudCanvasProps}></canvas>
</div>
<button onClick={handlePlay}>Play</button>
<button onClick={handlePause}>Pause</button>
<button onClick={handleZoomIn}>+</button>
<button onClick={handleZoomOut}>-</button>
<input type="readonly" style={clockTextAreaProps}/>
</>
}