package media import ( "encoding/binary" "encoding/json" "errors" "fmt" "io" "log" "os" "time" ) const SizeOfInt16 = 2 type Audio struct { Bytes int64 `json:"bytes"` Channels int `json:"channels"` // ApproxFrames is used during initial processing when a precise frame count // cannot be determined. Prefer Frames in all other cases. ApproxFrames int64 `json:"approx_frames"` Frames int64 `json:"frames"` SampleRate int `json:"sample_rate"` YoutubeItag int `json:"youtube_itag"` MimeType string `json:"mime_type"` } type Video struct { Bytes int64 `json:"bytes"` Duration time.Duration `json:"duration"` // not sure if this are needed any more? ThumbnailWidth int `json:"thumbnail_width"` ThumbnailHeight int `json:"thumbnail_height"` YoutubeItag int `json:"youtube_itag"` MimeType string `json:"mime_type"` } // MediaSet represents the media and metadata associated with a single media // resource (for example, a YouTube video). type MediaSet struct { Audio Audio `json:"audio"` Video Video `json:"video"` ID string `json:"id"` YoutubeID string `json:"youtube_id"` exists bool `json:"exists"` } // New builds a new MediaSet with the given ID. func NewMediaSet(youtubeID string) *MediaSet { return &MediaSet{YoutubeID: youtubeID} } // TODO: pass io.Readers/Writers instead of strings. func (m *MediaSet) RawAudioPath() string { return fmt.Sprintf("cache/%s.raw", m.YoutubeID) } func (m *MediaSet) EncodedAudioPath() string { return fmt.Sprintf("cache/%s.m4a", m.YoutubeID) } func (m *MediaSet) VideoPath() string { return fmt.Sprintf("cache/%s.mp4", m.YoutubeID) } func (m *MediaSet) ThumbnailPath() string { return fmt.Sprintf("cache/%s.jpg", m.YoutubeID) } func (m *MediaSet) MetadataPath() string { return fmt.Sprintf("cache/%s.json", m.YoutubeID) } func (m *MediaSet) Exists() bool { if m.YoutubeID == "" { return false } if m.exists { return true } if _, err := os.Stat(m.MetadataPath()); err == nil { m.exists = true return true } return false } func (m *MediaSet) Load() error { if m.YoutubeID == "" { return errors.New("error opening mediaset with blank ID") } metadataFile, err := os.Open(m.MetadataPath()) if err != nil { return fmt.Errorf("error opening metadata file: %v", err) } defer func() { _ = metadataFile.Close() }() if err := json.NewDecoder(metadataFile).Decode(m); err != nil { return fmt.Errorf("error decoding metadata: %v", err) } return nil } func (m *MediaSet) Peaks(start, end int64, numBins int) ([][]int16, error) { if !m.Exists() { return nil, errors.New("cannot compute peaks for non-existent MediaSet") } var err error fptr, err := os.Open(m.RawAudioPath()) if err != nil { return nil, fmt.Errorf("audio open error: %v", err) } defer fptr.Close() startByte := start * int64(m.Audio.Channels) * SizeOfInt16 if _, err = fptr.Seek(startByte, io.SeekStart); err != nil { return nil, fmt.Errorf("audio seek error: %v", err) } numFrames := end - start framesPerBin := numFrames / int64(numBins) peaks := make([][]int16, m.Audio.Channels) for i := 0; i < m.Audio.Channels; i++ { peaks[i] = make([]int16, numBins) } samples := make([]int16, framesPerBin*int64(m.Audio.Channels)) for binNum := 0; binNum < numBins; binNum++ { if err := binary.Read(fptr, binary.LittleEndian, samples); err != nil { return nil, fmt.Errorf("error reading samples: %v", err) } for i, samp := range samples { if samp < 0 { samp = -samp } chanIndex := i % m.Audio.Channels if samp > peaks[chanIndex][binNum] { peaks[chanIndex][binNum] = samp } } } log.Println("finished generating peaks") return peaks, nil }