Skip to content

Commit

Permalink
Add Rumble extractor (#1232)
Browse files Browse the repository at this point in the history
* Add Rumble extractor

* Remove unused cookies

* Update Supported Sites

* Fetch live playlist
  • Loading branch information
shavit committed May 9, 2023
1 parent e0729b0 commit 9142f2a
Show file tree
Hide file tree
Showing 5 changed files with 381 additions and 0 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/stream_rumble.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Workflow that runs the rumble extractor's Go tests.
name: rumble

# Triggers: pushes and pull requests that touch the rumble extractor sources or
# this workflow file, plus a scheduled weekly run.
on:
push:
paths:
- "extractors/rumble/*.go"
- ".github/workflows/stream_rumble.yml"
pull_request:
paths:
- "extractors/rumble/*.go"
- ".github/workflows/stream_rumble.yml"
schedule:
# Run CI weekly: minute 0, hour 0, on day-of-week 0 (Sunday).
- cron: "0 0 * * 0"

jobs:
test:
runs-on: ${{ matrix.os }}
strategy:
# Single-entry matrix: one Go version on one operating system.
matrix:
go: ["1.20"]
os: [ubuntu-latest]
name: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- uses: actions/setup-go@v2
with:
go-version: ${{ matrix.go }}

# Runs only the rumble package's tests, with the race detector enabled and a
# coverage profile written to coverage.txt.
- name: Test
run: go test -timeout 5m -race -coverpkg=./... -coverprofile=coverage.txt github.com/iawia002/lux/extractors/rumble
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,7 @@ $ lux -j "https://www.bilibili.com/video/av20203945"
| Reddit | <https://www.reddit.com> ||| | | | [![reddit](https://github.com/iawia002/lux/actions/workflows/stream_reddit.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_reddit.yml) |
| VKontakte | <https://vk.com> || | | | | [![vk](https://github.com/iawia002/lux/actions/workflows/stream_vk.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_vk.yml/) |
| 知乎 | <https://zhihu.com> || | | | | [![zhihu](https://github.com/iawia002/lux/actions/workflows/stream_zhihu.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_zhihu.yml/) |
| Rumble | <https://rumble.com> || | | | | [![rumble](https://github.com/iawia002/lux/actions/workflows/stream_rumble.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_rumble.yml/) |


## Known issues
Expand Down
1 change: 1 addition & 0 deletions app/register.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
_ "github.com/iawia002/lux/extractors/pornhub"
_ "github.com/iawia002/lux/extractors/qq"
_ "github.com/iawia002/lux/extractors/reddit"
_ "github.com/iawia002/lux/extractors/rumble"
_ "github.com/iawia002/lux/extractors/streamtape"
_ "github.com/iawia002/lux/extractors/tangdou"
_ "github.com/iawia002/lux/extractors/tiktok"
Expand Down
320 changes: 320 additions & 0 deletions extractors/rumble/rumble.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,320 @@
package rumble

import (
"compress/flate"
"compress/gzip"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"path"
"regexp"
"strconv"

"github.com/pkg/errors"

"github.com/iawia002/lux/extractors"
"github.com/iawia002/lux/request"
"github.com/iawia002/lux/utils"
)

func init() {
extractors.Register("rumble", New())
}

// extractor is the rumble implementation returned by New as an
// extractors.Extractor. It carries no state.
type extractor struct{}

// New returns a rumble extractor.
//
// The extractor is stateless, so each call simply hands back a fresh empty
// value.
func New() extractors.Extractor {
	return new(extractor)
}

// rumbleData is one element of the JSON array found in a video page's
// `application/ld+json` script tag (see readPayload).
//
// Within this file only Type (compared against "VideoObject") and EmbedURL
// (used to derive the video ID) are read; the remaining fields are decoded but
// not consulted here.
type rumbleData struct {
Format string `json:"format"`
Name string `json:"name"`
EmbedURL string `json:"embedUrl"`
ThumbnailURL string `json:"thumbnailUrl"`
Type string `json:"@type"`
VideoURL string `json:"videoUrl"`
Quality string `json:"quality"`
}

// Extract is the main function to extract the data.
//
// It downloads the page at url, takes the title from the page's <title> tag
// (falling back to "rumble video" when none is found), reads the embed URL from
// the page's JSON-LD payload, derives the video ID from it, and asks the
// embedJS endpoint for the available streams. The option argument is not
// consulted.
//
// On success the returned slice holds exactly one element. Any failure along
// the way (request, body decoding, payload parsing, stream lookup) is returned
// wrapped with a stack trace.
func (e *extractor) Extract(url string, option extractors.Options) ([]*extractors.Data, error) {
	res, err := request.Request(http.MethodGet, url, nil, nil)
	if err != nil {
		return nil, errors.WithStack(err)
	}
	defer res.Body.Close() // nolint

	// Pick a decoder matching the response's Content-Encoding.
	var reader io.ReadCloser
	switch res.Header.Get("Content-Encoding") {
	case "gzip":
		// gzip.NewReader validates the gzip header up front. Its error must be
		// checked: ignoring it would leave a nil *gzip.Reader in reader and the
		// subsequent read would panic.
		gz, err := gzip.NewReader(res.Body)
		if err != nil {
			return nil, errors.WithStack(err)
		}
		reader = gz
	case "deflate":
		reader = flate.NewReader(res.Body)
	default:
		reader = res.Body
	}
	defer reader.Close() // nolint

	b, err := io.ReadAll(reader)
	if err != nil {
		return nil, errors.WithStack(err)
	}
	html := string(b)

	// Use the page <title> when present, otherwise a generic placeholder.
	title := "rumble video"
	if matchTitle := utils.MatchOneOf(html, `<title>(.+?)</title>`); len(matchTitle) > 1 {
		title = matchTitle[1]
	}

	payload, err := readPayload(html)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	videoID, err := getVideoID(payload.EmbedURL)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	streams, err := fetchVideoQuality(videoID)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	return []*extractors.Data{
		{
			Site:    "Rumble rumble.com",
			Title:   title,
			Type:    extractors.DataTypeVideo,
			Streams: streams,
			URL:     url,
		},
	}, nil
}

// readPayload extracts the JSON-LD payload from the video web page and returns
// its first entry whose "@type" is "VideoObject".
//
// The payload is expected to be a JSON array located inside a
// `<script type="application/ld+json">` tag on a single line (the pattern's
// `.` does not cross newlines).
//
// It returns extractors.ErrURLQueryParamsParseFailed when no such script tag is
// found, the JSON decoding error when the content is not a valid array, and
// extractors.ErrURLParseFailed when no entry is a VideoObject.
func readPayload(html string) (*rumbleData, error) {
	matchPayload := utils.MatchOneOf(html, `\<script\stype="?application\/ld\+json"?\>(.+?)\<\/script>`)
	// Index 1 (the capture group) is read below, so at least two elements are
	// required.
	if len(matchPayload) < 2 {
		return nil, errors.WithStack(extractors.ErrURLQueryParamsParseFailed)
	}

	var rumbles []rumbleData
	if err := json.Unmarshal([]byte(matchPayload[1]), &rumbles); err != nil {
		return nil, errors.WithStack(err)
	}

	// Index into the slice so the returned pointer refers to the decoded
	// element rather than to a loop variable.
	for i := range rumbles {
		if rumbles[i].Type == "VideoObject" {
			return &rumbles[i], nil
		}
	}

	return nil, errors.WithStack(extractors.ErrURLParseFailed)
}

// getVideoID returns the video ID carried by embedURL, i.e. the last element
// of the URL's path (trailing slashes are ignored).
//
// It returns extractors.ErrURLParseFailed when embedURL cannot be parsed or
// when its path has no last element to use as an ID (for example an empty
// embed URL, or one that consists only of a host).
func getVideoID(embedURL string) (string, error) {
	u, err := url.Parse(embedURL)
	if err != nil {
		return "", errors.WithStack(extractors.ErrURLParseFailed)
	}

	// path.Base yields "." for an empty path and "/" for a root-only path;
	// neither is a usable video ID.
	id := path.Base(u.Path)
	if id == "." || id == "/" {
		return "", errors.WithStack(extractors.ErrURLParseFailed)
	}

	return id, nil
}

// rumbleResponse is the top-level envelope of the video request response.
// Streams keeps the raw value of the "ua" key undecoded; rumbleStreams'
// UnmarshalJSON decodes it in a second step.
type rumbleResponse struct {
Streams *json.RawMessage `json:"ua"`
}

// streamInfo is the metadata shared by every stream entry: the stream URL and
// a "meta" object describing it.
//
// Within this file only URL and Meta.Size are read; they are copied into the
// extractors.Part and extractors.Stream values built for each stream.
// NOTE(review): the units of Bitrate and Size are not established by this
// file — confirm against the API before relying on them.
type streamInfo struct {
URL string `json:"url"`
Meta struct {
Bitrate uint16 `json:"bitrate"`
Size int64 `json:"size"`
Width uint16 `json:"w"`
Height uint16 `json:"h"`
} `json:"meta"`
}

// videoQualities holds one stream entry per quality key that may appear under
// the `mp4` and `webm` formats. Each field is decoded from the JSON key given
// in its tag; a key missing from the response leaves the field at its zero
// value (empty URL).
// NOTE(review): the keys presumably denote vertical resolution; the meaning of
// "2161" is not evident from this file — confirm.
type videoQualities struct {
Q240 struct{ streamInfo } `json:"240"`
Q360 struct{ streamInfo } `json:"360"`
Q480 struct{ streamInfo } `json:"480"`
Q720 struct{ streamInfo } `json:"720"`
Q1080 struct{ streamInfo } `json:"1080"`
Q1440 struct{ streamInfo } `json:"1440"`
Q2160 struct{ streamInfo } `json:"2160"`
Q2161 struct{ streamInfo } `json:"2161"`
}

// rumbleStreams is the decoded video payload: per-quality entries for the
// `mp4` and `webm` formats, plus a single `auto` entry for the `hls` format.
// It is populated by its custom UnmarshalJSON rather than by struct-tag
// decoding of the whole response.
type rumbleStreams struct {
FMp4 struct {
videoQualities
} `json:"mp4"`
FWebm struct {
videoQualities
} `json:"webm"`
FHLS struct {
QAuto struct{ streamInfo } `json:"auto"`
} `json:"hls"`
}

// UnmarshalJSON decodes the video response into r.
//
// The response is decoded in two steps: first the envelope, whose "ua" value is
// kept raw, then that value as a map of format name to raw JSON. The `mp4`,
// `webm` and `hls` formats are then decoded individually, because some of them
// are delivered either as an object or as an array; a format that does not
// decode into the expected object shape is left at its zero value instead of
// failing the whole response.
//
// It returns extractors.ErrURLParseFailed when the envelope is not valid JSON,
// when the "ua" value is absent or null, or when it is not a JSON object.
func (r *rumbleStreams) UnmarshalJSON(b []byte) error {
	var resp rumbleResponse
	if err := json.Unmarshal(b, &resp); err != nil {
		return errors.WithStack(extractors.ErrURLParseFailed)
	}
	// A missing or null "ua" leaves the pointer nil; dereferencing it would
	// panic, so report it as a parse failure instead.
	if resp.Streams == nil {
		return errors.WithStack(extractors.ErrURLParseFailed)
	}

	// Get individual stream from the response
	var obj map[string]*json.RawMessage
	if err := json.Unmarshal(*resp.Streams, &obj); err != nil {
		return errors.WithStack(extractors.ErrURLParseFailed)
	}

	formats := []struct {
		key string
		dst interface{}
	}{
		{"mp4", &r.FMp4},
		{"webm", &r.FWebm},
		{"hls", &r.FHLS},
	}
	for _, f := range formats {
		raw, ok := obj[f.key]
		// A JSON null value decodes to a nil pointer; treat it like an absent
		// format.
		if !ok || raw == nil {
			continue
		}
		// Deliberately best-effort: the format may be an array rather than an
		// object, in which case decoding fails and the field stays empty.
		_ = json.Unmarshal(*raw, f.dst)
	}

	return nil
}

// makeAllVODStreams fills m with one stream per known `mp4` quality, keyed by
// the quality name, plus the 480 `webm` stream under the key "webm".
//
// Every key is always written, whether or not the response carried that
// quality; an absent quality yields a stream whose URL is empty and whose size
// is zero.
func (rs *rumbleStreams) makeAllVODStreams(m map[string]*extractors.Stream) {
	mp4 := &rs.FMp4
	entries := []struct {
		key     string
		quality string
		ext     string
		info    *streamInfo
	}{
		{"webm", "480", "webm", &rs.FWebm.Q480.streamInfo},
		{"240", "240", "mp4", &mp4.Q240.streamInfo},
		{"360", "360", "mp4", &mp4.Q360.streamInfo},
		{"480", "480", "mp4", &mp4.Q480.streamInfo},
		{"720", "720", "mp4", &mp4.Q720.streamInfo},
		{"1080", "1080", "mp4", &mp4.Q1080.streamInfo},
		{"1440", "1440", "mp4", &mp4.Q1440.streamInfo},
		{"2160", "2160", "mp4", &mp4.Q2160.streamInfo},
		{"2161", "2161", "mp4", &mp4.Q2161.streamInfo},
	}

	for _, entry := range entries {
		m[entry.key] = makeStreamMeta(entry.quality, entry.ext, entry.info)
	}
}

// reResolution captures the 3- or 4-digit number of a `_<N>p/` path marker in
// a playlist URL. It is compiled once at package scope and used by
// makeAllLiveStreams to rank playlists by resolution.
var reResolution = regexp.MustCompile(`_(\d{3,4})p\/`) // ex. _720p/

// makeAllLiveStreams adds an "hls" stream to m, built from the playlist
// advertised in rs.FHLS.QAuto. Use this to create the stream for live videos.
//
// The playlist at the HLS URL is fetched and, among the URLs it lists, the one
// carrying the highest `_<N>p/` resolution marker is selected. When no URL
// carries a marker the first one is used and the reported quality is "0". The
// selected playlist is then fetched and each URL it lists becomes one "ts"
// part of the stream.
//
// It returns an error and leaves m untouched when the response carries no HLS
// URL, when either playlist cannot be fetched, or when the first playlist
// lists no URLs.
func (rs *rumbleStreams) makeAllLiveStreams(m map[string]*extractors.Stream) error {
	hls := &rs.FHLS.QAuto.streamInfo

	// Responses without an HLS entry leave the URL empty; there is nothing to
	// fetch, so fail fast instead of issuing a request for an empty URL.
	if hls.URL == "" {
		return errors.WithStack(extractors.ErrURLParseFailed)
	}

	playlists, err := utils.M3u8URLs(hls.URL)
	if err != nil {
		return errors.WithStack(err)
	}

	if len(playlists) == 0 {
		return errors.WithStack(extractors.ErrURLParseFailed)
	}

	// Find the highest resolution
	playlistURL := playlists[0]
	maxRes := 0
	for _, candidate := range playlists {
		matched := reResolution.FindStringSubmatch(candidate)
		if len(matched) == 0 {
			continue
		}
		res, err := strconv.Atoi(matched[1])
		if err != nil {
			continue
		}

		if maxRes < res {
			maxRes = res
			playlistURL = candidate
		}
	}

	tsURLs, err := utils.M3u8URLs(playlistURL)
	if err != nil {
		return errors.WithStack(err)
	}

	// NOTE(review): every part is given the size reported for the whole HLS
	// entry, not a per-segment size — confirm this is what consumers expect.
	var parts []*extractors.Part
	for _, tsURL := range tsURLs {
		parts = append(parts, &extractors.Part{
			URL:  tsURL,
			Size: hls.Meta.Size,
			Ext:  "ts",
		})
	}

	m["hls"] = &extractors.Stream{
		Parts:   parts,
		Size:    hls.Meta.Size,
		Quality: strconv.Itoa(maxRes),
	}

	return nil
}

// fetchVideoQuality requests the video formats and qualities for videoID from
// the embedJS endpoint and returns them as a map of stream name to stream.
//
// The map always contains the entries written by makeAllVODStreams. An "hls"
// entry is added on a best-effort basis: a failure to build it is not reported.
//
// It returns an error when the request fails, when the response body cannot be
// decoded or read, or when the response is not the expected JSON.
func fetchVideoQuality(videoID string) (map[string]*extractors.Stream, error) {
	reqURL := fmt.Sprintf(`https://rumble.com/embedJS/u3/?request=video&ver=2&v=%s&ext={"ad_count":null}&ad_wt=0`, videoID)

	res, err := request.Request(http.MethodGet, reqURL, nil, nil)
	if err != nil {
		return nil, errors.WithStack(err)
	}
	defer res.Body.Close() // nolint

	// Pick a decoder matching the response's Content-Encoding.
	var reader io.ReadCloser
	switch res.Header.Get("Content-Encoding") {
	case "gzip":
		// gzip.NewReader validates the gzip header up front. Its error must be
		// checked: ignoring it would leave a nil *gzip.Reader in reader and the
		// subsequent read would panic.
		gz, err := gzip.NewReader(res.Body)
		if err != nil {
			return nil, errors.WithStack(err)
		}
		reader = gz
	case "deflate":
		reader = flate.NewReader(res.Body)
	default:
		reader = res.Body
	}
	defer reader.Close() // nolint

	b, err := io.ReadAll(reader)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	var rs rumbleStreams
	if err := json.Unmarshal(b, &rs); err != nil {
		return nil, errors.WithStack(err)
	}

	streams := make(map[string]*extractors.Stream, 9)
	rs.makeAllVODStreams(streams)
	// Best-effort: the HLS stream is simply omitted when it cannot be built,
	// so the error is deliberately dropped.
	_ = rs.makeAllLiveStreams(streams)

	return streams, nil
}

// makeStreamMeta builds a single-part stream from info.
//
// q becomes the stream's quality label and ext the part's file extension. The
// size reported in info is used both for the part and for the stream as a
// whole. info must not be nil.
func makeStreamMeta(q, ext string, info *streamInfo) *extractors.Stream {
	size := info.Meta.Size

	return &extractors.Stream{
		Parts: []*extractors.Part{
			{URL: info.URL, Size: size, Ext: ext},
		},
		Size:    size,
		Quality: q,
	}
}
28 changes: 28 additions & 0 deletions extractors/rumble/rumble_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package rumble

import (
"testing"

"github.com/iawia002/lux/extractors"
"github.com/iawia002/lux/test"
)

// TestRumble runs the extractor against a real rumble.com video page and
// fails when extraction returns an error or no data. It needs network access.
func TestRumble(t *testing.T) {
	tests := []struct {
		name string
		args test.Args
	}{
		{
			name: "normal test",
			args: test.Args{
				URL:   "https://rumble.com/v24swn0-just-say-yes-to-climate-lockdowns.html",
				Title: "Just Say YES to Climate Lockdowns!",
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Inspect the results: discarding them would let the test pass
			// even when extraction is completely broken.
			data, err := New().Extract(tt.args.URL, extractors.Options{})
			if err != nil {
				t.Fatalf("Extract(%q) returned error: %v", tt.args.URL, err)
			}
			if len(data) == 0 {
				t.Fatalf("Extract(%q) returned no data", tt.args.URL)
			}
			// NOTE(review): tt.args.Title is not compared against
			// data[0].Title because the exact <title> text served by the site
			// has not been verified — confirm before asserting it.
		})
	}
}

0 comments on commit 9142f2a

Please sign in to comment.