Skip to content

Commit

Permalink
Merge pull request #2 from forbiddencoding/refactor/remove-experiment…
Browse files Browse the repository at this point in the history
…al-html-parsing

(refactor): remove experimental html parsing
  • Loading branch information
forbiddencoding committed Sep 13, 2023
2 parents d9e6c93 + f526f8f commit d12cc5a
Show file tree
Hide file tree
Showing 4 changed files with 0 additions and 189 deletions.
2 changes: 0 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
module github.com/forbiddencoding/howlongtobeat

go 1.21

require golang.org/x/net v0.14.0
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@
golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14=
golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI=
77 changes: 0 additions & 77 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ import (
"encoding/json"
"errors"
"fmt"
"golang.org/x/net/html"
"io"
"net/http"
)

Expand All @@ -46,19 +44,6 @@ func (c *Client) jsonParser(val any) parseResponseFunc {
}
}

// htmlParserByID builds a parseResponseFunc that reads the entire body of an http.Response and hands
// it to parseHTML, which unmarshals the JSON held in the first child of the element whose id equals
// ID into val. The returned function does not close resp.Body.
func (c *Client) htmlParserByID(val any, ID string) parseResponseFunc {
	parse := func(resp *http.Response) error {
		raw, readErr := io.ReadAll(resp.Body)
		if readErr != nil {
			return readErr
		}

		return c.parseHTML(raw, ID, val)
	}

	return parse
}

func (c *Client) htmlScriptDataParserByID(val any, ID string) parseResponseFunc {
return func(resp *http.Response) error {
startTag := []byte(fmt.Sprintf(`<script id="%s" type="application/json">`, ID))
Expand Down Expand Up @@ -108,65 +93,3 @@ func (c *Client) htmlScriptDataParserByID(val any, ID string) parseResponseFunc
return json.Unmarshal(data, &val)
}
}

// parseHTML parses body as an HTML document, finds the first element whose id attribute equals ID,
// and unmarshals the JSON text held in that element's first child into val.
//
// val must be a non-nil pointer. It is handed to json.Unmarshal unchanged so that a nil or
// non-pointer target is reported as an error instead of being silently decoded into a throwaway
// value that the caller never sees.
//
// It returns an "element not found" error when no element carries ID, and an
// "element first child not found" error when the element has no first child or that child's data
// is empty. Errors from html.Parse and json.Unmarshal are returned as-is.
func (c *Client) parseHTML(body []byte, ID string, val any) error {
	// html.Parse only requires an io.Reader, so the byte reader is passed directly.
	document, err := html.Parse(bytes.NewReader(body))
	if err != nil {
		return err
	}

	element := c.getElementByID(document, ID)
	if element == nil || element.Data == "" {
		return errors.New("element not found")
	}

	if element.FirstChild == nil || element.FirstChild.Data == "" {
		return errors.New("element first child not found")
	}

	return json.Unmarshal([]byte(element.FirstChild.Data), val)
}

// getAttribute looks up the attribute named key on n. It returns the value of the first matching
// attribute and true, or an empty string and false when n has no attribute with that key.
func (c *Client) getAttribute(n *html.Node, key string) (string, bool) {
	for i := range n.Attr {
		if candidate := n.Attr[i]; candidate.Key == key {
			return candidate.Val, true
		}
	}

	return "", false
}

// checkID reports whether n is an element node whose "id" attribute equals id.
func (c *Client) checkID(n *html.Node, id string) bool {
	// Only element nodes are considered; every other node type never matches.
	if n.Type != html.ElementNode {
		return false
	}

	value, found := c.getAttribute(n, "id")

	return found && value == id
}

// traverseTree searches the subtree rooted at n for a node accepted by checkID for the given id.
// The node itself is tested first, then each child subtree in sibling order; the first match is
// returned, or nil when nothing in the subtree matches.
func (c *Client) traverseTree(n *html.Node, id string) *html.Node {
	if c.checkID(n, id) {
		return n
	}

	node := n.FirstChild
	for node != nil {
		if match := c.traverseTree(node, id); match != nil {
			return match
		}

		node = node.NextSibling
	}

	return nil
}

// getElementByID returns the first node found by traverseTree at or below n for the given id, or
// nil when no such node exists.
func (c *Client) getElementByID(n *html.Node, id string) *html.Node {
	match := c.traverseTree(n, id)

	return match
}
108 changes: 0 additions & 108 deletions parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ import (
"net/http"
"net/http/httptest"
"os"
"strings"
"testing"
)

Expand Down Expand Up @@ -76,81 +75,6 @@ func Test_jsonParser(t *testing.T) {

}

// Test_htmlParserByID serves the HTML fixture from a local test server and checks that the function
// returned by htmlParserByID decodes the JSON stored under the "__NEXT_DATA__" id into a
// gameDetailsResponse with the expected game id.
func Test_htmlParserByID(t *testing.T) {
	htmlFile, err := os.Open("test_files/test_html_parser.html")
	if err != nil {
		t.Fatalf("error opening HTML test file: %v", err)
	}
	defer htmlFile.Close()

	mockData, err := io.ReadAll(htmlFile)
	if err != nil {
		t.Fatalf("error reading HTML test file: %v", err)
	}

	rs := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write(mockData)
	}))
	defer rs.Close()

	resp, err := http.Get(rs.URL)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	// The parser under test does not close the body, so the test must release it.
	defer resp.Body.Close()

	dest := gameDetailsResponse{}
	mockClient := &Client{}
	parseFunc := mockClient.htmlParserByID(&dest, "__NEXT_DATA__")

	if err = parseFunc(resp); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Guard the index so an empty result fails the test cleanly instead of panicking.
	games := dest.Props.PageProps.Game.Data.Game
	if len(games) == 0 {
		t.Fatal("expected at least one game, got none")
	}

	if games[0].GameID != 10270 {
		t.Fatalf("unexpected game id: %v", games[0].GameID)
	}
}

// Test_parseHTML_InvalidID checks that parseHTML reports "element not found" when the requested id
// is not present in the fixture document.
func Test_parseHTML_InvalidID(t *testing.T) {
	htmlFile, err := os.Open("test_files/test_html_parser_invalid_id.html")
	if err != nil {
		t.Fatalf("error opening HTML test file: %v", err)
	}
	defer htmlFile.Close()

	mockData, err := io.ReadAll(htmlFile)
	if err != nil {
		t.Fatalf("error reading HTML test file: %v", err)
	}

	dest := gameDetailsResponse{}
	mockClient := &Client{}
	err = mockClient.parseHTML(mockData, "nonExistingID", &dest)

	// Check for nil first: calling err.Error() on a nil error would panic instead of failing the test.
	const want = "element not found"
	if err == nil || strings.Compare(err.Error(), want) != 0 {
		t.Fatalf("expected error %q, got %v", want, err)
	}
}

// Test_parseHTML_EmptyElement checks that parseHTML reports "element first child not found" when
// the element with the requested id is present in the fixture but has no usable first child.
func Test_parseHTML_EmptyElement(t *testing.T) {
	htmlFile, err := os.Open("test_files/test_html_parser_empty.html")
	if err != nil {
		t.Fatalf("error opening HTML test file: %v", err)
	}
	defer htmlFile.Close()

	mockData, err := io.ReadAll(htmlFile)
	if err != nil {
		t.Fatalf("error reading HTML test file: %v", err)
	}

	mockClient := &Client{}
	err = mockClient.parseHTML(mockData, "__NEXT_DATA__", nil)

	// Check for nil first: calling err.Error() on a nil error would panic instead of failing the test.
	const want = "element first child not found"
	if err == nil || strings.Compare(err.Error(), want) != 0 {
		t.Fatalf("expected error %q, got %v", want, err)
	}
}

func Test_htmlScriptDataParserByID(t *testing.T) {
htmlFile, err := os.Open("test_files/test_html_parser.html")
if err != nil {
Expand Down Expand Up @@ -187,38 +111,6 @@ func Test_htmlScriptDataParserByID(t *testing.T) {
}
}

func Benchmark_htmlParserByID(b *testing.B) {
htmlFile, err := os.Open("test_files/test_html_parser.html")
if err != nil {
b.Fatalf("error opening HTML test file: %v", err)
}
defer htmlFile.Close()

mockData, err := io.ReadAll(htmlFile)
if err != nil {
b.Fatalf("error reading HTML test file: %v", err)
}

rs := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
_, _ = w.Write(mockData)
}))
defer rs.Close()

resp, err := http.Get(rs.URL)
if err != nil {
b.Fatalf("unexpected error: %v", err)
}

dest := gameDetailsResponse{}
mockClient := &Client{}
parseFunc := mockClient.htmlParserByID(&dest, "__NEXT_DATA__")

if err = parseFunc(resp); err != nil {
b.Fatalf("unexpected error: %v", err)
}
}

func Benchmark_htmlScriptDataParserByID(b *testing.B) {
htmlFile, err := os.Open("test_files/test_html_parser.html")
if err != nil {
Expand Down

0 comments on commit d12cc5a

Please sign in to comment.