Skip to content

Commit

Permalink
hackernews draft
Browse files Browse the repository at this point in the history
  • Loading branch information
azimut committed Sep 6, 2021
1 parent ba68ad6 commit 7c8d9f9
Show file tree
Hide file tree
Showing 8 changed files with 318 additions and 0 deletions.
24 changes: 24 additions & 0 deletions cmd/hackerview/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package main

import (
"bytes"
"fmt"
"io/ioutil"

"github.com/PuerkitoBio/goquery"
"github.com/azimut/cli-view/internal/hackernews"
)

// main is a small driver for the hackernews package: it loads a saved
// Hacker News item page from disk, builds the comment tree and prints the
// first reply of the first root comment.
//
// It panics when the file cannot be read or parsed.
func main() {
	// NOTE(review): hard-coded, developer-local fixture path.
	file, err := ioutil.ReadFile("/home/sendai/testfield/hn.html")
	if err != nil {
		panic(err)
	}
	r := bytes.NewReader(file)
	doc, err := goquery.NewDocumentFromReader(r)
	if err != nil {
		panic(err)
	}
	comments := hackernews.MakeComments(doc)
	// Guard both index operations: a page without comments, or whose first
	// comment has no replies, would otherwise abort with an index-out-of-range
	// panic.
	if len(comments) == 0 {
		fmt.Println("no comments found")
		return
	}
	if len(comments[0].Childs) == 0 {
		fmt.Println("first comment has no replies")
		return
	}
	fmt.Println(comments[0].Childs[0])
}
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/azimut/cli-view
go 1.14

require (
github.com/PuerkitoBio/goquery v1.7.1
github.com/charmbracelet/bubbles v0.8.0
github.com/charmbracelet/bubbletea v0.14.1
github.com/olekukonko/tablewriter v0.0.5 // indirect
Expand Down
13 changes: 13 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
github.com/PuerkitoBio/goquery v1.7.1 h1:oE+T06D+1T7LNrn91B4aERsRIeCLJ/oPSa6xB9FPnz4=
github.com/PuerkitoBio/goquery v1.7.1/go.mod h1:XY0pP4kfraEmmV1O7Uf6XyjoslwsneBbgeDjLYuN8xY=
github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
github.com/atotto/clipboard v0.1.2/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/charmbracelet/bubbles v0.8.0 h1:+l2op90Ag37Vn+30O1hbg/0wBl+e+sxHhgY1F/rvdHs=
github.com/charmbracelet/bubbles v0.8.0/go.mod h1:5WX1sSSjNCgCrzvRMN/z23HxvWaa+AI16Ch0KPZPeDs=
Expand Down Expand Up @@ -35,8 +39,11 @@ github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02n
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20201012173705-84dcc777aaee/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20201016220609-9e8e0b390897/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3 h1:0GoQqolDA55aaLxZyTzK/Y2ePZzZTUrRacwib7cNsYQ=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand All @@ -46,8 +53,14 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68 h1:nxC68pudNYkKU6jWhgrqdreuF
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4 h1:myAQVi0cGEoqQVR5POX+8RR2mrocKqNN1hmeMqhX27k=
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da h1:b3NXsE2LusjYGGjL5bxEVZZORm/YEFFrWFjR8eFrw/c=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210422114643-f5beecf764ed h1:Ei4bQjjpYUsS4efOUz+5Nz++IVkHk87n2zBA0NxBWc0=
golang.org/x/term v0.0.0-20210422114643-f5beecf764ed/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
jaytaylor.com/html2text v0.0.0-20200412013138-3577fbdbcff7 h1:mub0MmFLOn8XLikZOAhgLD1kXJq8jgftSrrv7m00xFo=
jaytaylor.com/html2text v0.0.0-20200412013138-3577fbdbcff7/go.mod h1:OxvTsCwKosqQ1q7B+8FwXqg4rKZ/UG9dUW+g/VL2xH4=
127 changes: 127 additions & 0 deletions internal/hackernews/comment.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
package hackernews

import (
"strconv"
"time"

"github.com/PuerkitoBio/goquery"
)

// MakeComments builds the comment tree of a Hacker News item page.
//
// It visits every "table.comment-tree tr.comtr" row in document order, turns
// each row into a Comment with NewComment and nests it by indent: a row is
// attached to the closest preceding row that has a strictly smaller indent,
// or becomes a root comment when no such row exists.
//
// The returned slice holds the root comments (nil when the page has none);
// replies are reachable through Childs. It panics if NewComment panics on a
// row.
func MakeComments(doc *goquery.Document) []*Comment {
	var roots []*Comment
	// stack is the chain from a root comment down to the most recently
	// visited comment; indents strictly increase from bottom to top.
	var stack []*Comment
	doc.Find("table.comment-tree tr.comtr").Each(func(_ int, sel *goquery.Selection) {
		current := NewComment(sel)
		// Pop every entry that is not shallower than current. What remains
		// on top (if anything) is current's parent. Unlike arithmetic on
		// indent differences, this cannot slice out of range when indents
		// skip levels.
		for len(stack) > 0 && stack[len(stack)-1].indent >= current.indent {
			stack = stack[:len(stack)-1]
		}
		if len(stack) == 0 {
			roots = append(roots, current)
		} else {
			parent := stack[len(stack)-1]
			parent.Childs = append(parent.Childs, current)
		}
		stack = append(stack, current)
	})
	return roots
}

// NewComment extracts a Comment from a single comment row selection.
//
// The id, message text, author and indent are read from the row; Childs and
// date are left at their zero values. It panics when commentId or
// commentIndent cannot find or parse their attribute.
func NewComment(sel *goquery.Selection) *Comment {
	c := new(Comment)
	c.id = commentId(sel)
	c.msg = commentMsg(sel)
	// The toggle count (commentTogg) is not collected yet.
	c.user = commentUser(sel)
	// The date (commentDate) is not collected yet.
	c.indent = commentIndent(sel)
	return c
}

// isChildOf reports whether current is nested deeper than other, i.e. whether
// its indent is strictly greater than other's. Only the indent values are
// compared.
func (current *Comment) isChildOf(other *Comment) bool {
	return current.indent > other.indent
}

// isSiblingOf reports whether current and other sit at the same nesting
// level, i.e. whether their indent values are equal.
func (current *Comment) isSiblingOf(other *Comment) bool {
	return current.indent == other.indent
}

// isAncestorOf reports whether current sits at a shallower nesting level than
// other, i.e. whether its indent is strictly smaller. Despite the name, only
// the indent values are compared; it does not check that other actually
// descends from current.
func (current *Comment) isAncestorOf(other *Comment) bool {
	return current.indent < other.indent
}

// commentTogg returns the integer stored in the "n" attribute of the row's
// "a.togg" link. It panics when the attribute is absent or is not a valid
// integer.
func commentTogg(sel *goquery.Selection) int {
	toggle := sel.Find("a.togg")
	raw, ok := toggle.Attr("n")
	if !ok {
		panic("no toggle n on comment")
	}
	n, err := strconv.Atoi(raw)
	if err == nil {
		return n
	}
	panic(err)
}

// commentIndent returns the nesting level of a comment row, read from the
// "indent" attribute of its "td.ind" cell. It panics when the attribute is
// absent or is not a valid integer.
func commentIndent(sel *goquery.Selection) int {
	cell := sel.Find("td.ind")
	value, found := cell.Attr("indent")
	if !found {
		panic("no indent for comment")
	}
	level, convErr := strconv.Atoi(value)
	if convErr != nil {
		panic(convErr)
	}
	return level
}

// commentMsg returns the text content of the row's "span.commtext" element,
// or the empty string when the row has none.
func commentMsg(sel *goquery.Selection) string {
	body := sel.Find("span.commtext")
	return body.Text()
}

// commentId returns the numeric comment id, taken from the "id" attribute of
// the row element itself. It panics when the attribute is absent or is not a
// valid integer.
func commentId(sel *goquery.Selection) int {
	attr, present := sel.Attr("id")
	if !present {
		panic("comment id not found")
	}
	parsed, err := strconv.Atoi(attr)
	if err == nil {
		return parsed
	}
	panic(err)
}

// commentUser returns the text of the row's "a.hnuser" link, or the empty
// string when the row has none.
func commentUser(sel *goquery.Selection) string {
	author := sel.Find("a.hnuser")
	return author.Text()
}

// commentDate returns the timestamp stored in the "title" attribute of the
// row's "span.age" element.
//
// It panics when the attribute is absent or does not match the expected
// layout.
func commentDate(sel *goquery.Selection) time.Time {
	rawdate, exists := sel.Find("span.age").Attr("title")
	if !exists {
		panic("could not find span.age title")
	}
	// time.Parse layouts are spelled with Go's reference time, not with
	// strftime-style verbs; a layout without reference elements is matched
	// as literal text and rejects every real timestamp.
	// NOTE(review): assumes the attribute looks like "2021-09-06T12:34:56"
	// (no zone, so the result is in UTC) — confirm against a saved page.
	date, err := time.Parse("2006-01-02T15:04:05", rawdate)
	if err != nil {
		panic(err)
	}
	return date
}
31 changes: 31 additions & 0 deletions internal/hackernews/comment_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package hackernews

import (
"bytes"
"io/ioutil"
"testing"

"github.com/PuerkitoBio/goquery"
)

// TestMakeComments parses a saved Hacker News item page and checks the shape
// of the resulting comment tree: the number of root comments, the replies of
// the first root and the replies of its first reply.
//
// NOTE(review): the fixture is read from a hard-coded, developer-local path.
func TestMakeComments(t *testing.T) {
	file, err := ioutil.ReadFile("/home/sendai/testfield/hn.html")
	if err != nil {
		// Nothing below can run without the fixture.
		t.Fatal(err)
	}
	r := bytes.NewReader(file)
	doc, err := goquery.NewDocumentFromReader(r)
	if err != nil {
		// Continuing would hand a nil document to MakeComments.
		t.Fatal(err)
	}
	comments := MakeComments(doc)
	// Length mismatches that guard a later index are fatal so the test
	// reports a failure instead of panicking with index out of range.
	if len(comments) != 19 {
		t.Fatalf("expected 19 root comments got %d", len(comments))
	}
	if len(comments[0].Childs) != 4 {
		t.Fatalf("expected 4 child comments for [0] got %d", len(comments[0].Childs))
	}
	if len(comments[0].Childs[0].Childs) != 1 {
		t.Errorf("expected 1 child comment for .Childs[0].Childs got %d", len(comments[0].Childs[0].Childs))
	}
}
70 changes: 70 additions & 0 deletions internal/hackernews/header.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package hackernews

import (
"strconv"
"strings"
"time"

"github.com/PuerkitoBio/goquery"
)

// NewHeader extracts the story header (Op) from a Hacker News item page.
//
// It fills the comment count, title, score, submitter and story URL; the date
// field is left at its zero value. It panics when any of the helper
// extractors panics.
func NewHeader(doc *goquery.Document) *Op {
	op := new(Op)
	op.ncomments = getNComments(doc)
	op.title = getTitle(doc)
	op.score = getScore(doc)
	op.user = getUser(doc)
	op.url = getUrl(doc)
	return op
}

// getUser returns the text of the "a.hnuser" link inside "td.subtext", or the
// empty string when the document has none.
func getUser(doc *goquery.Document) string {
	submitter := doc.Find("td.subtext a.hnuser")
	return submitter.Text()
}

// getDate returns the timestamp stored in the "title" attribute of the
// "span.age" element inside "td.subtext".
//
// It panics when the attribute is absent or does not match the expected
// layout.
func getDate(doc *goquery.Document) time.Time {
	rawdate, exists := doc.Find("td.subtext span.age").Attr("title")
	if !exists {
		panic("could not find span.age title")
	}
	// time.Parse layouts are spelled with Go's reference time, not with
	// strftime-style verbs; a layout without reference elements is matched
	// as literal text and rejects every real timestamp.
	// NOTE(review): assumes the attribute looks like "2021-09-06T12:34:56"
	// (no zone, so the result is in UTC) — confirm against a saved page.
	date, err := time.Parse("2006-01-02T15:04:05", rawdate)
	if err != nil {
		panic(err)
	}
	return date
}

// getNComments returns the number of comments announced by the last link in
// "td.subtext".
//
// Only the leading run of ASCII digits of the link text is parsed, so a label
// following the number does not break the conversion. A link text without a
// leading number yields 0.
// NOTE(review): presumably the link reads like "214 comments", or "discuss"
// when there are none — confirm against saved pages.
//
// It panics when "td.subtext" contains no link at all, or when the digits do
// not fit in an int.
func getNComments(doc *goquery.Document) int {
	link := doc.Find("td.subtext a").Last()
	if link.Length() == 0 {
		panic("no comments link in td.subtext")
	}
	rawn := strings.TrimSpace(link.Text())
	// Cut at the first non-digit; the separator may be a non-breaking space,
	// so splitting on " " would not be reliable.
	end := strings.IndexFunc(rawn, func(r rune) bool { return r < '0' || r > '9' })
	if end >= 0 {
		rawn = rawn[:end]
	}
	if rawn == "" {
		return 0
	}
	n, err := strconv.Atoi(rawn)
	if err != nil {
		panic(err)
	}
	return n
}

// getTitle returns the story title, read from the "title" attribute of the
// "tr#pagespace" element. It panics when the attribute is absent.
func getTitle(doc *goquery.Document) string {
	spacer := doc.Find("tr#pagespace")
	title, ok := spacer.Attr("title")
	if ok {
		return title
	}
	panic("no title")
}

// getUrl returns the story link, read from the "href" attribute of the
// "a.storylink" element. It panics when the attribute is absent.
func getUrl(doc *goquery.Document) string {
	story := doc.Find("a.storylink")
	href, ok := story.Attr("href")
	if ok {
		return href
	}
	panic("no story href")
}

// getScore returns the story score parsed from the text of "span.score".
// It panics when the remaining text is not a valid integer.
func getScore(doc *goquery.Document) int {
	text := doc.Find("span.score").Text()
	// TrimRight treats its second argument as a set of characters, not as a
	// suffix: every trailing ' ', 'p', 'o', 'i', 'n', 't' or 's' is removed,
	// which leaves the digits.
	digits := strings.TrimRight(text, " points")
	score, convErr := strconv.Atoi(digits)
	if convErr != nil {
		panic(convErr)
	}
	return score
}
31 changes: 31 additions & 0 deletions internal/hackernews/header_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package hackernews

import (
"bytes"
"io/ioutil"
"testing"

"github.com/PuerkitoBio/goquery"
)

// TestNewHeader parses a saved Hacker News item page and checks the story
// URL, score and submitter extracted by NewHeader.
//
// NOTE(review): the fixture is read from a hard-coded, developer-local path.
func TestNewHeader(t *testing.T) {
	file, err := ioutil.ReadFile("/home/sendai/testfield/hn.html")
	if err != nil {
		// Nothing below can run without the fixture.
		t.Fatal(err)
	}
	r := bytes.NewReader(file)
	doc, err := goquery.NewDocumentFromReader(r)
	if err != nil {
		// Continuing would hand a nil document to NewHeader.
		t.Fatal(err)
	}
	header := NewHeader(doc)
	const wantURL = "https://www.newyorker.com/news/news-desk/the-red-warning-light-on-richard-bransons-space-flight"
	if header.url != wantURL {
		t.Errorf("invalid URL, expected %q got %q", wantURL, header.url)
	}
	if header.score != 312 {
		t.Errorf("invalid score, expected 312 got %d", header.score)
	}
	if header.user != "zlsa" {
		t.Errorf("invalid user, expected \"zlsa\" got %q", header.user)
	}
}
21 changes: 21 additions & 0 deletions internal/hackernews/type.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package hackernews

import "time"

// Op holds the header data of a Hacker News item page, as filled in by
// NewHeader.
type Op struct {
	// url is the href of the "a.storylink" element.
	url string
	// title is the "title" attribute of the "tr#pagespace" element.
	title string
	// score is the number parsed from the "span.score" text.
	score int
	// ncomments is parsed from the text of the last link in "td.subtext".
	ncomments int
	// user is the text of the "a.hnuser" link in "td.subtext".
	user string
	// date is not set by NewHeader; it stays at the zero time.Time.
	date time.Time
}

// Comment is one node of the comment tree built by MakeComments.
type Comment struct {
	// id is the integer value of the comment row's "id" attribute.
	id int
	// msg is the text of the row's "span.commtext" element.
	msg string
	// Childs holds the comments nested directly under this one.
	Childs []*Comment
	// indent is the nesting level read from the "indent" attribute of the
	// row's "td.ind" cell.
	indent int
	// user is the text of the row's "a.hnuser" link.
	user string
	// date is not set by NewComment; it stays at the zero time.Time.
	date time.Time
}

0 comments on commit 7c8d9f9

Please sign in to comment.