diff --git a/main.go b/main.go
index 02f820d..2f9ace2 100644
--- a/main.go
+++ b/main.go
@@ -7,6 +7,7 @@ import (
 	"moviestills/websites"
 	"os"
 	"os/signal"
+	"path/filepath"
 	"reflect"
 	"strings"
 	"syscall"
@@ -43,139 +44,134 @@ func main() {
 	var options config.Options
 	arg.MustParse(&options)
 
-	// We override the default prefix label for "info" messages to
-	// align it perfectly on the Terminal with other labels. Otherwise,
-	// since "INFO" is shorter than the other labels, the width
-	// of the different labels is not the same.
-	log.Info = *log.Info.WithPrefix(log.Prefix{Text: " INFOS ", Style: log.Info.Prefix.Style})
-
-	// Disable style and colors for output
-	if options.NoStyle {
-		log.DisableStyling()
-	}
-
-	// Disable colors only for output
-	if options.NoColors {
-		log.DisableColor()
-	}
-
 	// Interface of the app
 	log.DefaultHeader.Println("Movie Stills", config.VERSION)
 
+	// Adjust logging styles
+	setupLogging(&options)
+
 	// Display available scrapers implemented
 	if options.ListScrapers {
 		listAvailableScrapers(sites)
 		return
 	}
 
-	log.DefaultSection.Println("Configuration")
-	printConfiguration(&options)
-
 	// Check presence of website argument
 	if options.Website == "" {
 		log.Error.Println("A website must be set through arguments.")
+		listAvailableScrapers(sites)
 		os.Exit(1)
 	}
 
-	// Verify if we have a scrapper for the given website.
-	// If we do, "site_func" will now contain a function listed in
-	// the sites map that matches a module for this specific
-	// website stored in the "websites" folder.
-	siteFunc, scraperExists := sites[strings.ToLower(options.Website)]
-	if !scraperExists {
-		log.Error.Println("We don't have a scraper for this website.")
-		// List available scrapers
+	website := strings.ToLower(options.Website)
+
+	// Verify that we have a scraper for the given website. The sites
+	// map associates each supported website name with the scraper
+	// function implemented for it in the "websites" folder.
+	if _, exists := sites[website]; !exists {
+		log.Error.Println("We don't have a scraper for:", log.White(website))
 		listAvailableScrapers(sites)
 		os.Exit(1)
 	}
 
-	// If we're here, it means we have a valid scraper for a valid website!
+	log.DefaultSection.Println("Configuration")
+	printConfiguration(&options)
 
-	// Create the "cache" directory.
-	// This folder stores the scraped websites pages.
-	// If we can't create it, stop right there.
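+	// Create the cache and data directories up front: the cache keeps
+	// raw scraped pages between runs, while the data directory receives
+	// the downloaded movie stills.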
+	setupDirectories(&options)
+
+	// Create and configure the scraper for the chosen website.
+	// Each website gets its own cache subdirectory so cached pages
+	// from different websites never collide.
+	scraper := colly.NewCollector(
+		colly.CacheDir(filepath.Join(options.CacheDir, website)),
+	)
+
+	// Apply the CLI settings (proxy, timeout, async, debug, rate limits)
+	configureScraper(&scraper, &options)
+
+	// Run the scraper function matching the given website
+	siteFunc := sites[website]
+	siteFunc(&scraper, &options)
+
+	log.Info.Println("Finished scraping", log.White(website))
+
+}
+
+// clearScreen clears the terminal using ANSI escape codes.
+func clearScreen() {
+	print("\033[H\033[2J")
+}
+
+// handleShutdown listens for SIGINT (Ctrl+C) and SIGTERM so the
+// program can exit cleanly when interrupted. os.Interrupt already
+// covers SIGINT, so it doesn't need to be registered twice.
+func handleShutdown() {
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
+	go func() {
+		<-sigChan
+		log.Info.Println("Shutting down...")
+		os.Exit(130)
+	}()
+}
+
+func setupLogging(options *config.Options) {
+	// Pad the "info" prefix label so it aligns with the longer
+	// labels of the other levels on the terminal
+	log.Info = *log.Info.WithPrefix(log.Prefix{Text: " INFOS ", Style: log.Info.Prefix.Style})
+
+	// Disable styles and colors based on options
+	if options.NoStyle {
+		log.DisableStyling()
+	}
+	if options.NoColors {
+		log.DisableColor()
+	}
+}
+
+func setupDirectories(options *config.Options) {
+	// Create the cache directory that stores scraped website pages
 	if _, err := utils.CreateFolder(options.CacheDir); err != nil {
 		log.Error.Println("The cache directory", log.White(options.CacheDir), "can't be created:", log.Red(err))
 		os.Exit(1)
 	}
 
-	// Create the "data" directory.
-	// This folder stores the movie snapshots.
-	// If we can't create it, stop right there.
+	// Create the data directory that stores the movie stills
 	if _, err := utils.CreateFolder(options.DataDir); err != nil {
 		log.Error.Println("The data directory", log.White(options.DataDir), "can't be created:", log.Red(err))
 		os.Exit(1)
 	}
+}
 
-	// Instantiate main scraper
-	scraper := colly.NewCollector(
-		colly.CacheDir(options.CacheDir),
-	)
-
+func configureScraper(scraper **colly.Collector, options *config.Options) {
 	// Set up a proxy
 	if options.Proxy != "" {
-		if err := scraper.SetProxy(options.Proxy); err != nil {
+		if err := (*scraper).SetProxy(options.Proxy); err != nil {
 			log.Error.Println("Could not set proxy", log.White(options.Proxy), log.Red(err))
 		}
 	}
 
 	// Set request timeout
-	scraper.SetRequestTimeout(options.TimeOut)
+	(*scraper).SetRequestTimeout(options.TimeOut)
 
 	// Enable asynchronous jobs if asked
 	if options.Async {
-		scraper.Async = true
+		(*scraper).Async = true
 	}
 
 	// Enable Debugging level if asked through the CLI
 	if options.Debug {
 		log.EnableDebugMessages()
-		scraper.SetDebugger(&debug.PTermDebugger{})
+		(*scraper).SetDebugger(&debug.PTermDebugger{})
 	}
 
 	// Use random user agent and referer to avoid getting banned
-	extensions.RandomUserAgent(scraper)
-	extensions.Referer(scraper)
+	extensions.RandomUserAgent(*scraper)
+	extensions.Referer(*scraper)
 
 	// Limit parallelism and add random delay to avoid getting IP banned
-	if err := scraper.Limit(&colly.LimitRule{
+	if err := (*scraper).Limit(&colly.LimitRule{
 		DomainGlob:  "*",
 		Parallelism: options.Parallel,
 		RandomDelay: 1 * options.RandomDelay,
 	}); err != nil {
 		log.Warning.Println("Can't change scraper limit options:", log.Red(err))
 	}
-
-	log.DefaultSection.Println("Scraping")
-	log.Info.Println("Starting", options.Website, "Scraper")
-
-	// Here we call the website module depending on the website provided
-	// in the CLI by the user.
-	// This will call a file/module/func made specifically to scrap this website.
-	// All available scrapers are stored in the "websites" folder.
-	siteFunc(&scraper, &options)
-
-	log.Info.Println("Finished Scraping", options.Website, "!")
-
-}
-
-// Clear Terminal Screen
-func clearScreen() {
-	print("\033[H\033[2J")
-}
-
-// Signal handling function
-func handleShutdown() {
-	sigChan := make(chan os.Signal, 1)
-
-	// Listen for SIGINT (Ctrl+C) and SIGTERM (termination)
-	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM, syscall.SIGINT)
-
-	go func() {
-		<-sigChan
-		log.Info.Println("Shutting down...")
-		os.Exit(130)
-	}()
 }
 
 // Print configuration as a bullet list. Most
diff --git a/websites/blubeaver.go b/websites/blubeaver.go
index 36a4700..91212a3 100644
--- a/websites/blubeaver.go
+++ b/websites/blubeaver.go
@@ -45,10 +45,6 @@ func BluBeaverScraper(scraper **colly.Collector, options *config.Options) {
 	movieScraper := (*scraper).Clone()
 	movieScraper.AllowURLRevisit = false
 
-	if err := (*scraper).Visit(BluBeaverURL); err != nil {
-		log.Error.Println("Can't visit index page", log.White(BluBeaverURL), ":", log.Red(err))
-	}
-
 	// Print error just in case
 	(*scraper).OnError(func(r *colly.Response, err error) {
 		log.Error.Println(r.Request.URL, "\t",
 			log.White(r.StatusCode), "\nError:", log.Red(err))
@@ -187,10 +183,11 @@
 	})
 
-	// Ensure that all requests are completed before exiting
-	if (*scraper).Async {
-		(*scraper).Wait()
-		movieScraper.Wait()
+	if err := (*scraper).Visit(BluBeaverURL); err != nil {
+		log.Error.Println("Can't visit index page", log.White(BluBeaverURL), ":", log.Red(err))
 	}
+
+	// Ensure that all requests are completed before exiting
+	(*scraper).Wait()
+	movieScraper.Wait()
 }
diff --git a/websites/blusscreens.go b/websites/blusscreens.go
index 54b82c0..05ab40f 100644
--- a/websites/blusscreens.go
+++ b/websites/blusscreens.go
@@ -65,10 +65,6 @@ func BlusScraper(scraper **colly.Collector, options *config.Options) {
 	movieScraper := (*scraper).Clone()
 	movieScraper.AllowURLRevisit = false
 
-	if err := (*scraper).Visit(BlusURL); err != nil {
-		log.Error.Println("Can't visit index page", log.White(BlusURL), log.Red(err))
-	}
-
 	// Print error just in case
 	(*scraper).OnError(func(r *colly.Response, err error) {
 		log.Error.Println(r.Request.URL, "\t",
 			log.White(r.StatusCode), "\nError:", log.Red(err))
@@ -242,12 +238,13 @@
 		}
 	})
 
-	// Ensure that all requests are completed before exiting
-	if (*scraper).Async {
-		(*scraper).Wait()
-		movieScraper.Wait()
+	if err := (*scraper).Visit(BlusURL); err != nil {
+		log.Error.Println("Can't visit index page", log.White(BlusURL), log.Red(err))
 	}
+
+	// Ensure that all requests are completed before exiting
+	(*scraper).Wait()
+	movieScraper.Wait()
 }
 
-// Save summary of scrapped movies to a JSON file
+// Save summary of scraped movies to a JSON file
diff --git a/websites/dvdbeaver.go b/websites/dvdbeaver.go
index 318b5e2..de43ee2 100644
--- a/websites/dvdbeaver.go
+++ b/websites/dvdbeaver.go
@@ -49,10 +49,6 @@ func DVDBeaverScraper(scraper **colly.Collector, options *config.Options) {
 	movieScraper := (*scraper).Clone()
 	movieScraper.AllowURLRevisit = false
 
-	if err := (*scraper).Visit(BeaverURL); err != nil {
-		log.Error.Println("Can't visit index page:", log.Red(err))
-	}
-
 	// Print error just in case
 	(*scraper).OnError(func(r *colly.Response, err error) {
 		log.Error.Println(r.Request.URL, "\t",
 			log.White(r.StatusCode), "\nError:", log.Red(err))
@@ -71,9 +67,6 @@ func DVDBeaverScraper(scraper **colly.Collector, options *config.Options) {
 		if err := movieListScraper.Visit(movieListURL); err != nil {
log.Error.Println("Can't visit movie list page", log.White(movieListURL), log.Red(err)) } - - // In case we enabled asynchronous jobs - movieListScraper.Wait() }) // Before making a request print "Visiting ..." @@ -207,10 +200,11 @@ func DVDBeaverScraper(scraper **colly.Collector, options *config.Options) { } }) - // Ensure that all requests are completed before exiting - if (*scraper).Async { - (*scraper).Wait() - movieScraper.Wait() + if err := (*scraper).Visit(BeaverURL); err != nil { + log.Error.Println("Can't visit index page:", log.Red(err)) } + // Ensure that all requests are completed before exiting + (*scraper).Wait() + movieScraper.Wait() } diff --git a/websites/evanerichards.go b/websites/evanerichards.go index 25a501d..d6eda4c 100644 --- a/websites/evanerichards.go +++ b/websites/evanerichards.go @@ -38,10 +38,6 @@ func EvanERichardsScraper(scraper **colly.Collector, options *config.Options) { movieScraper := (*scraper).Clone() movieScraper.AllowURLRevisit = false - if err := (*scraper).Visit(EvanERichardsURL); err != nil { - log.Error.Println("Can't visit index page", log.White(BluBeaverURL), ":", log.Red(err)) - } - // Print error just in case (*scraper).OnError(func(r *colly.Response, err error) { log.Error.Println(r.Request.URL, "\t", log.White(r.StatusCode), "\nError:", log.Red(err)) @@ -128,10 +124,11 @@ func EvanERichardsScraper(scraper **colly.Collector, options *config.Options) { } }) - // Ensure that all requests are completed before exiting - if (*scraper).Async { - (*scraper).Wait() - movieScraper.Wait() + if err := (*scraper).Visit(EvanERichardsURL); err != nil { + log.Error.Println("Can't visit index page", log.White(BluBeaverURL), ":", log.Red(err)) } + // Ensure that all requests are completed before exiting + (*scraper).Wait() + movieScraper.Wait() } diff --git a/websites/film-grab.go b/websites/film-grab.go index 14d8f85..e67a5d8 100644 --- a/websites/film-grab.go +++ b/websites/film-grab.go @@ -37,10 +37,6 @@ func FilmGrabScraper(scraper **colly.Collector, options *config.Options) { movieScraper := (*scraper).Clone() movieScraper.AllowURLRevisit = false - if err := (*scraper).Visit(FilmGrabURL); err != nil { - log.Error.Println("Can't visit index page", log.White(FilmGrabURL), ":", log.Red(err)) - } - // Print error just in case (*scraper).OnError(func(r *colly.Response, err error) { log.Error.Println(r.Request.URL, "\t", log.White(r.StatusCode), "\nError:", log.Red(err)) @@ -118,10 +114,11 @@ func FilmGrabScraper(scraper **colly.Collector, options *config.Options) { }) - // Ensure that all requests are completed before exiting - if (*scraper).Async { - (*scraper).Wait() - movieScraper.Wait() + if err := (*scraper).Visit(FilmGrabURL); err != nil { + log.Error.Println("Can't visit index page", log.White(FilmGrabURL), ":", log.Red(err)) } + // Ensure that all requests are completed before exiting + (*scraper).Wait() + movieScraper.Wait() } diff --git a/websites/highdefdiscnews.go b/websites/highdefdiscnews.go index 0bd076b..5d331e4 100644 --- a/websites/highdefdiscnews.go +++ b/websites/highdefdiscnews.go @@ -37,10 +37,6 @@ func HighDefDiscNewsScraper(scraper **colly.Collector, options *config.Options) movieScraper := (*scraper).Clone() movieScraper.AllowURLRevisit = false - if err := (*scraper).Visit(HighDefDiscNewsURL); err != nil { - log.Error.Println("Can't visit index page", log.White(HighDefDiscNewsURL), ":", log.Red(err)) - } - // Print error just in case (*scraper).OnError(func(r *colly.Response, err error) { log.Error.Println(r.Request.URL, "\t", 
 			log.White(r.StatusCode), "\nError:", log.Red(err))
@@ -119,12 +115,13 @@
 	})
 
-	// Ensure that all requests are completed before exiting
-	if (*scraper).Async {
-		(*scraper).Wait()
-		movieScraper.Wait()
+	if err := (*scraper).Visit(HighDefDiscNewsURL); err != nil {
+		log.Error.Println("Can't visit index page", log.White(HighDefDiscNewsURL), ":", log.Red(err))
 	}
+
+	// Ensure that all requests are completed before exiting
+	(*scraper).Wait()
+	movieScraper.Wait()
 }
 
 // Isolate the movie's title by getting rid of various words on the right.
diff --git a/websites/movie_screencaps.go b/websites/movie_screencaps.go
index f21f28e..8bf644d 100644
--- a/websites/movie_screencaps.go
+++ b/websites/movie_screencaps.go
@@ -41,10 +41,6 @@ func ScreenCapsScraper(scraper **colly.Collector, options *config.Options) {
 	movieScraper := (*scraper).Clone()
 	movieScraper.AllowURLRevisit = false
 
-	if err := (*scraper).Visit(ScreenCapsURL); err != nil {
-		log.Error.Println("Can't visit index page", log.White(ScreenCapsURL), ":", log.Red(err))
-	}
-
 	// Print error just in case
 	(*scraper).OnError(func(r *colly.Response, err error) {
 		log.Error.Println(r.Request.URL, "\t",
 			log.White(r.StatusCode), "\nError:", log.Red(err))
@@ -165,10 +161,11 @@
 	})
 
-	// Ensure that all requests are completed before exiting
-	if (*scraper).Async {
-		(*scraper).Wait()
-		movieScraper.Wait()
+	if err := (*scraper).Visit(ScreenCapsURL); err != nil {
+		log.Error.Println("Can't visit index page", log.White(ScreenCapsURL), ":", log.Red(err))
 	}
+
+	// Ensure that all requests are completed before exiting
+	(*scraper).Wait()
+	movieScraper.Wait()
 }
diff --git a/websites/screenmusings.go b/websites/screenmusings.go
index 00542a1..4d3b310 100644
--- a/websites/screenmusings.go
+++ b/websites/screenmusings.go
@@ -34,10 +34,6 @@ func ScreenMusingsScraper(scraper **colly.Collector, options *config.Options) {
 	movieScraper := (*scraper).Clone()
 	movieScraper.AllowURLRevisit = false
 
-	if err := (*scraper).Visit(ScreenMusingsURL); err != nil {
-		log.Error.Println("Can't visit index page", log.White(ScreenMusingsURL), ":", log.Red(err))
-	}
-
 	// Print error just in case
 	(*scraper).OnError(func(r *colly.Response, err error) {
 		log.Error.Println(r.Request.URL, "\t",
 			log.White(r.StatusCode), "\nError:", log.Red(err))
@@ -133,10 +129,11 @@
 	})
 
-	// Ensure that all requests are completed before exiting
-	if (*scraper).Async {
-		(*scraper).Wait()
-		movieScraper.Wait()
+	if err := (*scraper).Visit(ScreenMusingsURL); err != nil {
+		log.Error.Println("Can't visit index page", log.White(ScreenMusingsURL), ":", log.Red(err))
 	}
+
+	// Ensure that all requests are completed before exiting
+	(*scraper).Wait()
+	movieScraper.Wait()
 }
diff --git a/websites/stillsfrmfilms.go b/websites/stillsfrmfilms.go
index f73a60c..7d896ae 100644
--- a/websites/stillsfrmfilms.go
+++ b/websites/stillsfrmfilms.go
@@ -38,10 +38,6 @@ func StillsFrmFilmsScraper(scraper **colly.Collector, options *config.Options) {
 	movieScraper := (*scraper).Clone()
 	movieScraper.AllowURLRevisit = false
 
-	if err := (*scraper).Visit(StillsFrmFilmsURL); err != nil {
-		log.Error.Println("Can't visit index page", log.White(StillsFrmFilmsURL), ":", log.Red(err))
-	}
-
 	// Print error just in case
 	(*scraper).OnError(func(r *colly.Response, err error) {
 		log.Error.Println(r.Request.URL, "\t",
 			log.White(r.StatusCode), "\nError:", log.Red(err))
@@ -130,10 +126,11 @@
 	})
 
-	// Ensure that all requests are completed before exiting
-	if (*scraper).Async {
-		(*scraper).Wait()
-		movieScraper.Wait()
+	if err := (*scraper).Visit(StillsFrmFilmsURL); err != nil {
+		log.Error.Println("Can't visit index page", log.White(StillsFrmFilmsURL), ":", log.Red(err))
 	}
+
+	// Ensure that all requests are completed before exiting
+	(*scraper).Wait()
+	movieScraper.Wait()
 }
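
Note on the pattern applied in every scraper above: handlers attached after Visit never see the index-page response, so each file now registers its OnError/OnHTML callbacks first, calls Visit on the index page last, and then waits unconditionally. Wait() blocks until an asynchronous collector's request queue drains and returns immediately on a synchronous one, which is why the old Async guard around the Wait() calls could be dropped. A minimal, self-contained sketch of that shape, assuming the colly v2 import path (the URL and selector are placeholders, not part of this codebase):

	package main

	import (
		"log"

		"github.com/gocolly/colly/v2"
	)

	func main() {
		// Async collectors return from Visit immediately, so the
		// final Wait() is what keeps the program alive until done.
		c := colly.NewCollector(colly.Async(true))

		// Register every callback before the first Visit so no
		// response can arrive without a handler in place.
		c.OnHTML("a[href]", func(e *colly.HTMLElement) {
			log.Println("found link:", e.Attr("href"))
		})
		c.OnError(func(r *colly.Response, err error) {
			log.Println(r.Request.URL, err)
		})

		// Kick off the crawl only after all handlers exist.
		if err := c.Visit("https://example.com"); err != nil {
			log.Println("can't visit index page:", err)
		}

		// Safe to call unconditionally: on a synchronous collector
		// Wait() returns at once, so no Async check is needed.
		c.Wait()
	}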