Skip to content

Commit

Permalink
ard: more deep search topics
Browse files Browse the repository at this point in the history
  • Loading branch information
alex1702 committed Aug 8, 2024
2 parents 73be726 + c00d6e3 commit 7a4bbca
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 2 deletions.
20 changes: 20 additions & 0 deletions src/main/java/mServer/crawler/sender/ard/ArdCrawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,17 @@ public class ArdCrawler extends MediathekCrawler {
private static final DateTimeFormatter DAY_PAGE_DATE_FORMATTER
= DateTimeFormatter.ofPattern("yyyy-MM-dd");

/**
 * Ids of topics that {@code addAdditionalTopics} appends to the topic set
 * as a workaround for topics missing from the regular results.
 *
 * <p>Each id is listed below the title of the show it belongs to. The values
 * appear to be unpadded Base64 encodings of {@code crid://} identifiers.
 */
public static final String[] MISSING_TOPIC_IDS = {
    // Dahoam is Dahoam
    "Y3JpZDovL2JyLmRlL2Jyb2FkY2FzdFNlcmllcy9icm9hZGNhc3RTZXJpZXM6L2JyZGUvZmVybnNlaGVuL2JheWVyaXNjaGVzLWZlcm5zZWhlbi9zZW5kdW5nZW4vZGFob2FtLWlzLWRhaG9hbQ",
    // Rote Rosen
    "Y3JpZDovL3dkci5kZS9vbmUvcm90ZXJvc2Vu",
    // Sturm der Liebe
    "Y3JpZDovL2Rhc2Vyc3RlLmRlL3N0dXJtIGRlciBsaWViZQ",
    // In aller Freundschaft - Die jungen Ärzte
    "Y3JpZDovL21kci5kZS9zZW5kZXJlaWhlbi9zdGFmZmVsc2VyaWUtaW4tYWxsZXItZnJldW5kc2NoYWZ0LWRpZS1qdW5nZW4tYWVyenRl"
};

/**
 * Creates the ARD crawler by delegating to the superclass constructor.
 *
 * @param ssearch   handed unchanged to the superclass constructor
 * @param startPrio handed unchanged to the superclass constructor
 */
public ArdCrawler(FilmeSuchen ssearch, int startPrio) {
// NOTE(review): the literals 0 and 1 are positional superclass arguments whose
// meaning is not visible from here - confirm against MediathekCrawler.
super(ssearch, SENDERNAME, 0, 1, startPrio);
}
Expand Down Expand Up @@ -127,6 +138,8 @@ private Set<ArdFilmInfoDto> getTopicsEntries() throws ExecutionException, Interr
}

Log.sysLog("ard mediathek topics: " + topics.size());
addAdditionalTopics(topics);
Log.sysLog("ard mediathek topics with additional: " + topics.size());
ConcurrentLinkedQueue<CrawlerUrlDTO> topicUrls = new ConcurrentLinkedQueue<>(topics);

final ArdTopicPageTask topicTask = new ArdTopicPageTask(this, topicUrls);
Expand All @@ -135,6 +148,13 @@ private Set<ArdFilmInfoDto> getTopicsEntries() throws ExecutionException, Interr
return filmInfos;
}

/**
 * Temporary workaround for missing topics: adds one topic URL per entry of
 * {@code MISSING_TOPIC_IDS} to the given set.
 *
 * <p>Each URL is built from {@code ArdConstants.TOPICS_URL}, formatted with
 * the topic id and {@code ArdConstants.TOPIC_PAGE_SIZE}.
 *
 * @param topics set that receives the additional topic URLs; modified in place
 */
private void addAdditionalTopics(Set<CrawlerUrlDTO> topics) {
    for (int index = 0; index < MISSING_TOPIC_IDS.length; index++) {
        final String missingTopicUrl = String.format(
            ArdConstants.TOPICS_URL,
            MISSING_TOPIC_IDS[index],
            ArdConstants.TOPIC_PAGE_SIZE);
        topics.add(new CrawlerUrlDTO(missingTopicUrl));
    }
}

private Set<CrawlerUrlDTO> getTopicEntriesBySender(final String sender) throws ExecutionException, InterruptedException {
ArdTopicsTask topicsTask
= new ArdTopicsTask(this, sender, createTopicsOverviewUrl(sender));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.google.gson.reflect.TypeToken;
import de.mediathekview.mlib.Config;
import de.mediathekview.mlib.tool.Log;
import jakarta.ws.rs.client.WebTarget;
import mServer.crawler.sender.MediathekReader;
import mServer.crawler.sender.ard.ArdFilmInfoDto;
Expand Down Expand Up @@ -82,8 +83,16 @@ public class ArdTopicPageTask extends ArdTaskBase<ArdFilmInfoDto, CrawlerUrlDTO>
TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL2JyLmRlL2Jyb2FkY2FzdFNlcmllcy9icm9hZGNhc3RTZXJpZXM6L2JyZGUvZmVybnNlaGVuL2JheWVyaXNjaGVzLWZlcm5zZWhlbi9zZW5kdW5nZW4vZGFob2FtLWlzLWRhaG9hbQ");
// Rote Rosen
TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3dkci5kZS9vbmUvcm90ZXJvc2Vu");
// Heimatflimmern
TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3dkci5kZS9oZWltYXRmbGltbWVybg");
// Sturm der Liebe
TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL2Rhc2Vyc3RlLmRlL3N0dXJtIGRlciBsaWViZQ");
// In aller Freundschaft - Die jungen Ärzte
TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL21kci5kZS9zZW5kZXJlaWhlbi9zdGFmZmVsc2VyaWUtaW4tYWxsZXItZnJldW5kc2NoYWZ0LWRpZS1qdW5nZW4tYWVyenRl");
// Euro 2024
TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3Nwb3J0c2NoYXUuZGUvc3BvcnRzY2hhdS1ldXJvLTIwMjQ");
// Olympia 2024
TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3Nwb3J0c2NoYXUuZGUvc3BvcnRzY2hhdS1vbHltcGlh");
// Um Himmels Willen
TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3dkci5kZS9VbSBIaW1tZWxzIFdpbGxlbiBPTkU");
}

public ArdTopicPageTask(MediathekReader aCrawler,
Expand Down Expand Up @@ -147,6 +156,7 @@ private ConcurrentLinkedQueue<CrawlerUrlDTO> createSubPageUrls(

private int getMaximumSubpages(String id) {
if (TOPICS_LOAD_ALL_PAGES.contains(id)) {
Log.sysLog("ARD search all: " + id);
return 999;
}
return 0;
Expand Down

0 comments on commit 7a4bbca

Please sign in to comment.