Merge pull request #974 from mediathekview/devOrfOn
Dev orf on
codingPF authored Apr 17, 2024
2 parents cb98d9a + 20f8137 commit 9630cb8
Showing 73 changed files with 65,403 additions and 2,339 deletions.
25 changes: 14 additions & 11 deletions MServer-Config.yaml
@@ -1,7 +1,7 @@
#### Server configurations ####

# The maximum amount of cpu threads to be used.
maximumCpuThreads: 10
maximumCpuThreads: 1

# The maximum duration in minutes the server should run.<br>
# If set to 0 the server runs without a time limit.
@@ -24,15 +24,17 @@ senderIncluded:
#- ARTE_PL
#- ARTE_IT
#- ARTE_ES
#- 3SAT
#- DREISAT
#- FUNK
#- KIKA
#- DW
#- ORF
# - DW
- ORF
#- PHOENIX
#- SRF
- SR
#- SR
#- ZDF

#SRF,SR,PHOENIX,ORF,KIKA,DW,3SAT

# If set, the server stays awake after the crawler run and restarts the run after the given amount.
#schedules:
@@ -133,7 +135,7 @@ topicsSearchEnabled: true
# The maximum amount of sub pages to be crawled.<br>
# Example: If a Sendung overview page has 10 pages with videos for this Sendung and
# the amount set by this is 5 then the crawler crawls pages 1 to 5.
maximumSubpages: 1
maximumSubpages: 5

# The maximum amount of days in the past to be crawled for the "Sendung Verpasst?" section.
maximumDaysForSendungVerpasstSection: 7
@@ -155,8 +157,7 @@ senderConfigurations:
#10,20,40 ok
maximumSubpages: 0
ORF:
#2,4,8 ok
maximumUrlsPerTask: 40
maximumRequestsPerSecond: 10.0
ARTE_DE:
maximumUrlsPerTask: 1
maximumDaysForSendungVerpasstSectionFuture: 0
@@ -178,10 +179,12 @@ senderConfigurations:
maximumRequestsPerSecond: 10.0
FUNK:
maximumUrlsPerTask: 99
DW:
maximumSubpages: 0
SR:
DREISAT:
maximumSubpages: 5
maximumDaysForSendungVerpasstSection: 60
PHOENIX:
maximumSubpages: 500


# configure string variables
crawlerApiParams:
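
The senderConfigurations block above overrides the global crawler defaults per sender; the ORF entry now carries only a request-rate cap instead of a URL batch size. A minimal sketch of that override-with-fallback idea (plain Java, not the project's config classes; the global default value is hypothetical):

// Illustration only (not MServer code): senderConfigurations entries override
// the global defaults per sender; anything not overridden falls back.
import java.util.Map;
import java.util.Optional;

public class SenderConfigSketch {
  // hypothetical global default; the real default lives in the top-level config
  static final double DEFAULT_REQUESTS_PER_SECOND = 999.0;
  // per-sender override, mirroring "ORF: maximumRequestsPerSecond: 10.0" above
  static final Map<String, Double> SENDER_OVERRIDES = Map.of("ORF", 10.0);

  static double requestsPerSecondFor(String sender) {
    return Optional.ofNullable(SENDER_OVERRIDES.get(sender))
        .orElse(DEFAULT_REQUESTS_PER_SECOND);
  }

  public static void main(String[] args) {
    System.out.println(requestsPerSecondFor("ORF")); // 10.0
    System.out.println(requestsPerSecondFor("ZDF")); // 999.0 (fallback to default)
  }
}
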
15 changes: 13 additions & 2 deletions src/main/java/de/mediathekview/mserver/base/utils/JsonUtils.java
@@ -77,7 +77,18 @@ public static Optional<Integer> getAttributeAsInt(final JsonObject jsonObject, f
}

public static Optional<String> getElementValueAsString(final JsonElement aJsonElement, final String... aElementIds) {
Optional<String> rs = Optional.empty();
Optional<JsonElement> rs = JsonUtils.getElement(aJsonElement, aElementIds);
if (rs.isPresent()) {
return Optional.of(rs.get().getAsString());
}
return Optional.empty();
}

public static Optional<JsonElement> getElement(final JsonElement aJsonElement, final String... aElementIds) {
Optional<JsonElement> rs = Optional.empty();
if (aElementIds == null || aElementIds.length == 0) {
return rs;
}
JsonObject aJsonObject = aJsonElement.getAsJsonObject();
for (int i = 0; i < aElementIds.length-1; i++) {
String elementId = aElementIds[i];
@@ -91,7 +102,7 @@ public static Optional<String> getElementValueAsString(final JsonElement aJsonEl
//
String elementId = aElementIds[aElementIds.length-1];
if (aJsonObject != null && aJsonObject.has(elementId) && !aJsonObject.get(elementId).isJsonNull()) {
rs = Optional.of(aJsonObject.get(elementId).getAsString());
rs = Optional.of(aJsonObject.get(elementId));
}
//
return rs;
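
The refactoring above splits the string lookup into a generic getElement walk plus a thin getAsString wrapper. A short usage sketch; the JSON payload and key names are made up, only the two JsonUtils signatures are taken from the diff:

import com.google.gson.JsonElement;
import com.google.gson.JsonParser;
import de.mediathekview.mserver.base.utils.JsonUtils;
import java.util.Optional;

public class JsonUtilsUsageSketch {
  public static void main(String[] args) {
    // made-up payload; no particular API response shape is implied here
    JsonElement root = JsonParser.parseString(
        "{\"payload\":{\"title\":\"Some show\",\"embedded\":{\"id\":42}}}");

    // walks payload -> title and returns the leaf as a String
    Optional<String> title = JsonUtils.getElementValueAsString(root, "payload", "title");

    // returns the raw JsonElement, useful when the leaf is not a string
    Optional<JsonElement> id = JsonUtils.getElement(root, "payload", "embedded", "id");

    System.out.println(title.orElse("<missing>"));        // Some show
    id.ifPresent(e -> System.out.println(e.getAsInt()));  // 42
  }
}
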
@@ -1,6 +1,7 @@
package de.mediathekview.mserver.base.webaccess;

import okhttp3.ConnectionPool;
import okhttp3.Headers;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;
@@ -11,7 +12,12 @@
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;

import com.google.gson.Gson;
import com.google.gson.JsonElement;

import java.io.IOException;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;

import static jakarta.ws.rs.core.HttpHeaders.CONTENT_LENGTH;
@@ -41,11 +47,32 @@ public JsoupConnection(final int timeout, final int threadPoolSize) {
* @throws IOException If no connection to the url could be opened.
*/
public String requestBodyAsString(final String url) throws IOException {
return requestBodyAsString(url, null);

}
/**
* Request an url and receive the body as String, sending the given headers.
*
* @param url The url to request.
* @param headerMap Additional request headers as key/value pairs; may be null.
* @return The response body as String.
* @throws IOException If no connection to the url could be opened.
*/
public String requestBodyAsString(final String url, final Map<String, String> headerMap) throws IOException {
int retry = 0;
int httpResponseCode;
final String responseString = "";
do {
final Request request = new Request.Builder().url(url).build();
okhttp3.Headers.Builder headerBuilder = new Headers.Builder();
if (headerMap != null) {
for (Entry<String, String> headerValue : headerMap.entrySet()) {
headerBuilder.add(headerValue.getKey(), headerValue.getValue());
}
}
Request request = new Request.Builder()
.url(url)
.headers(headerBuilder.build())
.build();

try (final Response response = client.newCall(request).execute()) {
httpResponseCode = response.code();
if (response.body() == null || httpResponseCode == 404 || httpResponseCode == 410) {
@@ -62,6 +89,17 @@ public String requestBodyAsString(final String url) throws IOException {
return responseString;
}

/**
* Request an url and receive the body as a Gson JsonElement.
*
* @param url The url to request.
* @param headerMap Additional request headers as key/value pairs; may be null.
* @return request body parsed as JsonElement
* @throws IOException If no connection to the url could be opened.
*/
public JsonElement requestBodyAsJsonElement(final String url, final Map<String, String> headerMap) throws IOException {
return new Gson().fromJson(requestBodyAsString(url, headerMap), JsonElement.class);
}

/**
* Request an url and receive the body as HTML JSOUP Document
*
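
A short usage sketch for the two new header-aware JsoupConnection methods; the URL, header values and constructor arguments are placeholders, only the method signatures come from the diff:

import com.google.gson.JsonElement;
import de.mediathekview.mserver.base.webaccess.JsoupConnection;
import java.io.IOException;
import java.util.Map;

public class JsoupConnectionUsageSketch {
  public static void main(String[] args) throws IOException {
    // constructor arguments (timeout, thread pool size) are placeholder values
    JsoupConnection connection = new JsoupConnection(30, 4);

    Map<String, String> headers = Map.of(
        "Authorization", "Bearer <token>",  // placeholder
        "Accept", "application/json");

    // same retry and 404/410 handling as requestBodyAsString(url), plus headers
    String body = connection.requestBodyAsString("https://example.org/api/items", headers);

    // convenience wrapper: parse the same kind of response into a Gson tree
    JsonElement json = connection.requestBodyAsJsonElement("https://example.org/api/items", headers);

    System.out.println(body.length() + " chars, JSON object: "
        + (json != null && json.isJsonObject()));
  }
}
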
@@ -24,7 +24,7 @@
import de.mediathekview.mserver.crawler.dw.DwCrawler;
import de.mediathekview.mserver.crawler.funk.FunkCrawler;
import de.mediathekview.mserver.crawler.kika.KikaApiCrawler;
import de.mediathekview.mserver.crawler.orf.OrfCrawler;
import de.mediathekview.mserver.crawler.orfon.OrfOnCrawler;
import de.mediathekview.mserver.crawler.phoenix.PhoenixCrawler;
import de.mediathekview.mserver.crawler.sr.SrCrawler;
import de.mediathekview.mserver.crawler.srf.SrfCrawler;
@@ -519,8 +519,10 @@ private void initializeCrawler(final MServerConfigManager rootConfig) {
new KikaApiCrawler(forkJoinPool, messageListeners, progressListeners, rootConfig));
crawlerMap.put(
Sender.DW, new DwCrawler(forkJoinPool, messageListeners, progressListeners, rootConfig));
//crawlerMap.put(
// Sender.ORF, new OrfCrawler(forkJoinPool, messageListeners, progressListeners, rootConfig));
crawlerMap.put(
Sender.ORF, new OrfCrawler(forkJoinPool, messageListeners, progressListeners, rootConfig));
Sender.ORF, new OrfOnCrawler(forkJoinPool, messageListeners, progressListeners, rootConfig));
crawlerMap.put(
Sender.PHOENIX,
new PhoenixCrawler(forkJoinPool, messageListeners, progressListeners, rootConfig));
@@ -193,7 +193,6 @@ public Document requestBodyAsXmlDocument(String url) throws IOException {
* @return size of the response in KB or -1 in case we could not determine the size.
*/
public long determineFileSizeInKB(String url) {
getRateLimiter().acquire();
return getConnection().determineFileSize(url) / 1024;
}

@@ -203,7 +202,6 @@ public long determineFileSizeInKB(String url) {
* @return return true if the request was successfully processed by the server
*/
public boolean requestUrlExists(String url) {
getRateLimiter().acquire();
return getConnection().requestUrlExists(url);
}
/**
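
The two deletions above drop the extra acquire() calls for the file-size and URL-exists checks; request throttling otherwise follows maximumRequestsPerSecond. A sketch of that throttling idea, assuming the crawler's rate limiter behaves like Guava's RateLimiter (the class below is a stand-in, not the project's code):

import com.google.common.util.concurrent.RateLimiter;

public class RateLimitSketch {
  // 10.0 mirrors "maximumRequestsPerSecond: 10.0" from the ORF configuration above
  private final RateLimiter rateLimiter = RateLimiter.create(10.0);

  public String fetch(String url) {
    // blocks until a permit is free, so at most ~10 requests per second go out
    rateLimiter.acquire();
    return doRequest(url); // placeholder for the real HTTP call
  }

  private String doRequest(String url) {
    return ""; // stub
  }
}
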

This file was deleted.

140 changes: 0 additions & 140 deletions src/main/java/de/mediathekview/mserver/crawler/orf/OrfCrawler.java

This file was deleted.
