Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARTE: migrate to new category api #918

Merged
merged 3 commits into from
Sep 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ sourceCompatibility = JavaVersion.VERSION_17
targetCompatibility = JavaVersion.VERSION_17
group = 'de.mediathekview'
archivesBaseName = "MServer"
version = '3.1.218'
version = '3.1.219'

def jarName = 'MServer.jar'
def mainClass = 'mServer.Main'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@
*/
public class ArteCategoryFilmListDeserializer implements JsonDeserializer<ArteCategoryFilmsDTO> {

private static final String JSON_ELEMENT_CONTENT = "content";
private static final String JSON_ELEMENT_DATA = "data";
private static final String JSON_ELEMENT_NEXTPAGE = "nextPage";
private static final String JSON_ELEMENT_PROGRAMID = "programId";
private static final String JSON_ELEMENT_VALUE = "value";
private static final String JSON_ELEMENT_ZONES = "zones";

@Override
public ArteCategoryFilmsDTO deserialize(JsonElement aJsonElement, Type aType, JsonDeserializationContext aContext) throws JsonParseException {
Expand All @@ -29,36 +30,37 @@ public ArteCategoryFilmsDTO deserialize(JsonElement aJsonElement, Type aType, Js
if(aJsonElement.getAsJsonObject().has(JSON_ELEMENT_VALUE)) {
rootElement = aJsonElement.getAsJsonObject().get(JSON_ELEMENT_VALUE);
}
final JsonElement dataElement = rootElement.getAsJsonObject().get(JSON_ELEMENT_DATA);
if (dataElement == null || dataElement.isJsonNull() || !dataElement.isJsonArray()) {
Log.errorLog(12834940, "data element not found");
final JsonElement zoneElement = rootElement.getAsJsonObject().get(JSON_ELEMENT_ZONES);
if (zoneElement == null || zoneElement.isJsonNull() || !zoneElement.isJsonArray()) {
Log.errorLog(12834940, "zones element not found");
return dto;
}

for (JsonElement jsonElement : dataElement.getAsJsonArray()) {
String programId = jsonElement.getAsJsonObject().get(JSON_ELEMENT_PROGRAMID).getAsString();
if (programId != null) {
if (programId.startsWith("RC-")) {
try {
long collectionId = Long.parseLong(programId.replace("RC-", ""));
dto.addCollection(String.format("RC-%06d", collectionId));
} catch (NumberFormatException e) {
Log.errorLog(12834939, "Invalid collection id: " + programId);
for (JsonElement jsonElement : zoneElement.getAsJsonArray()) {
if(jsonElement.getAsJsonObject().has(JSON_ELEMENT_CONTENT)) {
final JsonObject contentObject = jsonElement.getAsJsonObject().get(JSON_ELEMENT_CONTENT).getAsJsonObject();
if (contentObject.has(JSON_ELEMENT_DATA)) {
for(JsonElement dataElement : contentObject.get(JSON_ELEMENT_DATA).getAsJsonArray()) {
String programId = dataElement.getAsJsonObject().get(JSON_ELEMENT_PROGRAMID).getAsString();
if (programId != null) {
if (programId.startsWith("RC-")) {
try {
long collectionId = Long.parseLong(programId.replace("RC-", ""));
dto.addCollection(String.format("RC-%06d", collectionId));
} catch (NumberFormatException e) {
Log.errorLog(12834939, "Invalid collection id: " + programId);
}
} else {
dto.addProgramId(programId);
}
}
}
} else {
dto.addProgramId(programId);
}
}
}

dto.setNextPage(hasNextPage(rootElement.getAsJsonObject()));
dto.setNextPage(false);

return dto;
}

/**
 * Reports whether the given JSON object announces a further result page.
 *
 * @param aJsonObject the object whose "nextPage" member is inspected
 * @return {@code true} only if the "nextPage" member exists and is not JSON null
 */
private static boolean hasNextPage(JsonObject aJsonObject) {
  // JsonObject.get yields null (not JsonNull) when the member is absent,
  // so guard against it instead of dereferencing blindly.
  final JsonElement nextPageElement = aJsonObject.get(JSON_ELEMENT_NEXTPAGE);
  return nextPageElement != null && !nextPageElement.isJsonNull();
}
}
58 changes: 20 additions & 38 deletions src/main/java/mServer/crawler/sender/arte/MediathekArte.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,25 +45,23 @@ public class MediathekArte extends MediathekReader {
private static final Logger LOG = LogManager.getLogger(MediathekArte.class);
private static final String ARTE_API_TAG_URL_PATTERN = "https://api.arte.tv/api/opa/v3/videos?channel=%s&arteSchedulingDay=%s";

private static final String URL_SUBCATEGORY
= "https://www.arte.tv/api/rproxy/emac/v3/%s/web/data/MOST_RECENT_SUBCATEGORY/?subCategoryCode=%s&page=%s&limit=50";

private static final String[] SUBCATEGORIES = new String[]{
"WEB", "AUT",
"AJO", "AUV", "KUL", "DCY", "ENQ", "JUN",
"ACC", "CMG", "FLM", "CMU", "MCL",
"CHU", "FIC", "SES",
"ART", "POP", "IDE",
"ADS", "BAR", "CLA", "JAZ", "MUA", "MUD", "OPE", "MUE", "HIP", "MET",
"ENB", "ENN", "SAN", "TEC",
"ATA", "EVA", "NEA", "VIA",
"CIV", "LGP", "XXE"
private static final String URL_CATEGORY = "https://www.arte.tv/api/rproxy/emac/v4/%s/web/pages/%s";

private static final String[] CATEGORIES = {
"ARS",
"DOR",
"CIN",
"SER",
"ACT",
"CPO",
"SCI",
"DEC",
"HIS"
};

private static final String COLLECTION_URL = "https://api.arte.tv/api/opa/v3/programs/%s/%s";

private static final DateTimeFormatter ARTE_API_DATEFORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd");
private static final boolean PARSE_SUBCATEGORY_SUB_PAGES = false; // Flag, ob Unterseiten der Unterkategorien verarbeitet werden soll

public static final String ARTE_EN = "ARTE.EN";
public static final String ARTE_ES = "ARTE.ES";
Expand All @@ -86,7 +84,7 @@ protected synchronized void meldungStart() {
super.meldungStart();

senderLanguages.put(Const.ARTE_DE, "de");
senderLanguages.put(Const.ARTE_FR, "fr");
// senderLanguages.put(Const.ARTE_FR, "fr");
/*if (LocalDate.now().getDayOfYear() % 2 == 0) {
senderLanguages.put(ARTE_EN, "en");
senderLanguages.put(ARTE_ES, "es");
Expand Down Expand Up @@ -146,9 +144,9 @@ public void addToList() {

private void addCategories() {
senderLanguages.forEach((sender, langCode) -> {
for (String subCategory : SUBCATEGORIES) {
String subCategoryUrl = String.format(URL_SUBCATEGORY, langCode.toLowerCase(), subCategory, 1);
listeThemen.add(new String[]{sender, langCode, subCategory, subCategoryUrl});
for (String category : CATEGORIES) {
String categoryUrl = String.format(URL_CATEGORY, langCode.toLowerCase(), category);
listeThemen.add(new String[]{sender, langCode, category, categoryUrl});
}
});
}
Expand Down Expand Up @@ -213,15 +211,15 @@ public void run() {
String[] link;
while (!Config.getStop() && (link = listeThemen.getListeThemen()) != null) {
meldungProgress(link[2] + "/" + link[3] /* url */);
loadSubCategory(link[0], link[1], link[2], link[3]);
loadCategory(link[0], link[1], link[2], link[3]);
}
} catch (Exception ex) {
Log.errorLog(894330854, ex, "");
}
meldungThreadUndFertig();
}

private void loadSubCategory(String sender, String langCode, String aCategory, String aUrl) {
private void loadCategory(String sender, String langCode, String aCategory, String aUrl) {
Gson gson = new GsonBuilder()
.registerTypeAdapter(ArteCategoryFilmsDTO.class, new ArteCategoryFilmListDeserializer())
.create();
Expand All @@ -232,29 +230,13 @@ private void loadSubCategory(String sender, String langCode, String aCategory, S
.registerTypeAdapter(ArteCategoryFilmsDTO.class, new ArteCollectionChildDeserializer())
.create();

// erste Seite laden
int i = 2;
ArteCategoryFilmsDTO dto = loadSubCategoryPage(gson, sender, aUrl);
if (dto != null) {
loadCollections(sender, langCode, gsonCollectionParent, gsonCollectionChild, dto);

ArteCategoryFilmsDTO nextDto = dto;
while (PARSE_SUBCATEGORY_SUB_PAGES && nextDto != null && nextDto.hasNextPage()) {

// weitere Seiten laden und zu programId-liste des ersten DTO hinzufügen
String url = String.format(URL_SUBCATEGORY, langCode.toLowerCase(), aCategory, i);
nextDto = loadSubCategoryPage(gson, sender, url);
if (nextDto != null) {
loadCollections(sender, langCode, gsonCollectionParent, gsonCollectionChild, nextDto);
nextDto.getProgramIds().forEach(programId -> dto.addProgramId(programId));
}

i++;
}

Log.sysLog(String.format("%s: %d, %d", aCategory, dto.getProgramIds().size(), dto.getCollectionIds().size()));
// alle programIds verarbeiten
ListeFilme loadedFilme = loadPrograms(sender, langCode, dto);
loadedFilme.forEach((film) -> addFilm(film));
loadedFilme.forEach(film -> addFilm(film));
Log.sysLog(String.format("%s: Subcategory %s: %d Filme", sender, aCategory, loadedFilme.size()));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@ public ArteCategoryFilmListDeserializerTest(String aJsonFile, String[] aProgramI
@Parameterized.Parameters
public static Collection<Object[]> data() {
return Arrays.asList(new Object[][]{
{"/arte/arte_subcategory_old_page1.json", new String[]{"078666-012-A", "078664-000-A", "080928-000-A", "074484-000-A", "074485-000-A", "079479-002-A", "080921-000-A", "082406-000-A", "072391-000-A", "080920-000-A"}, true},
{"/arte/arte_subcategory_old_page_last.json", new String[]{"062866-009-A"}, false},
{"/arte/arte_subcategory_page.json", new String[]{"107023-009-A","086862-000-A","107342-038-A","081587-000-A","072442-000-A"}, true}
{"/arte/arte_category.json", new String[]{"112511-000-A", "047389-000-A", "109066-000-A", "082669-000-A", "003982-000-A", "021109-000-A"}, false},
});
}

Expand Down
Loading
Loading