Skip to content

Commit

Permalink
fix: implement sharding compatible cleanup for various bits
Browse files Browse the repository at this point in the history
Signed-off-by: Robin Appelman <robin@icewind.nl>
  • Loading branch information
icewind1991 committed Aug 21, 2024
1 parent 7932b86 commit 6adafb0
Show file tree
Hide file tree
Showing 12 changed files with 271 additions and 81 deletions.
86 changes: 69 additions & 17 deletions apps/files/lib/BackgroundJob/DeleteOrphanedItems.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,34 +52,86 @@ public function run($argument) {
* @param string $typeCol
* @return int Number of deleted entries
*/
protected function cleanUp($table, $idCol, $typeCol) {
protected function cleanUp(string $table, string $idCol, string $typeCol): int {
$deletedEntries = 0;

$query = $this->connection->getQueryBuilder();
$query->select('t1.' . $idCol)
->from($table, 't1')
->where($query->expr()->eq($typeCol, $query->expr()->literal('files')))
->leftJoin('t1', 'filecache', 't2', $query->expr()->eq($query->expr()->castColumn('t1.' . $idCol, IQueryBuilder::PARAM_INT), 't2.fileid'))
->andWhere($query->expr()->isNull('t2.fileid'))
->groupBy('t1.' . $idCol)
->setMaxResults(self::CHUNK_SIZE);

$deleteQuery = $this->connection->getQueryBuilder();
$deleteQuery->delete($table)
->where($deleteQuery->expr()->in($idCol, $deleteQuery->createParameter('objectid')));
->where($deleteQuery->expr()->eq($idCol, $deleteQuery->createParameter('objectid')));

if ($this->connection->getShardDefinition('filecache')) {
$sourceIdChunks = $this->getItemIds($table, $idCol, $typeCol, 1000);
foreach ($sourceIdChunks as $sourceIdChunk) {
$deletedSources = $this->findMissingSources($sourceIdChunk);
$deleteQuery->setParameter('objectid', $deletedSources, IQueryBuilder::PARAM_INT_ARRAY);
$deletedEntries += $deleteQuery->executeStatement();
}
} else {
$query = $this->connection->getQueryBuilder();
$query->select('t1.' . $idCol)
->from($table, 't1')
->where($query->expr()->eq($typeCol, $query->expr()->literal('files')))
->leftJoin('t1', 'filecache', 't2', $query->expr()->eq($query->expr()->castColumn('t1.' . $idCol, IQueryBuilder::PARAM_INT), 't2.fileid'))
->andWhere($query->expr()->isNull('t2.fileid'))
->groupBy('t1.' . $idCol)
->setMaxResults(self::CHUNK_SIZE);

$deleteQuery = $this->connection->getQueryBuilder();
$deleteQuery->delete($table)
->where($deleteQuery->expr()->in($idCol, $deleteQuery->createParameter('objectid')));

$deletedInLastChunk = self::CHUNK_SIZE;
while ($deletedInLastChunk === self::CHUNK_SIZE) {
$chunk = $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
$deletedInLastChunk = count($chunk);
$deletedInLastChunk = self::CHUNK_SIZE;
while ($deletedInLastChunk === self::CHUNK_SIZE) {
$chunk = $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
$deletedInLastChunk = count($chunk);

$deleteQuery->setParameter('objectid', $chunk, IQueryBuilder::PARAM_INT_ARRAY);
$deletedEntries += $deleteQuery->executeStatement();
$deleteQuery->setParameter('objectid', $chunk, IQueryBuilder::PARAM_INT_ARRAY);
$deletedEntries += $deleteQuery->executeStatement();
}
}

return $deletedEntries;
}

/**
* @param string $table
* @param string $idCol
* @param string $typeCol
* @param int $chunkSize
* @return \Iterator<int[]>
* @throws \OCP\DB\Exception
*/
private function getItemIds(string $table, string $idCol, string $typeCol, int $chunkSize): \Iterator {
$query = $this->connection->getQueryBuilder();
$query->select($idCol)
->from($table)
->where($query->expr()->eq($typeCol, $query->expr()->literal('files')))
->groupBy($idCol)
->andWhere($query->expr()->gt($idCol, $query->createParameter('min_id')))
->setMaxResults($chunkSize);

$minId = 0;
while (true) {
$query->setParameter('min_id', $minId);
$rows = $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
if (count($rows) > 0) {
$minId = $rows[count($rows) - 1];
yield $rows;
} else {
break;
}
}
}

private function findMissingSources(array $ids): array {
$qb = $this->connection->getQueryBuilder();
$qb->select('fileid')
->from('filecache')
->where($qb->expr()->in('fileid', $qb->createNamedParameter($ids, IQueryBuilder::PARAM_INT_ARRAY)));
$found = $qb->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
return array_diff($ids, $found);
}

/**
* Deleting orphaned system tag mappings
*
Expand Down
102 changes: 66 additions & 36 deletions apps/files/lib/Command/DeleteOrphanedFiles.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,34 +35,29 @@ protected function configure(): void {

public function execute(InputInterface $input, OutputInterface $output): int {
$deletedEntries = 0;
$fileIdsByStorage = [];

$query = $this->connection->getQueryBuilder();
$query->select('fc.fileid')
->from('filecache', 'fc')
->where($query->expr()->isNull('s.numeric_id'))
->leftJoin('fc', 'storages', 's', $query->expr()->eq('fc.storage', 's.numeric_id'))
->setMaxResults(self::CHUNK_SIZE);
$deletedStorages = array_diff($this->getReferencedStorages(), $this->getExistingStorages());

$deleteExtended = !$input->getOption('skip-filecache-extended');
if ($deleteExtended) {
$fileIdsByStorage = $this->getFileIdsForStorages($deletedStorages);
}

$deleteQuery = $this->connection->getQueryBuilder();
$deleteQuery->delete('filecache')
->where($deleteQuery->expr()->eq('fileid', $deleteQuery->createParameter('objectid')));
->where($deleteQuery->expr()->in('storage', $deleteQuery->createParameter('storage_ids')));

$deletedInLastChunk = self::CHUNK_SIZE;
while ($deletedInLastChunk === self::CHUNK_SIZE) {
$deletedInLastChunk = 0;
$result = $query->execute();
while ($row = $result->fetch()) {
$deletedInLastChunk++;
$deletedEntries += $deleteQuery->setParameter('objectid', (int) $row['fileid'])
->execute();
}
$result->closeCursor();
$deletedStorageChunks = array_chunk($deletedStorages, self::CHUNK_SIZE);
foreach ($deletedStorageChunks as $deletedStorageChunk) {
$deleteQuery->setParameter('storage_ids', $deletedStorageChunk, IQueryBuilder::PARAM_INT_ARRAY);
$deletedEntries += $deleteQuery->executeStatement();
}

$output->writeln("$deletedEntries orphaned file cache entries deleted");

if (!$input->getOption('skip-filecache-extended')) {
$deletedFileCacheExtended = $this->cleanupOrphanedFileCacheExtended();
if ($deleteExtended) {
$deletedFileCacheExtended = $this->cleanupOrphanedFileCacheExtended($fileIdsByStorage);
$output->writeln("$deletedFileCacheExtended orphaned file cache extended entries deleted");
}

Expand All @@ -72,28 +67,63 @@ public function execute(InputInterface $input, OutputInterface $output): int {
return self::SUCCESS;
}

private function cleanupOrphanedFileCacheExtended(): int {
$deletedEntries = 0;

private function getReferencedStorages(): array {
$query = $this->connection->getQueryBuilder();
$query->select('fce.fileid')
->from('filecache_extended', 'fce')
->leftJoin('fce', 'filecache', 'fc', $query->expr()->eq('fce.fileid', 'fc.fileid'))
->where($query->expr()->isNull('fc.fileid'))
->setMaxResults(self::CHUNK_SIZE);
$query->select('storage')
->from('filecache')
->groupBy('storage')
->runAcrossAllShards();
return $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
}

$deleteQuery = $this->connection->getQueryBuilder();
$deleteQuery->delete('filecache_extended')
->where($deleteQuery->expr()->in('fileid', $deleteQuery->createParameter('idsToDelete')));
private function getExistingStorages(): array {
$query = $this->connection->getQueryBuilder();
$query->select('numeric_id')
->from('storages')
->groupBy('numeric_id');
return $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
}

$result = $query->executeQuery();
while ($result->rowCount() > 0) {
$idsToDelete = $result->fetchAll(\PDO::FETCH_COLUMN);
/**
* @param int[] $storageIds
* @return array<int, int[]>
*/
private function getFileIdsForStorages(array $storageIds): array {
$query = $this->connection->getQueryBuilder();
$query->select('storage', 'fileid')
->from('filecache')
->where($query->expr()->in('storage', $query->createParameter('storage_ids')));

$result = [];
$storageIdChunks = array_chunk($storageIds, self::CHUNK_SIZE);
foreach ($storageIdChunks as $storageIdChunk) {
$query->setParameter('storage_ids', $storageIdChunk, IQueryBuilder::PARAM_INT_ARRAY);
$chunk = $query->executeQuery()->fetchAll();
foreach ($chunk as $row) {
$result[$row['storage']][] = $row['fileid'];
}
}
return $result;
}

$deleteQuery->setParameter('idsToDelete', $idsToDelete, IQueryBuilder::PARAM_INT_ARRAY);
$deletedEntries += $deleteQuery->executeStatement();
/**
* @param array<int, int[]> $fileIdsByStorage
* @return int
*/
private function cleanupOrphanedFileCacheExtended(array $fileIdsByStorage): int {
$deletedEntries = 0;

$result = $query->executeQuery();
$deleteQuery = $this->connection->getQueryBuilder();
$deleteQuery->delete('filecache_extended')
->where($deleteQuery->expr()->in('fileid', $deleteQuery->createParameter('file_ids')));

foreach ($fileIdsByStorage as $storageId => $fileIds) {
$deleteQuery->hintShardKey('storage', $storageId, true);
$fileChunks = array_chunk($fileIds, self::CHUNK_SIZE);
foreach ($fileChunks as $fileChunk) {
$deleteQuery->setParameter('file_ids', $fileChunk, IQueryBuilder::PARAM_INT_ARRAY);
$deletedEntries += $deleteQuery->executeStatement();
}
}

return $deletedEntries;
Expand Down
14 changes: 10 additions & 4 deletions apps/files/tests/Command/DeleteOrphanedFilesTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,19 @@ protected function tearDown(): void {
}

protected function getFile($fileId) {
$stmt = $this->connection->executeQuery('SELECT * FROM `*PREFIX*filecache` WHERE `fileid` = ?', [$fileId]);
return $stmt->fetchAll();
$query = $this->connection->getQueryBuilder();
$query->select('*')
->from('filecache')
->where($query->expr()->eq('fileid', $query->createNamedParameter($fileId)));
return $query->executeQuery()->fetchAll();
}

protected function getMounts($storageId) {
$stmt = $this->connection->executeQuery('SELECT * FROM `*PREFIX*mounts` WHERE `storage_id` = ?', [$storageId]);
return $stmt->fetchAll();
$query = $this->connection->getQueryBuilder();
$query->select('*')
->from('mounts')
->where($query->expr()->eq('storage_id', $query->createNamedParameter($storageId)));
return $query->executeQuery()->fetchAll();
}

/**
Expand Down
41 changes: 41 additions & 0 deletions apps/files_sharing/lib/DeleteOrphanedSharesJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ public function __construct(
* @param array $argument unused argument
*/
public function run($argument) {
if ($this->db->getShardDefinition('filecache')) {
$this->shardingCleanup();
return;
}

$qbSelect = $this->db->getQueryBuilder();
$qbSelect->select('id')
->from('share', 's')
Expand Down Expand Up @@ -96,4 +101,40 @@ public function run($argument) {
}, $this->db);
} while ($deleted >= self::CHUNK_SIZE && $this->time->getTime() <= $cutOff);
}

private function shardingCleanup(): void {
$qb = $this->db->getQueryBuilder();
$qb->selectDistinct('file_source')
->from('share', 's');
$sourceFiles = $qb->executeQuery()->fetchAll(PDO::FETCH_COLUMN);

$deleteQb = $this->db->getQueryBuilder();
$deleteQb->delete('share')
->where(
$deleteQb->expr()->in('file_source', $deleteQb->createParameter('ids'), IQueryBuilder::PARAM_INT_ARRAY)
);

$chunks = array_chunk($sourceFiles, self::CHUNK_SIZE);
foreach ($chunks as $chunk) {
$deletedFiles = $this->findMissingSources($chunk);
$this->atomic(function () use ($deletedFiles, $deleteQb) {
$deleteQb->setParameter('ids', $deletedFiles, IQueryBuilder::PARAM_INT_ARRAY);
$deleted = $deleteQb->executeStatement();
$this->logger->debug("{deleted} orphaned share(s) deleted", [
'app' => 'DeleteOrphanedSharesJob',
'deleted' => $deleted,
]);
return $deleted;
}, $this->db);
}
}

private function findMissingSources(array $ids): array {
$qb = $this->db->getQueryBuilder();
$qb->select('fileid')
->from('filecache')
->where($qb->expr()->in('fileid', $qb->createNamedParameter($ids, IQueryBuilder::PARAM_INT_ARRAY)));
$found = $qb->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
return array_diff($ids, $found);
}
}
4 changes: 2 additions & 2 deletions lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,8 @@ public function executeStatement(?IDBConnection $connection = null): int {
return $this->builder->executeStatement($connection);
}

public function hintShardKey(string $column, mixed $value) {
$this->builder->hintShardKey($column, $value);
public function hintShardKey(string $column, mixed $value, bool $overwrite = false) {
$this->builder->hintShardKey($column, $value, $overwrite);
return $this;
}

Expand Down
2 changes: 1 addition & 1 deletion lib/private/DB/QueryBuilder/QueryBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -1366,7 +1366,7 @@ public function quoteAlias($alias) {
return $this->helper->quoteColumnName($alias);
}

public function hintShardKey(string $column, mixed $value) {
public function hintShardKey(string $column, mixed $value, bool $overwrite = false) {
return $this;
}

Expand Down
6 changes: 5 additions & 1 deletion lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,11 @@ private function registerOrder(string $column, string $order): void {
];
}

public function hintShardKey(string $column, mixed $value) {
public function hintShardKey(string $column, mixed $value, bool $overwrite = false) {
if ($overwrite) {
$this->primaryKeys = [];
$this->shardKeys = [];
}
if ($this->shardDefinition?->isKey($column)) {
$this->primaryKeys[] = $value;
}
Expand Down
Loading

0 comments on commit 6adafb0

Please sign in to comment.