Skip to content

Commit

Permalink
Replace per-file libmagic strings with MIME types in v6 packages
Browse files Browse the repository at this point in the history
libmagic strings are useful for humans but less so for computers,
in particular when you start having things like

     PNG image data, 16 x 16, 8-bit/color RGBA, non-interlaced

All very fascinating but that level of image detail does not need to be
in package metadata. The libmagic strings also make the exact output
quite dependent on the exact libmagic version. They obviously want to
improve their output but for rpm's purposes, this is unwanted
instability. MIME types are far more predictable and also machine
processable on a whole different level, and that is what we'll use
for v6 packages.

We need to still produce fully compatible v4 packages though, and there are
external tools that look at the file class data in that tag, so we can't
just reuse the tag for MIME in v6 either. Which means we need to
duplicate all this goo, annoyingly. We get to drop some of it in v7,
one day...

Besides the concrete dictionary + per-file index header tags, add a
header extension that fills in gaps in the data where possible and builds
it into a consumable format, plus a --filemime query alias for it, all
very similar to --fileclass.

Add/adjust tests to match and show the difference between v4 and v6:
v4 packages have fileclass but only extension-populated mime types,
and v6 is the exact opposite.

Fixes: #1096
  • Loading branch information
pmatilai authored and ffesti committed Sep 24, 2024
1 parent 82bb832 commit 6191388
Show file tree
Hide file tree
Showing 12 changed files with 216 additions and 55 deletions.
40 changes: 32 additions & 8 deletions build/rpmfc.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,17 @@ struct rpmfc_s {
vector<rpmfcAttr> atypes; /*!< known file attribute types */

vector<string> fn; /*!< (no. files) file names */
vector<string> fmime;/*!< (no. files) file mime types */
vector<string> ftype;/*!< (no. files) file types */
ARGV_t *fattrs; /*!< (no. files) file attribute tokens */
vector<rpm_color_t> fcolor; /*!< (no. files) file colors */
vector<rpmsid> fmdictx;/*!< (no. files) file mime dictionary indices */
vector<rpmsid> fcdictx;/*!< (no. files) file class dictionary indices */
vector<uint32_t> fddictx;/*!< (no. files) file depends dictionary start */
vector<uint32_t> fddictn;/*!< (no. files) file depends dictionary no. entries */
vector<uint32_t> ddictx; /*!< (no. dependencies) file->dependency mapping */
rpmstrPool cdict; /*!< file class dictionary */
rpmstrPool mdict; /*!< file mime dictionary */
rpmfcFileDeps fileDeps; /*!< file dependency mapping */

fattrHash fahash; /*!< attr:file mapping */
Expand Down Expand Up @@ -871,6 +874,7 @@ rpmfc rpmfcFree(rpmfc fc)
rpmdsFree(fd.dep);

rpmstrPoolFree(fc->cdict);
rpmstrPoolFree(fc->mdict);

rpmstrPoolFree(fc->pool);
delete fc;
Expand Down Expand Up @@ -1232,16 +1236,19 @@ rpmRC rpmfcClassify(rpmfc fc, ARGV_t argv, rpm_mode_t * fmode)
fc->nfiles = argvCount(argv);
fc->fn.assign(fc->nfiles, "");
fc->ftype.assign(fc->nfiles, "");
fc->fmime.assign(fc->nfiles, "");
fc->fattrs = (ARGV_t *)xcalloc(fc->nfiles, sizeof(*fc->fattrs));
fc->fcolor.assign(fc->nfiles, 0);
fc->fcdictx.assign(fc->nfiles, 0);
fc->fmdictx.assign(fc->nfiles, 0);

/* Initialize the per-file dictionary indices. */
fc->fddictx.assign(fc->nfiles, 0);
fc->fddictn.assign(fc->nfiles, 0);

/* Build (sorted) file class dictionary. */
/* Build (sorted) file class and mime dictionaries. */
fc->cdict = rpmstrPoolCreate();
fc->mdict = rpmstrPoolCreate();

#pragma omp parallel
{
Expand Down Expand Up @@ -1346,6 +1353,7 @@ rpmRC rpmfcClassify(rpmfc fc, ARGV_t argv, rpm_mode_t * fmode)
/* Add attributes based on file type and/or path */
rpmfcAttributes(fc, ix, ftype, fmime, s);

fc->fmime[ix] = fmime;
if (fcolor != RPMFC_WHITE && (fcolor & RPMFC_INCLUDE))
fc->ftype[ix] = ftype;

Expand All @@ -1364,8 +1372,10 @@ rpmRC rpmfcClassify(rpmfc fc, ARGV_t argv, rpm_mode_t * fmode)
/* Add to file class dictionary and index array */
for (int ix = 0; ix < fc->nfiles; ix++) {
const string & ftype = fc->ftype[ix];
const string & fmime = fc->fmime[ix];
/* Pool id's start from 1, for headers we want it from 0 */
fc->fcdictx[ix] = rpmstrPoolId(fc->cdict, ftype.c_str(), 1) - 1;
fc->fmdictx[ix] = rpmstrPoolId(fc->mdict, fmime.c_str(), 1) - 1;

if (ftype.empty())
fc->fwhite++;
Expand All @@ -1379,6 +1389,7 @@ rpmRC rpmfcClassify(rpmfc fc, ARGV_t argv, rpm_mode_t * fmode)
exit:
/* No more additions after this, freeze pool to minimize memory use */
rpmstrPoolFreeze(fc->cdict, 0);
rpmstrPoolFreeze(fc->mdict, 0);

return rc;
}
Expand Down Expand Up @@ -1690,14 +1701,27 @@ rpmRC rpmfcGenerateDepends(const rpmSpec spec, Package pkg)
/* Add per-file colors(#files) */
headerPutUint32(pkg->header, RPMTAG_FILECOLORS, fc->fcolor.data(), fc->nfiles);

/* Add classes(#classes) */
for (rpmsid id = 1; id <= rpmstrPoolNumStr(fc->cdict); id++) {
headerPutString(pkg->header, RPMTAG_CLASSDICT,
rpmstrPoolStr(fc->cdict, id));
}
if (pkg->rpmver >= 6) {
/* Add mime types(#mime types) */
for (rpmsid id = 1; id <= rpmstrPoolNumStr(fc->mdict); id++) {
headerPutString(pkg->header, RPMTAG_MIMEDICT,
rpmstrPoolStr(fc->mdict, id));
}

/* Add per-file mime types(#files) */
headerPutUint32(pkg->header, RPMTAG_FILEMIMEINDEX,
fc->fmdictx.data(), fc->nfiles);
} else {
/* Add classes(#classes) */
for (rpmsid id = 1; id <= rpmstrPoolNumStr(fc->cdict); id++) {
headerPutString(pkg->header, RPMTAG_CLASSDICT,
rpmstrPoolStr(fc->cdict, id));
}

/* Add per-file classes(#files) */
headerPutUint32(pkg->header, RPMTAG_FILECLASS, fc->fcdictx.data(), fc->nfiles);
/* Add per-file classes(#files) */
headerPutUint32(pkg->header, RPMTAG_FILECLASS,
fc->fcdictx.data(), fc->nfiles);
}

/* Add dependency dictionary(#dependencies) */
if (!fc->ddictx.empty()) {
Expand Down
6 changes: 4 additions & 2 deletions docs/manual/tags.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ Dirnames | 1118 | string array | dirname(3) components of contained pat
Filedigestalgo | 5011 | int32 | ID of file digest algorithm. If missing, considered `0` for `md5`.
Longarchivesize | 271 | int64 | (Uncompressed) payload size when > 4GB.
Longsize | 5009 | int64 | Installed package size when > 4GB.
Mimedict | 5116 | string array | Dictionary of MIME types, only >= v6.
Payloadcompressor | 1125 | string | Payload compressor name (as passed to rpmio `Fopen()`)
Payloadflags | 1126 | string | Payload compressor level (as passed to rpmio `Fopen()`)
Payloadformat | 1124 | string | Payload format (`cpio`)
Expand All @@ -109,6 +110,7 @@ Filegroupname | 1040 | string array | Unix group name.
Fileinodes | 1096 | int32 array | Abstract inode number (hardlink calculation only).
Filelangs | 1097 | string array | Optional language of the file (eg man page translations)
Filelinktos | 1036 | string array | Symlink target for symlink files.
Filemimeindex | 5115 | int32 array | Index into MIME dictionary (see Mimedict tag), only >= v6.
Filemodes | 1030 | int16 array | Unix file mode.
Filemtimes | 1034 | int32 array | Unix file modification timestamp (aka mtime).
Filerdevs | 1033 | int16 array | Device ID (of device files)
Expand All @@ -121,10 +123,10 @@ Longfilesizes | 5008 | int64 array | File size (when files > 4GB are present)

Tag Name | Value| Type | Description
--------------------|------|--------------|------------
Classdict | 1142 | string array | File class (libmagic string) dictionary
Classdict | 1142 | string array | File class (libmagic string) dictionary (only v4)
Dependsdict | 1145 | int32 array | File dependencies dictionary
Filecaps | 5010 | string array | `cap_to_text(3)` textual representation of file capabilities.
Fileclass | 1141 | int32 array | Index into Classdict
Fileclass | 1141 | int32 array | Index into Classdict (only v4)
Filecolors | 1140 | int32 array | File "color" - 1 for 32bit ELF, 2 for 64bit ELF and 0 otherwise
Filedependsn | 1144 | int32 array | Number of file dependencies in Dependsdict, starting from Filedependsx
Filedependsx | 1143 | int32 array | Index into Dependsdict denoting start of this file's dependencies.
Expand Down
9 changes: 8 additions & 1 deletion include/rpm/rpmfi.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,12 +237,19 @@ rpm_color_t rpmfiColor(rpmfi fi);
rpm_color_t rpmfiFColor(rpmfi fi);

/** \ingroup rpmfi
* Return current file class from file info set iterator.
* Return current file class from file info set iterator (v4 packages).
* @param fi file info set iterator
* @return current file class, 0 on invalid
*/
const char * rpmfiFClass(rpmfi fi);

/** \ingroup rpmfi
 * Return current file mime type from file info set iterator (v6 packages).
 * @param fi		file info set iterator
 * @return		current file mime type, NULL on invalid
 */
const char * rpmfiFMime(rpmfi fi);

/** \ingroup rpmfi
* Return current file depends dictionary from file info set iterator.
* @param fi file info set iterator
Expand Down
19 changes: 14 additions & 5 deletions include/rpm/rpmfiles.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ enum rpmfiFlags_e {
RPMFI_NOFILEFLAGS = (1 << 17),
RPMFI_NOFILESIGNATURES = (1 << 18),
RPMFI_NOVERITYSIGNATURES = (1 << 19),
RPMFI_NOFILEMIME = (1 << 20),
};

typedef rpmFlags rpmfiFlags;
Expand All @@ -161,14 +162,14 @@ typedef rpmFlags rpmfiFlags;
(RPMFI_NOFILECLASS | RPMFI_NOFILELANGS | \
RPMFI_NOFILEMTIMES | RPMFI_NOFILERDEVS | \
RPMFI_NOFILESIGNATURES | RPMFI_NOVERITYSIGNATURES | \
RPMFI_NOFILEVERIFYFLAGS)
RPMFI_NOFILEVERIFYFLAGS | RPMFI_NOFILEMIME)

#define RPMFI_FLAGS_INSTALL \
(RPMFI_NOFILECLASS | RPMFI_NOFILEVERIFYFLAGS)
(RPMFI_NOFILECLASS | RPMFI_NOFILEVERIFYFLAGS | RPMFI_NOFILEMIME)

#define RPMFI_FLAGS_VERIFY \
(RPMFI_NOFILECLASS | RPMFI_NOFILEDEPS | RPMFI_NOFILELANGS | \
RPMFI_NOFILECOLORS)
RPMFI_NOFILECOLORS | RPMFI_NOFILEMIME)

#define RPMFI_FLAGS_QUERY \
(RPMFI_NOFILECLASS | RPMFI_NOFILEDEPS | RPMFI_NOFILELANGS | \
Expand All @@ -181,7 +182,7 @@ typedef rpmFlags rpmfiFlags;
RPMFI_NOFILEDIGESTS | RPMFI_NOFILEMTIMES | RPMFI_NOFILERDEVS | \
RPMFI_NOFILEINODES | RPMFI_NOFILECOLORS | \
RPMFI_NOFILESIGNATURES | RPMFI_NOVERITYSIGNATURES | \
RPMFI_NOFILEVERIFYFLAGS | RPMFI_NOFILEFLAGS)
RPMFI_NOFILEVERIFYFLAGS | RPMFI_NOFILEFLAGS | RPMFI_NOFILEMIME)

#define RPMFI_FLAGS_ONLY_FILENAMES \
(RPMFI_FLAGS_FILETRIGGER | RPMFI_NOFILESTATES)
Expand Down Expand Up @@ -401,13 +402,21 @@ rpm_loff_t rpmfilesFSize(rpmfiles fi, int ix);
rpm_color_t rpmfilesFColor(rpmfiles fi, int ix);

/** \ingroup rpmfiles
* Return file class from file info set.
* Return file class from file info set (v4 packages)
* @param fi file info set
* @param ix file index
* @return file class, 0 on invalid
*/
const char * rpmfilesFClass(rpmfiles fi, int ix);

/** \ingroup rpmfiles
 * Return file mime type from file info set (v6 packages).
 * The type is looked up in the mime dictionary through the per-file
 * mime index; NULL is returned if the set, the index array or the
 * dictionary is missing, or if either index is out of range.
 * @param fi		file info set
 * @param ix		file index
 * @return		file mime type, NULL on invalid
 */
const char * rpmfilesFMime(rpmfiles fi, int ix);

/** \ingroup rpmfiles
* Return file depends dictionary from file info set.
* @param fi file info set
Expand Down
3 changes: 3 additions & 0 deletions include/rpm/rpmtag.h
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,9 @@ typedef enum rpmTag_e {
RPMTAG_PAYLOADSIZE = 5112, /* l */
RPMTAG_PAYLOADSIZEALT = 5113, /* l */
RPMTAG_RPMFORMAT = 5114, /* i */
RPMTAG_FILEMIMEINDEX = 5115, /* i[] */
RPMTAG_MIMEDICT = 5116, /* s[] */
RPMTAG_FILEMIMES = 5117, /* s[] extension */

RPMTAG_FIRSTFREE_TAG /*!< internal */
} rpmTag;
Expand Down
25 changes: 25 additions & 0 deletions lib/rpmfi.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ struct rpmfiles_s {
rpm_count_t ncdict; /*!< No. of class entries. */
uint32_t * fcdictx; /*!< File class dictionary index (header) */

char ** mdict; /*!< File mime dictionary (header) */
rpm_count_t nmdict; /*!< No. of mime entries. */
uint32_t * fmdictx; /*!< File mime dictionary index (header) */

uint32_t * ddict; /*!< File depends dictionary (header) */
rpm_count_t nddict; /*!< No. of depends entries. */
uint32_t * fddictx; /*!< File depends dictionary start (header) */
Expand Down Expand Up @@ -689,6 +693,19 @@ const char * rpmfilesFClass(rpmfiles fi, int ix)
return fclass;
}

/*
 * Look up the mime type string of file number ix.
 * Yields NULL unless the set carries both a per-file mime index array and
 * a mime dictionary, ix names an existing file, and the stored index falls
 * inside the dictionary.
 */
const char * rpmfilesFMime(rpmfiles fi, int ix)
{
    int slot;

    /* Need a file info set that actually has per-file mime indices. */
    if (fi == NULL || fi->fmdictx == NULL)
	return NULL;

    /* The file number must be within the set. */
    if (ix < 0 || ix >= rpmfilesFC(fi))
	return NULL;

    /* The stored index is only usable if it points into the dictionary. */
    slot = fi->fmdictx[ix];
    if (fi->mdict == NULL || slot < 0 || slot >= fi->nmdict)
	return NULL;

    return fi->mdict[slot];
}

uint32_t rpmfilesFDepends(rpmfiles fi, int ix, const uint32_t ** fddictp)
{
int fddictx = -1;
Expand Down Expand Up @@ -1244,6 +1261,7 @@ rpmfiles rpmfilesFree(rpmfiles fi)
fi->fcaps = _free(fi->fcaps);

fi->cdict = _free(fi->cdict);
fi->mdict = _free(fi->mdict);

fi->fuser = _free(fi->fuser);
fi->fgroup = _free(fi->fgroup);
Expand All @@ -1264,6 +1282,7 @@ rpmfiles rpmfilesFree(rpmfiles fi)

fi->fcolors = _free(fi->fcolors);
fi->fcdictx = _free(fi->fcdictx);
fi->fmdictx = _free(fi->fmdictx);
fi->ddict = _free(fi->ddict);
fi->fddictx = _free(fi->fddictx);
fi->fddictn = _free(fi->fddictn);
Expand Down Expand Up @@ -1590,6 +1609,11 @@ static int rpmfilesPopulate(rpmfiles fi, Header h, rpmfiFlags flags)
fi->ncdict = rpmtdCount(&td);
_hgfi(h, RPMTAG_FILECLASS, &td, scareFlags, fi->fcdictx);
}
if (!(flags & RPMFI_NOFILEMIME)) {
_hgfinc(h, RPMTAG_MIMEDICT, &td, scareFlags, fi->mdict);
fi->nmdict = rpmtdCount(&td);
_hgfi(h, RPMTAG_FILEMIMEINDEX, &td, scareFlags, fi->fmdictx);
}
if (!(flags & RPMFI_NOFILEDEPS)) {
_hgfinc(h, RPMTAG_DEPENDSDICT, &td, scareFlags, fi->ddict);
fi->nddict = rpmtdCount(&td);
Expand Down Expand Up @@ -1870,6 +1894,7 @@ RPMFI_ITERFUNC(const char *, FGroup, i)
RPMFI_ITERFUNC(const char *, FCaps, i)
RPMFI_ITERFUNC(const char *, FLangs, i)
RPMFI_ITERFUNC(const char *, FClass, i)
RPMFI_ITERFUNC(const char *, FMime, i)
RPMFI_ITERFUNC(rpmfileState, FState, i)
RPMFI_ITERFUNC(rpmfileAttrs, FFlags, i)
RPMFI_ITERFUNC(rpmVerifyAttrs, VFlags, i)
Expand Down
45 changes: 45 additions & 0 deletions lib/tagexts.c
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,50 @@ static int fileclassTag(Header h, rpmtd td, headerGetFlags hgflags)
return ftypeTag(h, td, hgflags, makeFClass);
}

/*
 * Produce the mime type for the file the iterator currently points at.
 * A non-empty type recorded in the header is used as-is; otherwise one is
 * derived from the file mode for the special file types (devices,
 * directories, fifos, sockets and symlinks).
 * The result is always a malloced string (empty if nothing is known) to
 * simplify life in filemimesTag().
 */
static char *makeFMime(rpmfi fi)
{
    const char *src = rpmfiFMime(fi);
    char *result = NULL;

    if (src == NULL || src[0] == '\0') {
	/* Nothing usable in the header: fall back to the file mode. */
	switch (rpmfiFMode(fi) & S_IFMT) {
	case S_IFBLK:
	    src = "inode/blockdevice";
	    break;
	case S_IFCHR:
	    src = "inode/chardevice";
	    break;
	case S_IFDIR:
	    src = "inode/directory";
	    break;
	case S_IFIFO:
	    src = "inode/fifo";
	    break;
	case S_IFSOCK:
	    src = "inode/socket";
	    break;
	case S_IFLNK:
	    src = "inode/symlink";
	    break;
	default:
	    /* Regular or unknown file without a recorded type. */
	    src = NULL;
	    break;
	}
    }

    if (src != NULL)
	result = xstrdup(src);

    /* Never hand back NULL, callers get an empty string instead. */
    if (result == NULL)
	result = xstrdup("");

    return result;
}

/*
 * Header extension handler for the per-file mime types tag.
 * Delegates to ftypeTag(), passing makeFMime as the function that
 * supplies each file's string.
 */
static int filemimesTag(Header h, rpmtd td, headerGetFlags hgflags)
{
    int rc = ftypeTag(h, td, hgflags, makeFMime);

    return rc;
}

/**
* Retrieve file provides.
* @param h header
Expand Down Expand Up @@ -1048,6 +1092,7 @@ static const struct headerTagFunc_s rpmHeaderTagExtensions[] = {
{ RPMTAG_CONFLICTNEVRS, conflictnevrsTag },
{ RPMTAG_FILENLINKS, filenlinksTag },
{ RPMTAG_SYSUSERS, sysusersTag },
{ RPMTAG_FILEMIMES, filemimesTag },
{ 0, NULL }
};

Expand Down
3 changes: 3 additions & 0 deletions rpmpopt.in
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ rpm alias --filesbypkg --qf '[%-25{=NAME} %{FILENAMES}\n]' \
rpm alias --fileclass --qf '[%{FILENAMES}\t%{FILECLASS}\n]' \
--POPTdesc=$"list file names with their classes"

rpm alias --filemime --qf '[%{FILENAMES}\t%{FILEMIMES}\n]' \
--POPTdesc=$"list file names with their mime types"

rpm alias --filecolor --qf '[%{FILENAMES}\t%{FILECOLORS}\n]' \
--POPTdesc=$"list file names with their colors"

Expand Down
Loading

0 comments on commit 6191388

Please sign in to comment.