Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add usage collection for savedObject tagging #83160

Merged
merged 9 commits into from
Nov 20, 2020
3 changes: 2 additions & 1 deletion x-pack/plugins/saved_objects_tagging/kibana.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
"ui": true,
"configPath": ["xpack", "saved_object_tagging"],
"requiredPlugins": ["features", "management", "savedObjectsTaggingOss"],
"requiredBundles": ["kibanaReact"]
"requiredBundles": ["kibanaReact"],
"optionalPlugins": ["usageCollection"]
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,8 @@ export const registerRoutesMock = jest.fn();
jest.doMock('./routes', () => ({
registerRoutes: registerRoutesMock,
}));

export const createTagUsageCollectorMock = jest.fn();
jest.doMock('./usage', () => ({
createTagUsageCollector: createTagUsageCollectorMock,
}));
27 changes: 26 additions & 1 deletion x-pack/plugins/saved_objects_tagging/server/plugin.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,32 @@
* you may not use this file except in compliance with the Elastic License.
*/

import { registerRoutesMock } from './plugin.test.mocks';
import { registerRoutesMock, createTagUsageCollectorMock } from './plugin.test.mocks';

import { coreMock } from '../../../../src/core/server/mocks';
import { featuresPluginMock } from '../../features/server/mocks';
import { usageCollectionPluginMock } from '../../../../src/plugins/usage_collection/server/mocks';
import { SavedObjectTaggingPlugin } from './plugin';
import { savedObjectsTaggingFeature } from './features';

describe('SavedObjectTaggingPlugin', () => {
let plugin: SavedObjectTaggingPlugin;
let featuresPluginSetup: ReturnType<typeof featuresPluginMock.createSetup>;
let usageCollectionSetup: ReturnType<typeof usageCollectionPluginMock.createSetupContract>;

beforeEach(() => {
plugin = new SavedObjectTaggingPlugin(coreMock.createPluginInitializerContext());
featuresPluginSetup = featuresPluginMock.createSetup();
usageCollectionSetup = usageCollectionPluginMock.createSetupContract();
// `usageCollection` 'mocked' implementation use the real `CollectorSet` implementation
// that throws when registering things that are not collectors.
// We just want to assert that it was called here, so jest.fn is fine.
usageCollectionSetup.registerCollector = jest.fn();
});

afterEach(() => {
registerRoutesMock.mockReset();
createTagUsageCollectorMock.mockReset();
});

describe('#setup', () => {
Expand All @@ -43,5 +55,18 @@ describe('SavedObjectTaggingPlugin', () => {
savedObjectsTaggingFeature
);
});

it('registers the usage collector if `usageCollection` is present', async () => {
const tagUsageCollector = Symbol('saved_objects_tagging');
createTagUsageCollectorMock.mockReturnValue(tagUsageCollector);

await plugin.setup(coreMock.createSetup(), {
features: featuresPluginSetup,
usageCollection: usageCollectionSetup,
});

expect(usageCollectionSetup.registerCollector).toHaveBeenCalledTimes(1);
expect(usageCollectionSetup.registerCollector).toHaveBeenCalledWith(tagUsageCollector);
});
});
});
31 changes: 27 additions & 4 deletions x-pack/plugins/saved_objects_tagging/server/plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,36 @@
* you may not use this file except in compliance with the Elastic License.
*/

import { CoreSetup, CoreStart, PluginInitializerContext, Plugin } from 'src/core/server';
import { Observable } from 'rxjs';
import {
CoreSetup,
CoreStart,
PluginInitializerContext,
Plugin,
SharedGlobalConfig,
} from 'src/core/server';
import { PluginSetupContract as FeaturesPluginSetup } from '../../features/server';
import { UsageCollectionSetup } from '../../../../src/plugins/usage_collection/server';
import { savedObjectsTaggingFeature } from './features';
import { tagType } from './saved_objects';
import { ITagsRequestHandlerContext } from './types';
import { registerRoutes } from './routes';
import { TagsRequestHandlerContext } from './request_handler_context';
import { registerRoutes } from './routes';
import { createTagUsageCollector } from './usage';

interface SetupDeps {
features: FeaturesPluginSetup;
usageCollection?: UsageCollectionSetup;
}

export class SavedObjectTaggingPlugin implements Plugin<{}, {}, SetupDeps, {}> {
constructor(context: PluginInitializerContext) {}
private readonly legacyConfig$: Observable<SharedGlobalConfig>;

public setup({ savedObjects, http }: CoreSetup, { features }: SetupDeps) {
constructor(context: PluginInitializerContext) {
this.legacyConfig$ = context.config.legacy.globalConfig$;
}

public setup({ savedObjects, http }: CoreSetup, { features, usageCollection }: SetupDeps) {
savedObjects.registerType(tagType);

const router = http.createRouter();
Expand All @@ -34,6 +48,15 @@ export class SavedObjectTaggingPlugin implements Plugin<{}, {}, SetupDeps, {}> {

features.registerKibanaFeature(savedObjectsTaggingFeature);

if (usageCollection) {
usageCollection.registerCollector(
createTagUsageCollector({
usageCollection,
legacyConfig$: this.legacyConfig$,
})
);
}

return {};
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

import { ElasticsearchClient } from 'src/core/server';
import { TaggingUsageData, ByTypeTaggingUsageData } from './types';

/**
* Manual type reflection of the `tagDataAggregations` resulting payload
*/
interface AggregatedTagUsageResponseBody {
aggregations: {
by_type: {
buckets: Array<{
key: string;
doc_count: number;
nested_ref: {
tag_references: {
doc_count: number;
tag_id: {
buckets: Array<{
key: string;
doc_count: number;
}>;
};
};
};
}>;
};
};
}

export const fetchTagUsageData = async ({
esClient,
kibanaIndex,
}: {
esClient: ElasticsearchClient;
kibanaIndex: string;
}): Promise<TaggingUsageData> => {
const { body } = await esClient.search<AggregatedTagUsageResponseBody>({
index: [kibanaIndex],
ignore_unavailable: true,
filter_path: 'aggregations',
body: {
size: 0,
query: {
bool: {
must: [hasTagReferenceClause],
},
},
aggs: tagDataAggregations,
},
});
rudolf marked this conversation as resolved.
Show resolved Hide resolved

const byTypeUsages: Record<string, ByTypeTaggingUsageData> = {};
const allUsedTags = new Set<string>();
let totalTaggedObjects = 0;

const typeBuckets = body.aggregations.by_type.buckets;
typeBuckets.forEach((bucket) => {
const type = bucket.key;
const taggedDocCount = bucket.doc_count;
const usedTagIds = bucket.nested_ref.tag_references.tag_id.buckets.map(
(tagBucket) => tagBucket.key
);

totalTaggedObjects += taggedDocCount;
usedTagIds.forEach((tagId) => allUsedTags.add(tagId));

byTypeUsages[type] = {
taggedObjects: taggedDocCount,
usedTags: usedTagIds.length,
};
});

return {
usedTags: allUsedTags.size,
taggedObjects: totalTaggedObjects,
types: byTypeUsages,
};
};

const hasTagReferenceClause = {
nested: {
path: 'references',
query: {
bool: {
must: [
{
term: {
'references.type': 'tag',
},
},
],
},
},
},
};

const tagDataAggregations = {
by_type: {
terms: {
field: 'type',
},
aggs: {
nested_ref: {
nested: {
path: 'references',
},
aggs: {
tag_references: {
filter: {
term: {
'references.type': 'tag',
},
},
aggs: {
tag_id: {
terms: {
field: 'references.id',
},
},
},
},
},
},
},
},
};
7 changes: 7 additions & 0 deletions x-pack/plugins/saved_objects_tagging/server/usage/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

export { createTagUsageCollector } from './tag_usage_collector';
23 changes: 23 additions & 0 deletions x-pack/plugins/saved_objects_tagging/server/usage/schema.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

import { MakeSchemaFrom } from '../../../../../src/plugins/usage_collection/server';
import { TaggingUsageData, ByTypeTaggingUsageData } from './types';

const perTypeSchema: MakeSchemaFrom<ByTypeTaggingUsageData> = {
usedTags: { type: 'integer' },
taggedObjects: { type: 'integer' },
};

export const tagUsageCollectorSchema: MakeSchemaFrom<TaggingUsageData> = {
usedTags: { type: 'integer' },
taggedObjects: { type: 'integer' },

types: {
dashboard: perTypeSchema,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we collect tags usage data for more than just these two saved object types. I think we should try to avoid having this kind of dynamic fields in our schema and it will also increase the field count on our telemetry cluster.

What about:

perSavedObjectType: {
  usedTags: { type: 'integer' },
  taggedObjects: { type: 'integer' },
  savedObjectType: {type: 'string'},
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perSavedObjectType: { ...

Should we use a nested or array type in that case?

That would work. However I would be sure that is would be alright to the way telemetry consumes / uses the data.

When I look at the other collectors, none seems to be used nested fields, which is why I'm asking:

export const applicationUsageSchema = {
// OSS
dashboards: commonSchema,
dev_tools: commonSchema,
discover: commonSchema,
home: commonSchema,
kibana: commonSchema, // It's a forward app so we'll likely never report it
management: commonSchema,

note that this could only be a design flaw when the collector was implemented. @elastic/kibana-telemetry?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey! This is a legacy issue that no longer applies: in the past, before having the schema implementation, the indexer in the remote service indexed the data in the same format it received it from Kibana. This meant that we had to keep the mappings in mind when developing the collectors (and nested was never an option because Kibana does not natively support viz with that type yet).
New collectors/fields shouldn't have those contraints anymore because the new indexer actually reshapes the payloads and splits the info into multiple indices based on the different needs to consume the information by our PMs and Analysts.
If we foresee the number of keys growing dynamically, I think the array approach would be best.

cc @mindbat so he can share his thoughts from the indexer-maintainer POV 🙂

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New collectors/fields shouldn't have those contraints anymore because the new indexer actually reshapes the payloads and splits the info into multiple indices based on the different needs to consume the information by our PMs and Analysts.

@afharo: We're not sending Kibana data to Telemetry Next yet and the legacy indexer doesn't reshape data by default. We would still need to update the indexer to consume the new data, reshape it and index it into a custom extended index.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So should we stick with the static, per type, mappings for now?

Copy link
Contributor

@rudolf rudolf Nov 19, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even if we just index data as-is, can you elaborate why we couldn't use an array field type for the mappings?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and nested was never an option because Kibana does not natively support viz with that type yet

I missed @afharo's comment and I assume this applies to array fields too #58175

It makes sense that we want to do some processing inside Telemetry Next, but I think it would be much more flexible if Kibana had more control around which documents are created. If telemetry is uploaded as NDJSON then usage collectors could return several documents instead of always having a single large payload that needs to be handled and split by the server. This is probably a big effort, but maintaining a static schema for dynamic data like applications and saved object types that change every release feels like a maintenance nightmare.

So I guess for this PR we just need to add all the known saved object types to the mappings.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I guess for this PR we just need to add all the known saved object types to the mappings.

We only need to add types that currently supports the tagging feature. That would just be dashboard, visualizations and maps. Will add maps. Further additions should only be done when the app/type implements the tagging feature.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in 7b6d76e: added map to the schema, and updated the readme to state that developers should update the schema when adding tagging support to additional SO types.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rudolf

Even if we just index data as-is, can you elaborate why we couldn't use an array field type for the mappings?

Without any transformations on the remote service indexer, we'd end up with data mapped as an array in elasticsearch and face the usual problem of not being able to meaningfully query or visualize that data. It's a situation we're trying to fix with ui_metric right now, for example.

visualization: perTypeSchema,
},
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

import { Observable } from 'rxjs';
import { take } from 'rxjs/operators';
import { SharedGlobalConfig } from 'src/core/server';
import { UsageCollectionSetup } from '../../../../../src/plugins/usage_collection/server';
import { TaggingUsageData } from './types';
import { fetchTagUsageData } from './fetch_tag_usage_data';
import { tagUsageCollectorSchema } from './schema';

export const createTagUsageCollector = ({
usageCollection,
legacyConfig$,
}: {
usageCollection: UsageCollectionSetup;
legacyConfig$: Observable<SharedGlobalConfig>;
}) => {
return usageCollection.makeUsageCollector<TaggingUsageData>({
type: 'saved_objects_tagging',
isReady: () => true,
schema: tagUsageCollectorSchema,
fetch: async ({ esClient }) => {
// there is a bug with the monitoring bulk uploader cause `fetch` to be invoked without the esClient.
// we can't do anything in that case and must exit. the `as any` force-cast is here as a TS hack to avoid
// polluting the `makeUsageCollector` generic type.
if (!esClient) {
pgayvallet marked this conversation as resolved.
Show resolved Hide resolved
return undefined as any;
}
const { kibana } = await legacyConfig$.pipe(take(1)).toPromise();
return fetchTagUsageData({ esClient, kibanaIndex: kibana.index });
pgayvallet marked this conversation as resolved.
Show resolved Hide resolved
},
});
};
22 changes: 22 additions & 0 deletions x-pack/plugins/saved_objects_tagging/server/usage/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

/**
* @internal
*/
export interface TaggingUsageData {
usedTags: number;
taggedObjects: number;
types: Record<string, ByTypeTaggingUsageData>;
}

/**
* @internal
*/
export interface ByTypeTaggingUsageData {
usedTags: number;
taggedObjects: number;
}
Loading