diff --git a/.gitignore b/.gitignore index c5a8cf8ac9c0b..eb75a231fa8c6 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ __pycache__ cover .DS_Store .eggs +.env .envrc .idea .mypy_cache diff --git a/UPDATING.md b/UPDATING.md index e56deff7a14f5..c0e03afbe9a03 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -16,6 +16,7 @@ KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> + # Updating Superset This file documents any backwards-incompatible changes in Superset and @@ -23,287 +24,294 @@ assists people when migrating to a new version. ## Next -* [11575](https://github.com/apache/incubator-superset/pull/11575) The Row Level Security (RLS) config flag has been moved to a feature flag. To migrate, add `ROW_LEVEL_SECURITY: True` to the `FEATURE_FLAGS` dict in `superset_config.py`. +- [11509](https://github.com/apache/incubator-superset/pull/11509): Config value `TABLE_NAMES_CACHE_CONFIG` has been renamed to `DATA_CACHE_CONFIG`, which will now also hold query results cache from connected datasources (previously held in `CACHE_CONFIG`), in addition to the table names. If you will set `DATA_CACHE_CONFIG` to a new cache backend different than your previous `CACHE_CONFIG`, plan for additional cache warmup to avoid degrading charting performance for the end users. + +- [11575](https://github.com/apache/incubator-superset/pull/11575) The Row Level Security (RLS) config flag has been moved to a feature flag. To migrate, add `ROW_LEVEL_SECURITY: True` to the `FEATURE_FLAGS` dict in `superset_config.py`. -* NOTICE: config flag ENABLE_REACT_CRUD_VIEWS has been set to `True` by default, set to `False` if - you prefer to vintage look and feel :) +- [11259](https://github.com/apache/incubator-superset/pull/11259): config flag ENABLE_REACT_CRUD_VIEWS has been set to `True` by default, set to `False` if you prefer to the vintage look and feel. 
However, we may discontinue support on the vintage list view in the future. -* [11244](https://github.com/apache/incubator-superset/pull/11244): The `REDUCE_DASHBOARD_BOOTSTRAP_PAYLOAD` feature flag has been removed after being set to True for multiple months. +- [11244](https://github.com/apache/incubator-superset/pull/11244): The `REDUCE_DASHBOARD_BOOTSTRAP_PAYLOAD` feature flag has been removed after being set to True for multiple months. -* [11098](https://github.com/apache/incubator-superset/pull/11098): includes a database migration that adds a `uuid` column to most models, and updates `Dashboard.position_json` to include chart UUIDs. Depending on number of objects, the migration may take up to 5 minutes, requiring planning for downtime. -* [11172](https://github.com/apache/incubator-superset/pull/11172): Turning +- [11098](https://github.com/apache/incubator-superset/pull/11098): includes a database migration that adds a `uuid` column to most models, and updates `Dashboard.position_json` to include chart UUIDs. Depending on number of objects, the migration may take up to 5 minutes, requiring planning for downtime. +- [11172](https://github.com/apache/incubator-superset/pull/11172): Turning off language selectors by default as i18n is incomplete in most languages and requires more work. You can easily turn on the languages you want to expose in your environment in superset_config.py -* [11172](https://github.com/apache/incubator-superset/pull/11172): Breaking change: SQL templating is turned off be default. To turn it on set `ENABLE_TEMPLATE_PROCESSING` to True on `DEFAULT_FEATURE_FLAGS` +- [11172](https://github.com/apache/incubator-superset/pull/11172): Breaking change: SQL templating is turned off by default. 
To turn it on set `ENABLE_TEMPLATE_PROCESSING` to True on `DEFAULT_FEATURE_FLAGS` -* [11155](https://github.com/apache/incubator-superset/pull/11155): The `FAB_UPDATE_PERMS` config parameter is no longer required as the Superset application correctly informs FAB under which context permissions should be updated. +- [11155](https://github.com/apache/incubator-superset/pull/11155): The `FAB_UPDATE_PERMS` config parameter is no longer required as the Superset application correctly informs FAB under which context permissions should be updated. -* [10887](https://github.com/apache/incubator-superset/pull/10887): Breaking change: The custom cache backend changed in order to support the Flask-Caching factory method approach and thus must be registered as a custom type. See [here](https://flask-caching.readthedocs.io/en/latest/#custom-cache-backends) for specifics. +- [10887](https://github.com/apache/incubator-superset/pull/10887): Breaking change: The custom cache backend changed in order to support the Flask-Caching factory method approach and thus must be registered as a custom type. See [here](https://flask-caching.readthedocs.io/en/latest/#custom-cache-backends) for specifics. -* [10674](https://github.com/apache/incubator-superset/pull/10674): Breaking change: PUBLIC_ROLE_LIKE_GAMMA was removed is favour of the new PUBLIC_ROLE_LIKE so it can be set it whatever role you want. +- [10674](https://github.com/apache/incubator-superset/pull/10674): Breaking change: PUBLIC_ROLE_LIKE_GAMMA was removed in favour of the new PUBLIC_ROLE_LIKE so it can be set to whatever role you want. -* [10590](https://github.com/apache/incubator-superset/pull/10590): Breaking change: this PR will convert iframe chart into dashboard markdown component, and remove all `iframe`, `separator`, and `markup` slices (and support) from Superset. If you have important data in those slices, please backup manually. 
+- [10590](https://github.com/apache/incubator-superset/pull/10590): Breaking change: this PR will convert iframe chart into dashboard markdown component, and remove all `iframe`, `separator`, and `markup` slices (and support) from Superset. If you have important data in those slices, please backup manually. -* [10562](https://github.com/apache/incubator-superset/pull/10562): EMAIL_REPORTS_WEBDRIVER is deprecated use WEBDRIVER_TYPE instead. +- [10562](https://github.com/apache/incubator-superset/pull/10562): EMAIL_REPORTS_WEBDRIVER is deprecated use WEBDRIVER_TYPE instead. -* [10567](https://github.com/apache/incubator-superset/pull/10567): Default WEBDRIVER_OPTION_ARGS are Chrome-specific. If you're using FF, should be `--headless` only +- [10567](https://github.com/apache/incubator-superset/pull/10567): Default WEBDRIVER_OPTION_ARGS are Chrome-specific. If you're using FF, should be `--headless` only -* [10241](https://github.com/apache/incubator-superset/pull/10241): change on Alpha role, users started to have access to "Annotation Layers", "Css Templates" and "Import Dashboards". +- [10241](https://github.com/apache/incubator-superset/pull/10241): change on Alpha role, users started to have access to "Annotation Layers", "Css Templates" and "Import Dashboards". -* [10324](https://github.com/apache/incubator-superset/pull/10324): Facebook Prophet has been introduced as an optional dependency to add support for timeseries forecasting in the chart data API. To enable this feature, install Superset with the optional dependency `prophet` or directly `pip install fbprophet`. +- [10324](https://github.com/apache/incubator-superset/pull/10324): Facebook Prophet has been introduced as an optional dependency to add support for timeseries forecasting in the chart data API. To enable this feature, install Superset with the optional dependency `prophet` or directly `pip install fbprophet`. 
-* [10320](https://github.com/apache/incubator-superset/pull/10320): References to blacklst/whitelist language have been replaced with more appropriate alternatives. All configs refencing containing `WHITE`/`BLACK` have been replaced with `ALLOW`/`DENY`. Affected config variables that need to be updated: `TIME_GRAIN_BLACKLIST`, `VIZ_TYPE_BLACKLIST`, `DRUID_DATA_SOURCE_BLACKLIST`. +- [10320](https://github.com/apache/incubator-superset/pull/10320): References to blacklist/whitelist language have been replaced with more appropriate alternatives. All configs containing `WHITE`/`BLACK` have been replaced with `ALLOW`/`DENY`. Affected config variables that need to be updated: `TIME_GRAIN_BLACKLIST`, `VIZ_TYPE_BLACKLIST`, `DRUID_DATA_SOURCE_BLACKLIST`. ## 0.37.1 -* [10794](https://github.com/apache/incubator-superset/pull/10794): Breaking change: `uuid` python package is not supported on Jinja2 anymore, only uuid functions are exposed eg: `uuid1`, `uuid3`, `uuid4`, `uuid5`. +- [10794](https://github.com/apache/incubator-superset/pull/10794): Breaking change: `uuid` python package is not supported on Jinja2 anymore, only uuid functions are exposed eg: `uuid1`, `uuid3`, `uuid4`, `uuid5`. ## 0.37.0 -* [9964](https://github.com/apache/incubator-superset/pull/9964): Breaking change on Flask-AppBuilder 3. If you're using OAuth, find out what needs to be changed [here](https://github.com/dpgaspar/Flask-AppBuilder/blob/master/README.rst#change-log). +- [9964](https://github.com/apache/incubator-superset/pull/9964): Breaking change on Flask-AppBuilder 3. If you're using OAuth, find out what needs to be changed [here](https://github.com/dpgaspar/Flask-AppBuilder/blob/master/README.rst#change-log). -* [10233](https://github.com/apache/incubator-superset/pull/10233): a change which deprecates the `ENABLE_FLASK_COMPRESS` config option in favor of the Flask-Compress `COMPRESS_REGISTER` config option which serves the same purpose. 
+- [10233](https://github.com/apache/incubator-superset/pull/10233): a change which deprecates the `ENABLE_FLASK_COMPRESS` config option in favor of the Flask-Compress `COMPRESS_REGISTER` config option which serves the same purpose. -* [10222](https://github.com/apache/incubator-superset/pull/10222): a change which changes how payloads are cached. Previous cached objects cannot be decoded and thus will be reloaded from source. +- [10222](https://github.com/apache/incubator-superset/pull/10222): a change which changes how payloads are cached. Previous cached objects cannot be decoded and thus will be reloaded from source. -* [10130](https://github.com/apache/incubator-superset/pull/10130): a change which deprecates the `dbs.perm` column in favor of SQLAlchemy [hybird attributes](https://docs.sqlalchemy.org/en/13/orm/extensions/hybrid.html). +- [10130](https://github.com/apache/incubator-superset/pull/10130): a change which deprecates the `dbs.perm` column in favor of SQLAlchemy [hybrid attributes](https://docs.sqlalchemy.org/en/13/orm/extensions/hybrid.html). -* [10034](https://github.com/apache/incubator-superset/pull/10034): a change which deprecates the public security manager `assert_datasource_permission`, `assert_query_context_permission`, `assert_viz_permission`, and `rejected_tables` methods with the `raise_for_access` method which also handles assertion logic for SQL tables. +- [10034](https://github.com/apache/incubator-superset/pull/10034): a change which deprecates the public security manager `assert_datasource_permission`, `assert_query_context_permission`, `assert_viz_permission`, and `rejected_tables` methods with the `raise_for_access` method which also handles assertion logic for SQL tables. 
-* [10031](https://github.com/apache/incubator-superset/pull/10030): a change which renames the following public security manager methods: `can_access_datasource` to `can_access_table`, `all_datasource_access` to `can_access_all_datasources`, `all_database_access` to `can_access_all_databases`, `database_access` to `can_access_database`, `schema_access` to `can_access_schema`, and -`datasource_access` to `can_access_datasource`. Regrettably it is not viable to provide aliases for the deprecated methods as this would result in a name clash. Finally the `can_access_table` (previously `can_access_database`) method signature has changed, i.e., the optional `schema` argument no longer exists. +- [10031](https://github.com/apache/incubator-superset/pull/10030): a change which renames the following public security manager methods: `can_access_datasource` to `can_access_table`, `all_datasource_access` to `can_access_all_datasources`, `all_database_access` to `can_access_all_databases`, `database_access` to `can_access_database`, `schema_access` to `can_access_schema`, and + `datasource_access` to `can_access_datasource`. Regrettably it is not viable to provide aliases for the deprecated methods as this would result in a name clash. Finally the `can_access_table` (previously `can_access_database`) method signature has changed, i.e., the optional `schema` argument no longer exists. -* [10030](https://github.com/apache/incubator-superset/pull/10030): a change which renames the public security manager `schemas_accessible_by_user` method to `get_schemas_accessible_by_user`. +- [10030](https://github.com/apache/incubator-superset/pull/10030): a change which renames the public security manager `schemas_accessible_by_user` method to `get_schemas_accessible_by_user`. 
-* [9786](https://github.com/apache/incubator-superset/pull/9786): with the upgrade of `werkzeug` from version `0.16.0` to `1.0.1`, the `werkzeug.contrib.cache` module has been moved to a standalone package [cachelib](https://pypi.org/project/cachelib/). For example, to import the `RedisCache` class, please use the following import: `from cachelib.redis import RedisCache`. +- [9786](https://github.com/apache/incubator-superset/pull/9786): with the upgrade of `werkzeug` from version `0.16.0` to `1.0.1`, the `werkzeug.contrib.cache` module has been moved to a standalone package [cachelib](https://pypi.org/project/cachelib/). For example, to import the `RedisCache` class, please use the following import: `from cachelib.redis import RedisCache`. -* [9794](https://github.com/apache/incubator-superset/pull/9794): introduces `create view as` functionality in the sqllab. This change will require the `query` table migration and potential service downtime as that table has quite some traffic. +- [9794](https://github.com/apache/incubator-superset/pull/9794): introduces `create view as` functionality in the sqllab. This change will require the `query` table migration and potential service downtime as that table has quite some traffic. -* [9572](https://github.com/apache/incubator-superset/pull/9572): a change which by default means that the Jinja `current_user_id`, `current_username`, and `url_param` context calls no longer need to be wrapped via `cache_key_wrapper` in order to be included in the cache key. The `cache_key_wrapper` function should only be required for Jinja add-ons. +- [9572](https://github.com/apache/incubator-superset/pull/9572): a change which by default means that the Jinja `current_user_id`, `current_username`, and `url_param` context calls no longer need to be wrapped via `cache_key_wrapper` in order to be included in the cache key. The `cache_key_wrapper` function should only be required for Jinja add-ons. 
## 0.36.0 -* [8867](https://github.com/apache/incubator-superset/pull/8867): a change which adds the `tmp_schema_name` column to the `query` table which requires locking the table. Given the `query` table is heavily used performance may be degraded during the migration. Scheduled downtime may be advised. +- [8867](https://github.com/apache/incubator-superset/pull/8867): a change which adds the `tmp_schema_name` column to the `query` table which requires locking the table. Given the `query` table is heavily used performance may be degraded during the migration. Scheduled downtime may be advised. -* [9238](https://github.com/apache/incubator-superset/pull/9238): the config option `TIME_GRAIN_FUNCTIONS` has been renamed to `TIME_GRAIN_EXPRESSIONS` to better reflect the content of the dictionary. +- [9238](https://github.com/apache/incubator-superset/pull/9238): the config option `TIME_GRAIN_FUNCTIONS` has been renamed to `TIME_GRAIN_EXPRESSIONS` to better reflect the content of the dictionary. -* [9218](https://github.com/apache/incubator-superset/pull/9218): SQLite connections have been disabled by default -for analytics databases. You can optionally enable SQLite by setting `PREVENT_UNSAFE_DB_CONNECTIONS` to `False`. -It is not recommended to change this setting, as arbitrary SQLite connections can lead to security vulnerabilities. +- [9218](https://github.com/apache/incubator-superset/pull/9218): SQLite connections have been disabled by default + for analytics databases. You can optionally enable SQLite by setting `PREVENT_UNSAFE_DB_CONNECTIONS` to `False`. + It is not recommended to change this setting, as arbitrary SQLite connections can lead to security vulnerabilities. -* [9133](https://github.com/apache/incubator-superset/pull/9133): Security list of permissions and list views has been -disable by default. 
You can optionally enable them back again by setting the following config keys: -`FAB_ADD_SECURITY_PERMISSION_VIEW`, `FAB_ADD_SECURITY_VIEW_MENU_VIEW`, `FAB_ADD_SECURITY_PERMISSION_VIEWS_VIEW` to `True`. +- [9133](https://github.com/apache/incubator-superset/pull/9133): Security list of permissions and list views has been + disabled by default. You can optionally enable them back again by setting the following config keys: + `FAB_ADD_SECURITY_PERMISSION_VIEW`, `FAB_ADD_SECURITY_VIEW_MENU_VIEW`, `FAB_ADD_SECURITY_PERMISSION_VIEWS_VIEW` to `True`. -* [9173](https://github.com/apache/incubator-superset/pull/9173): Changes the encoding of the query source from an int to an enum. +- [9173](https://github.com/apache/incubator-superset/pull/9173): Changes the encoding of the query source from an int to an enum. -* [9120](https://github.com/apache/incubator-superset/pull/9120): Changes the default behavior of ad-hoc sharing of -queries in SQLLab to one that links to the saved query rather than one that copies the query data into the KVStore -model and links to the record there. This is a security-related change that makes SQLLab query -sharing respect the existing role-based access controls. Should you wish to retain the existing behavior, set two feature flags: -`"KV_STORE": True` will re-enable the `/kv/` and `/kv/store/` endpoints, and `"SHARE_QUERIES_VIA_KV_STORE": True` -will tell the front-end to utilize them for query sharing. +- [9120](https://github.com/apache/incubator-superset/pull/9120): Changes the default behavior of ad-hoc sharing of + queries in SQLLab to one that links to the saved query rather than one that copies the query data into the KVStore + model and links to the record there. This is a security-related change that makes SQLLab query + sharing respect the existing role-based access controls. 
Should you wish to retain the existing behavior, set two feature flags: + `"KV_STORE": True` will re-enable the `/kv/` and `/kv/store/` endpoints, and `"SHARE_QUERIES_VIA_KV_STORE": True` + will tell the front-end to utilize them for query sharing. -* [9109](https://github.com/apache/incubator-superset/pull/9109): Expire `filter_immune_slices` and -`filter_immune_filter_fields` to favor dashboard scoped filter metadata `filter_scopes`. +- [9109](https://github.com/apache/incubator-superset/pull/9109): Expire `filter_immune_slices` and + `filter_immune_filter_fields` to favor dashboard scoped filter metadata `filter_scopes`. -* [9046](https://github.com/apache/incubator-superset/pull/9046): Replaces `can_only_access_owned_queries` by -`all_query_access` favoring a white list approach. Since a new permission is introduced use `superset init` -to create and associate it by default to the `Admin` role. Note that, by default, all non `Admin` users will -not be able to access queries they do not own. +- [9046](https://github.com/apache/incubator-superset/pull/9046): Replaces `can_only_access_owned_queries` by + `all_query_access` favoring a white list approach. Since a new permission is introduced use `superset init` + to create and associate it by default to the `Admin` role. Note that, by default, all non `Admin` users will + not be able to access queries they do not own. -* [8901](https://github.com/apache/incubator-superset/pull/8901): The datasource's update -timestamp has been added to the query object's cache key to ensure updates to -datasources are always reflected in associated query results. As a consequence all -previously cached results will be invalidated when updating to the next version. +- [8901](https://github.com/apache/incubator-superset/pull/8901): The datasource's update + timestamp has been added to the query object's cache key to ensure updates to + datasources are always reflected in associated query results. 
As a consequence all + previously cached results will be invalidated when updating to the next version. -* [8699](https://github.com/apache/incubator-superset/pull/8699): A `row_level_security_filters` -table has been added, which is many-to-many with `tables` and `ab_roles`. The applicable filters -are added to the sqla query, and the RLS ids are added to the query cache keys. If RLS is enabled in config.py (`ENABLE_ROW_LEVEL_SECURITY = True`; by default, it is disabled), they can be -accessed through the `Security` menu, or when editting a table. +- [8699](https://github.com/apache/incubator-superset/pull/8699): A `row_level_security_filters` + table has been added, which is many-to-many with `tables` and `ab_roles`. The applicable filters + are added to the sqla query, and the RLS ids are added to the query cache keys. If RLS is enabled in config.py (`ENABLE_ROW_LEVEL_SECURITY = True`; by default, it is disabled), they can be + accessed through the `Security` menu, or when editing a table. -* [8732](https://github.com/apache/incubator-superset/pull/8732): Swagger user interface is now enabled by default. -A new permission `show on SwaggerView` is created by `superset init` and given to the `Admin` Role. To disable the UI, -set `FAB_API_SWAGGER_UI = False` on config. +- [8732](https://github.com/apache/incubator-superset/pull/8732): Swagger user interface is now enabled by default. + A new permission `show on SwaggerView` is created by `superset init` and given to the `Admin` Role. To disable the UI, + set `FAB_API_SWAGGER_UI = False` on config. -* [8721](https://github.com/apache/incubator-superset/pull/8721): When using the cache -warmup Celery task you should now specify the `SUPERSET_WEBSERVER_PROTOCOL` variable -in your configuration (probably either "http" or "https"). This defaults to "http". 
+- [8721](https://github.com/apache/incubator-superset/pull/8721): When using the cache + warmup Celery task you should now specify the `SUPERSET_WEBSERVER_PROTOCOL` variable + in your configuration (probably either "http" or "https"). This defaults to "http". -* [8512](https://github.com/apache/incubator-superset/pull/8512): `DRUID_IS_ACTIVE` now -defaults to False. To enable Druid-API-based functionality, override the -`DRUID_IS_ACTIVE` configuration variable by setting it to `True` for your deployment. +- [8512](https://github.com/apache/incubator-superset/pull/8512): `DRUID_IS_ACTIVE` now + defaults to False. To enable Druid-API-based functionality, override the + `DRUID_IS_ACTIVE` configuration variable by setting it to `True` for your deployment. -* [8450](https://github.com/apache/incubator-superset/pull/8450): The time range picker -now uses UTC for the tooltips and default placeholder timestamps (sans timezone). +- [8450](https://github.com/apache/incubator-superset/pull/8450): The time range picker + now uses UTC for the tooltips and default placeholder timestamps (sans timezone). -* [8418](https://github.com/apache/incubator-superset/pull/8418): FLASK_APP / Worker App -have changed. FLASK_APP should be updated to `superset.app:create_app()` and Celery Workers -should be started with `--app=superset.tasks.celery_app:app` +- [8418](https://github.com/apache/incubator-superset/pull/8418): FLASK_APP / Worker App + have changed. FLASK_APP should be updated to `superset.app:create_app()` and Celery Workers + should be started with `--app=superset.tasks.celery_app:app` -* [9017](https://github.com/apache/incubator-superset/pull/9017): `SIP_15_ENABLED` now -defaults to True which ensures that for all new SQL charts the time filter will behave -like [start, end). Existing deployments should either disable this feature to keep the -status quo or inform their users of this change prior to enabling the flag. 
The -`SIP_15_GRACE_PERIOD_END` option provides a mechanism for specifying how long chart -owners have to migrate their charts (the default is indefinite). +- [9017](https://github.com/apache/incubator-superset/pull/9017): `SIP_15_ENABLED` now + defaults to True which ensures that for all new SQL charts the time filter will behave + like [start, end). Existing deployments should either disable this feature to keep the + status quo or inform their users of this change prior to enabling the flag. The + `SIP_15_GRACE_PERIOD_END` option provides a mechanism for specifying how long chart + owners have to migrate their charts (the default is indefinite). ## 0.35.0 -* [8370](https://github.com/apache/incubator-superset/pull/8370): Deprecates +- [8370](https://github.com/apache/incubator-superset/pull/8370): Deprecates the `HTTP_HEADERS` variable in favor of `DEFAULT_HTTP_HEADERS` and `OVERRIDE_HTTP_HEADERS`. To retain the same behavior you should use `OVERRIDE_HTTP_HEADERS` instead of `HTTP_HEADERS`. `HTTP_HEADERS` will still work but may be removed in a future update. -* We're deprecating the concept of "restricted metric", this feature +- We're deprecating the concept of "restricted metric", this feature was not fully working anyhow. 
-* [8117](https://github.com/apache/incubator-superset/pull/8117): If you are -using `ENABLE_PROXY_FIX = True`, review the newly-introducted variable, -`PROXY_FIX_CONFIG`, which changes the proxy behavior in accordance with -[Werkzeug](https://werkzeug.palletsprojects.com/en/0.15.x/middleware/proxy_fix/) +- [8117](https://github.com/apache/incubator-superset/pull/8117): If you are + using `ENABLE_PROXY_FIX = True`, review the newly-introduced variable, + `PROXY_FIX_CONFIG`, which changes the proxy behavior in accordance with + [Werkzeug](https://werkzeug.palletsprojects.com/en/0.15.x/middleware/proxy_fix/) -* [8069](https://github.com/apache/incubator-superset/pull/8069): introduces -[MessagePack](https://github.com/msgpack/msgpack-python) and -[PyArrow](https://arrow.apache.org/docs/python/) for async query results -backend serialization. To disable set `RESULTS_BACKEND_USE_MSGPACK = False` -in your configuration. +- [8069](https://github.com/apache/incubator-superset/pull/8069): introduces + [MessagePack](https://github.com/msgpack/msgpack-python) and + [PyArrow](https://arrow.apache.org/docs/python/) for async query results + backend serialization. To disable set `RESULTS_BACKEND_USE_MSGPACK = False` + in your configuration. -* [8371](https://github.com/apache/incubator-superset/pull/8371): makes -`tables.table_name`, `dbs.database_name`, `datasources.cluster_name`, and `clusters.cluster_name` non-nullable. -Depending on the integrity of the data, manual intervention may be required. +- [8371](https://github.com/apache/incubator-superset/pull/8371): makes + `tables.table_name`, `dbs.database_name`, `datasources.cluster_name`, and `clusters.cluster_name` non-nullable. + Depending on the integrity of the data, manual intervention may be required. ## 0.34.0 -* [7848](https://github.com/apache/incubator-superset/pull/7848): If you are -running redis with celery, celery bump to 4.3.0 requires redis-py upgrade to -3.2.0 or later. 
+- [7848](https://github.com/apache/incubator-superset/pull/7848): If you are + running redis with celery, celery bump to 4.3.0 requires redis-py upgrade to + 3.2.0 or later. -* [7667](https://github.com/apache/incubator-superset/pull/7667): a change to -make all Unix timestamp (which by definition are in UTC) comparisons refer -to a timestamp in UTC as opposed to local time. +- [7667](https://github.com/apache/incubator-superset/pull/7667): a change to + make all Unix timestamp (which by definition are in UTC) comparisons refer + to a timestamp in UTC as opposed to local time. -* [7653](https://github.com/apache/incubator-superset/pull/7653): a change -which deprecates the table_columns.database_expression column. Expressions -should be handled by the DB engine spec conversion, Python date format, or -custom column expression/type. +- [7653](https://github.com/apache/incubator-superset/pull/7653): a change + which deprecates the table_columns.database_expression column. Expressions + should be handled by the DB engine spec conversion, Python date format, or + custom column expression/type. -* The repo no longer contains translation binaries (`.mo`) files. If you +- The repo no longer contains translation binaries (`.mo`) files. If you want translations in your build, you now have to run the command `babel-compile --target superset/translations` as part of your builds -* [5451](https://github.com/apache/incubator-superset/pull/5451): a change -which adds missing non-nullable fields to the `datasources` table. Depending on -the integrity of the data, manual intervention may be required. - -* [5452](https://github.com/apache/incubator-superset/pull/5452): a change -which adds missing non-nullable fields and uniqueness constraints (which may be -case insensitive depending on your database configuration) to the `columns`and -`table_columns` tables. Depending on the integrity of the data, manual -intervention may be required. 
-* `fabmanager` command line is deprecated since Flask-AppBuilder 2.0.0, use -the new `flask fab ` integrated with *Flask cli*. -* `SUPERSET_UPDATE_PERMS` environment variable was replaced by -`FAB_UPDATE_PERMS` config boolean key. To disable automatic -creation of permissions set `FAB_UPDATE_PERMS = False` on config. -* [5453](https://github.com/apache/incubator-superset/pull/5453): a change -which adds missing non-nullable fields and uniqueness constraints (which may be -case insensitive depending on your database configuration) to the metrics -and sql_metrics tables. Depending on the integrity of the data, manual -intervention may be required. -* [7616](https://github.com/apache/incubator-superset/pull/7616): this bug fix -changes time_compare deltas to correctly evaluate to the number of days prior -instead of number of days in the future. It will change the data for advanced -analytics time_compare so `1 year` from 5/1/2019 will be calculated as 365 days -instead of 366 days. +- [5451](https://github.com/apache/incubator-superset/pull/5451): a change + which adds missing non-nullable fields to the `datasources` table. Depending on + the integrity of the data, manual intervention may be required. + +- [5452](https://github.com/apache/incubator-superset/pull/5452): a change + which adds missing non-nullable fields and uniqueness constraints (which may be + case insensitive depending on your database configuration) to the `columns`and + `table_columns` tables. Depending on the integrity of the data, manual + intervention may be required. +- `fabmanager` command line is deprecated since Flask-AppBuilder 2.0.0, use + the new `flask fab ` integrated with _Flask cli_. +- `SUPERSET_UPDATE_PERMS` environment variable was replaced by + `FAB_UPDATE_PERMS` config boolean key. To disable automatic + creation of permissions set `FAB_UPDATE_PERMS = False` on config. 
+- [5453](https://github.com/apache/incubator-superset/pull/5453): a change + which adds missing non-nullable fields and uniqueness constraints (which may be + case insensitive depending on your database configuration) to the metrics + and sql_metrics tables. Depending on the integrity of the data, manual + intervention may be required. +- [7616](https://github.com/apache/incubator-superset/pull/7616): this bug fix + changes time_compare deltas to correctly evaluate to the number of days prior + instead of number of days in the future. It will change the data for advanced + analytics time_compare so `1 year` from 5/1/2019 will be calculated as 365 days + instead of 366 days. ## Superset 0.32.0 -* `npm run backend-sync` is deprecated and no longer needed, will fail if called -* [5445](https://github.com/apache/incubator-superset/pull/5445): a change -which prevents encoding of empty string from form data in the database. -This involves a non-schema changing migration which does potentially impact -a large number of records. Scheduled downtime may be advised. +- `npm run backend-sync` is deprecated and no longer needed, will fail if called +- [5445](https://github.com/apache/incubator-superset/pull/5445): a change + which prevents encoding of empty string from form data in the database. + This involves a non-schema changing migration which does potentially impact + a large number of records. Scheduled downtime may be advised. ## Superset 0.31.0 -* If you use `Hive` or `Presto`, we've moved some dependencies that were +- If you use `Hive` or `Presto`, we've moved some dependencies that were in the main package as optional now. To get these packages, run `pip install superset[presto]` and/or `pip install superset[hive]` as required. 
-* Similarly, if you use Celery's `flower`, `gsheetsdb`, `thrift` or +- Similarly, if you use Celery's `flower`, `gsheetsdb`, `thrift` or `thrift-sasl`, those dependencies have now been made optional in our package, meaning you may have to install them in your environment post 0.31.0 -* boto3 / botocore was removed from the dependency list. If you use s3 -as a place to store your SQL Lab result set or Hive uploads, you may -have to rely on an alternate requirements.txt file to install those -dependencies. -* From 0.31.0 onwards, we recommend not using the npm package `yarn` in -favor of good old `npm install`. While yarn should still work just fine, -you should probably align to guarantee builds similar to the ones we -use in testing and across the community in general. +- boto3 / botocore was removed from the dependency list. If you use s3 + as a place to store your SQL Lab result set or Hive uploads, you may + have to rely on an alternate requirements.txt file to install those + dependencies. +- From 0.31.0 onwards, we recommend not using the npm package `yarn` in + favor of good old `npm install`. While yarn should still work just fine, + you should probably align to guarantee builds similar to the ones we + use in testing and across the community in general. ## Superset 0.30.0 -* 0.30.0 includes a db_migration that removes allow_run_sync. This may -require downtime because during the migration if the db is migrated first, -superset will get 500 errors when the code can't find the field (until -the deploy finishes). + +- 0.30.0 includes a db_migration that removes allow_run_sync. This may + require downtime because during the migration if the db is migrated first, + superset will get 500 errors when the code can't find the field (until + the deploy finishes). 
## Superset 0.29.0 -* India was removed from the "Country Map" visualization as the geojson + +- India was removed from the "Country Map" visualization as the geojson file included in the package was very large -* [5933](https://github.com/apache/incubator-superset/pull/5933)/[6078](https://github.com/apache/incubator-superset/pull/6078): changes which add schema and table metadata cache timeout logic at the database level. If left undefined caching of metadata is disabled. +- [5933](https://github.com/apache/incubator-superset/pull/5933)/[6078](https://github.com/apache/incubator-superset/pull/6078): changes which add schema and table metadata cache timeout logic at the database level. If left undefined caching of metadata is disabled. ## Superset 0.28.0 -* Support for Python 2 is deprecated, we only support >=3.6 from + +- Support for Python 2 is deprecated, we only support >=3.6 from `0.28.0` onwards -* Superset 0.28 deprecates the previous dashboard layout. While 0.27 +- Superset 0.28 deprecates the previous dashboard layout. While 0.27 offered a migration workflow to users and allowed them to validate and publish their migrated dashboards individually, 0.28 forces the migration of all dashboards through an automated db migration script. We do recommend that you take a backup prior to this migration. -* Superset 0.28 deprecates the `median` cluster label aggregator for mapbox visualizations. This particular aggregation is not supported on mapbox visualizations going forward. +- Superset 0.28 deprecates the `median` cluster label aggregator for mapbox visualizations. This particular aggregation is not supported on mapbox visualizations going forward. -* Superset 0.28 upgrades `flask-login` to `>=0.3`, which includes a - backwards-incompatible change: `g.user.is_authenticated`, - `g.user.is_anonymous`, and `g.user.is_active` are now properties - instead of methods. 
+- Superset 0.28 upgrades `flask-login` to `>=0.3`, which includes a + backwards-incompatible change: `g.user.is_authenticated`, + `g.user.is_anonymous`, and `g.user.is_active` are now properties + instead of methods. ## Superset 0.27.0 -* Superset 0.27 start to use nested layout for dashboard builder, which is not -backward-compatible with earlier dashboard grid data. We provide migration script -to automatically convert dashboard grid to nested layout data. To be safe, please -take a database backup prior to this upgrade. It's the only way people could go -back to a previous state. +- Superset 0.27 start to use nested layout for dashboard builder, which is not + backward-compatible with earlier dashboard grid data. We provide migration script + to automatically convert dashboard grid to nested layout data. To be safe, please + take a database backup prior to this upgrade. It's the only way people could go + back to a previous state. ## Superset 0.26.0 -* Superset 0.26.0 deprecates the `superset worker` CLI, which is a simple -wrapper around the `celery worker` command, forcing you into crafting -your own native `celery worker` command. Your command should look something -like `celery worker --app=superset.sql_lab:celery_app --pool=gevent -Ofair` + +- Superset 0.26.0 deprecates the `superset worker` CLI, which is a simple + wrapper around the `celery worker` command, forcing you into crafting + your own native `celery worker` command. Your command should look something + like `celery worker --app=superset.sql_lab:celery_app --pool=gevent -Ofair` ## Superset 0.25.0 + Superset 0.25.0 contains a backwards incompatible changes. If you run a production system you should schedule downtime for this upgrade. The PRs bellow have more information around the breaking changes: -* [9825](https://github.com/apache/incubator-superset/pull/9825): Support for Excel sheet upload added. 
To enable support, install Superset with the optional dependency `excel` -* [4587](https://github.com/apache/incubator-superset/pull/4587) : a backward +- [9825](https://github.com/apache/incubator-superset/pull/9825): Support for Excel sheet upload added. To enable support, install Superset with the optional dependency `excel` + +- [4587](https://github.com/apache/incubator-superset/pull/4587) : a backward incompatible database migration that requires downtime. Once the db migration succeeds, the web server needs to be restarted with the new version. The previous version will fail -* [4565](https://github.com/apache/incubator-superset/pull/4565) : we've +- [4565](https://github.com/apache/incubator-superset/pull/4565) : we've changed the security model a bit where in the past you would have to define your authentication scheme by inheriting from Flask App Builder's @@ -314,7 +322,7 @@ The PRs bellow have more information around the breaking changes: permissions to another system as needed. For all implementation, you simply have to import and derive `SupersetSecurityManager` in place of the `SecurityManager` -* [4835](https://github.com/apache/incubator-superset/pull/4835) : +- [4835](https://github.com/apache/incubator-superset/pull/4835) : our `setup.py` now only pins versions where required, giving you more latitude in using versions of libraries as needed. We do now provide a `requirements.txt` with pinned versions if you want to run diff --git a/docs/installation.rst b/docs/installation.rst index a832dc2d7834c..4ab158d9b41e8 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -367,8 +367,8 @@ Caching Superset uses `Flask-Cache `_ for caching purpose. Configuring your caching backend is as easy as providing -a ``CACHE_CONFIG``, constant in your ``superset_config.py`` that -complies with the Flask-Cache specifications. 
+``CACHE_CONFIG`` and ``DATA_CACHE_CONFIG``, constants in ``superset_config.py`` +that comply with `the Flask-Cache specifications `_. Flask-Cache supports multiple caching backends (Redis, Memcached, SimpleCache (in-memory), or the local filesystem). If you are going to use @@ -378,14 +378,13 @@ the `redis `_ Python package: :: pip install redis -For setting your timeouts, this is done in the Superset metadata and goes -up the "timeout searchpath", from your slice configuration, to your -data source's configuration, to your database's and ultimately falls back -into your global default defined in ``CACHE_CONFIG``. +For chart data, Superset goes up a “timeout search path”, from a slice's configuration +to the datasource’s, the database’s, then ultimately falls back to the global default +defined in ``DATA_CACHE_CONFIG``. .. code-block:: python - CACHE_CONFIG = { + DATA_CACHE_CONFIG = { 'CACHE_TYPE': 'redis', 'CACHE_DEFAULT_TIMEOUT': 60 * 60 * 24, # 1 day default (in secs) 'CACHE_KEY_PREFIX': 'superset_results', @@ -400,7 +399,7 @@ object that is compatible with the `Flask-Cache List[str]: """ Get a list of function names that are able to be called on the database. 
diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py index 04c9896cf45d4..77fb128d284fd 100644 --- a/superset/db_engine_specs/presto.py +++ b/superset/db_engine_specs/presto.py @@ -37,7 +37,7 @@ from sqlalchemy.orm import Session from sqlalchemy.sql.expression import ColumnClause, Select -from superset import app, cache, is_feature_enabled, security_manager +from superset import app, cache_manager, is_feature_enabled, security_manager from superset.db_engine_specs.base import BaseEngineSpec from superset.errors import ErrorLevel, SupersetError, SupersetErrorType from superset.exceptions import SupersetTemplateException @@ -930,7 +930,7 @@ def _latest_partition_from_df(cls, df: pd.DataFrame) -> Optional[List[str]]: return None @classmethod - @cache.memoize(timeout=60) + @cache_manager.data_cache.memoize(timeout=60) def latest_partition( cls, table_name: str, @@ -1030,7 +1030,7 @@ def latest_sub_partition( return df.to_dict()[field_to_return][0] @classmethod - @cache.memoize() + @cache_manager.data_cache.memoize() def get_function_names(cls, database: "Database") -> List[str]: """ Get a list of function names that are able to be called on the database. 
diff --git a/superset/models/core.py b/superset/models/core.py index 382239209f9e0..c4a8369a474bb 100755 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -54,8 +54,9 @@ from sqlalchemy.sql import expression, Select from sqlalchemy_utils import EncryptedType -from superset import app, db_engine_specs, is_feature_enabled, security_manager +from superset import app, db_engine_specs, is_feature_enabled from superset.db_engine_specs.base import TimeGrain +from superset.extensions import cache_manager, security_manager from superset.models.helpers import AuditMixinNullable, ImportExportMixin from superset.models.tags import FavStarUpdater from superset.result_set import SupersetResultSet @@ -452,8 +453,8 @@ def inspector(self) -> Inspector: return sqla.inspect(engine) @cache_util.memoized_func( - key=lambda *args, **kwargs: "db:{}:schema:None:table_list", - attribute_in_key="id", + key=lambda self, *args, **kwargs: f"db:{self.id}:schema:None:table_list", + cache=cache_manager.data_cache, ) def get_all_table_names_in_database( self, @@ -467,7 +468,8 @@ def get_all_table_names_in_database( return self.db_engine_spec.get_all_datasource_names(self, "table") @cache_util.memoized_func( - key=lambda *args, **kwargs: "db:{}:schema:None:view_list", attribute_in_key="id" + key=lambda self, *args, **kwargs: f"db:{self.id}:schema:None:view_list", + cache=cache_manager.data_cache, ) def get_all_view_names_in_database( self, @@ -481,8 +483,8 @@ def get_all_view_names_in_database( return self.db_engine_spec.get_all_datasource_names(self, "view") @cache_util.memoized_func( - key=lambda *args, **kwargs: f"db:{{}}:schema:{kwargs.get('schema')}:table_list", # type: ignore - attribute_in_key="id", + key=lambda self, schema, *args, **kwargs: f"db:{self.id}:schema:{schema}:table_list", # type: ignore + cache=cache_manager.data_cache, ) def get_all_table_names_in_schema( self, @@ -513,8 +515,8 @@ def get_all_table_names_in_schema( logger.warning(ex) @cache_util.memoized_func( - 
key=lambda *args, **kwargs: f"db:{{}}:schema:{kwargs.get('schema')}:view_list", # type: ignore - attribute_in_key="id", + key=lambda self, schema, *args, **kwargs: f"db:{self.id}:schema:{schema}:view_list", # type: ignore + cache=cache_manager.data_cache, ) def get_all_view_names_in_schema( self, @@ -543,7 +545,8 @@ def get_all_view_names_in_schema( logger.warning(ex) @cache_util.memoized_func( - key=lambda *args, **kwargs: "db:{}:schema_list", attribute_in_key="id" + key=lambda self, *args, **kwargs: f"db:{self.id}:schema_list", + cache=cache_manager.data_cache, ) def get_all_schema_names( self, diff --git a/superset/models/dashboard.py b/superset/models/dashboard.py index 2f28bbd2474cd..d3a8363dce043 100644 --- a/superset/models/dashboard.py +++ b/superset/models/dashboard.py @@ -41,17 +41,11 @@ from sqlalchemy.orm.session import object_session from sqlalchemy.sql import join, select -from superset import ( - app, - cache, - ConnectorRegistry, - db, - is_feature_enabled, - security_manager, -) +from superset import app, ConnectorRegistry, db, is_feature_enabled, security_manager from superset.connectors.base.models import BaseDatasource from superset.connectors.druid.models import DruidColumn, DruidMetric from superset.connectors.sqla.models import SqlMetric, TableColumn +from superset.extensions import cache_manager from superset.models.helpers import AuditMixinNullable, ImportExportMixin from superset.models.slice import Slice from superset.models.tags import DashboardUpdater @@ -224,10 +218,9 @@ def data(self) -> Dict[str, Any]: "last_modified_time": self.changed_on.replace(microsecond=0).timestamp(), } - @cache.memoize( + @cache_manager.cache.memoize( # manage cache version manually make_name=lambda fname: f"{fname}-v2.1", - timeout=config["DASHBOARD_CACHE_TIMEOUT"], unless=lambda: not is_feature_enabled("DASHBOARD_CACHE"), ) def full_data(self) -> Dict[str, Any]: @@ -267,7 +260,7 @@ def update_thumbnail(self) -> None: @debounce(0.1) def clear_cache(self) -> 
None: - cache.delete_memoized(Dashboard.full_data, self) + cache_manager.cache.delete_memoized(Dashboard.full_data, self) @classmethod @debounce(0.1) diff --git a/superset/utils/cache.py b/superset/utils/cache.py index 1e51909bfa434..f0b24b26aea3c 100644 --- a/superset/utils/cache.py +++ b/superset/utils/cache.py @@ -14,12 +14,24 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -from typing import Any, Callable, Optional +import logging +from datetime import datetime, timedelta +from functools import wraps +from typing import Any, Callable, Optional, Union -from flask import request +from flask import current_app as app, request +from flask_caching import Cache +from werkzeug.wrappers.etag import ETagResponseMixin from superset.extensions import cache_manager +# If a user sets `max_age` to 0, for how long should the browser cache the +# resource? Flask-Caching will cache forever, but for the HTTP header we need +# to specify a "far future" date. +ONE_YEAR = 365 * 24 * 60 * 60 # 1 year in seconds + +logger = logging.getLogger(__name__) + def view_cache_key(*args: Any, **kwargs: Any) -> str: # pylint: disable=unused-argument args_hash = hash(frozenset(request.args.items())) @@ -27,7 +39,7 @@ def view_cache_key(*args: Any, **kwargs: Any) -> str: # pylint: disable=unused- def memoized_func( - key: Callable[..., str] = view_cache_key, attribute_in_key: Optional[str] = None, + key: Callable[..., str] = view_cache_key, cache: Cache = cache_manager.cache, ) -> Callable[..., Any]: """Use this decorator to cache functions that have predefined first arg. @@ -40,38 +52,106 @@ def memoized_func( timeout of cache is set to 600 seconds by default, except cache_timeout = {timeout in seconds} is passed to the decorated function. - memoized_func uses simple_cache and stored the data in memory. - Key is a callable function that takes function arguments and - returns the caching key. 
+ :param key: a callable function that takes function arguments and returns + the caching key. + :param cache: a FlaskCache instance that will store the cache. """ def wrap(f: Callable[..., Any]) -> Callable[..., Any]: - if cache_manager.tables_cache: - - def wrapped_f(self: Any, *args: Any, **kwargs: Any) -> Any: - if not kwargs.get("cache", True): - return f(self, *args, **kwargs) - - if attribute_in_key: - cache_key = key(*args, **kwargs).format( - getattr(self, attribute_in_key) - ) - else: - cache_key = key(*args, **kwargs) - o = cache_manager.tables_cache.get(cache_key) - if not kwargs.get("force") and o is not None: - return o - o = f(self, *args, **kwargs) - cache_manager.tables_cache.set( - cache_key, o, timeout=kwargs.get("cache_timeout") - ) - return o - - else: - # noop - def wrapped_f(self: Any, *args: Any, **kwargs: Any) -> Any: + def wrapped_f(self: Any, *args: Any, **kwargs: Any) -> Any: + if not kwargs.get("cache", True): return f(self, *args, **kwargs) + cache_key = key(self, *args, **kwargs) + obj = cache.get(cache_key) + if not kwargs.get("force") and obj is not None: + return obj + obj = f(self, *args, **kwargs) + cache.set(cache_key, obj, timeout=kwargs.get("cache_timeout")) + return obj + return wrapped_f return wrap + + +def etag_cache( + check_perms: Callable[..., Any], + cache: Cache = cache_manager.cache, + max_age: Optional[Union[int, float]] = None, +) -> Callable[..., Any]: + """ + A decorator for caching views and handling etag conditional requests. + + The decorator adds headers to GET requests that help with caching: Last- + Modified, Expires and ETag. It also handles conditional requests, when the + client send an If-Matches header. + + If a cache is set, the decorator will cache GET responses, bypassing the + dataframe serialization. POST requests will still benefit from the + dataframe cache for requests that produce the same SQL. 
+ + """ + if max_age is None: + max_age = app.config["CACHE_DEFAULT_TIMEOUT"] + + def decorator(f: Callable[..., Any]) -> Callable[..., Any]: + @wraps(f) + def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin: + # check if the user can access the resource + check_perms(*args, **kwargs) + + # for POST requests we can't set cache headers, use the response + # cache nor use conditional requests; this will still use the + # dataframe cache in `superset/viz.py`, though. + if request.method == "POST": + return f(*args, **kwargs) + + response = None + try: + # build the cache key from the function arguments and any + # other additional GET arguments (like `form_data`, eg). + key_args = list(args) + key_kwargs = kwargs.copy() + key_kwargs.update(request.args) + cache_key = wrapper.make_cache_key( # type: ignore + f, *key_args, **key_kwargs + ) + response = cache.get(cache_key) + except Exception: # pylint: disable=broad-except + if app.debug: + raise + logger.exception("Exception possibly due to cache backend.") + + # if no response was cached, compute it using the wrapped function + if response is None: + response = f(*args, **kwargs) + + # add headers for caching: Last Modified, Expires and ETag + response.cache_control.public = True + response.last_modified = datetime.utcnow() + expiration = max_age or ONE_YEAR # max_age=0 also means far future + response.expires = response.last_modified + timedelta( + seconds=expiration + ) + response.add_etag() + + # if we have a cache, store the response from the request + try: + cache.set(cache_key, response, timeout=max_age) + except Exception: # pylint: disable=broad-except + if app.debug: + raise + logger.exception("Exception possibly due to cache backend.") + + return response.make_conditional(request) + + wrapper.uncached = f # type: ignore + wrapper.cache_timeout = max_age # type: ignore + wrapper.make_cache_key = cache._memoize_make_cache_key( # type: ignore # pylint: disable=protected-access + make_name=None, 
timeout=max_age + ) + + return wrapper + + return decorator diff --git a/superset/utils/cache_manager.py b/superset/utils/cache_manager.py index 77b1c9b46fe4d..352f62f0273c5 100644 --- a/superset/utils/cache_manager.py +++ b/superset/utils/cache_manager.py @@ -23,17 +23,35 @@ def __init__(self) -> None: super().__init__() self._cache = Cache() - self._tables_cache = Cache() + self._data_cache = Cache() self._thumbnail_cache = Cache() def init_app(self, app: Flask) -> None: - self._cache.init_app(app, app.config["CACHE_CONFIG"]) - self._tables_cache.init_app(app, app.config["TABLE_NAMES_CACHE_CONFIG"]) - self._thumbnail_cache.init_app(app, app.config["THUMBNAIL_CACHE_CONFIG"]) + self._cache.init_app( + app, + { + "CACHE_DEFAULT_TIMEOUT": app.config["CACHE_DEFAULT_TIMEOUT"], + **app.config["CACHE_CONFIG"], + }, + ) + self._data_cache.init_app( + app, + { + "CACHE_DEFAULT_TIMEOUT": app.config["CACHE_DEFAULT_TIMEOUT"], + **app.config["DATA_CACHE_CONFIG"], + }, + ) + self._thumbnail_cache.init_app( + app, + { + "CACHE_DEFAULT_TIMEOUT": app.config["CACHE_DEFAULT_TIMEOUT"], + **app.config["THUMBNAIL_CACHE_CONFIG"], + }, + ) @property - def tables_cache(self) -> Cache: - return self._tables_cache + def data_cache(self) -> Cache: + return self._data_cache @property def cache(self) -> Cache: diff --git a/superset/utils/decorators.py b/superset/utils/decorators.py index 8e5e9acd13920..014a512f0bedc 100644 --- a/superset/utils/decorators.py +++ b/superset/utils/decorators.py @@ -14,26 +14,14 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-import logging import time -from datetime import datetime, timedelta -from functools import wraps from typing import Any, Callable, Dict, Iterator, Union from contextlib2 import contextmanager -from flask import request -from werkzeug.wrappers.etag import ETagResponseMixin -from superset import app, cache from superset.stats_logger import BaseStatsLogger from superset.utils.dates import now_as_float -# If a user sets `max_age` to 0, for long the browser should cache the -# resource? Flask-Caching will cache forever, but for the HTTP header we need -# to specify a "far future" date. -FAR_FUTURE = 365 * 24 * 60 * 60 # 1 year in seconds -logger = logging.getLogger(__name__) - @contextmanager def stats_timing(stats_key: str, stats_logger: BaseStatsLogger) -> Iterator[float]: @@ -47,85 +35,6 @@ def stats_timing(stats_key: str, stats_logger: BaseStatsLogger) -> Iterator[floa stats_logger.timing(stats_key, now_as_float() - start_ts) -def etag_cache(max_age: int, check_perms: Callable[..., Any]) -> Callable[..., Any]: - """ - A decorator for caching views and handling etag conditional requests. - - The decorator adds headers to GET requests that help with caching: Last- - Modified, Expires and ETag. It also handles conditional requests, when the - client send an If-Matches header. - - If a cache is set, the decorator will cache GET responses, bypassing the - dataframe serialization. POST requests will still benefit from the - dataframe cache for requests that produce the same SQL. - - """ - - def decorator(f: Callable[..., Any]) -> Callable[..., Any]: - @wraps(f) - def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin: - # check if the user can access the resource - check_perms(*args, **kwargs) - - # for POST requests we can't set cache headers, use the response - # cache nor use conditional requests; this will still use the - # dataframe cache in `superset/viz.py`, though. 
- if request.method == "POST": - return f(*args, **kwargs) - - response = None - if cache: - try: - # build the cache key from the function arguments and any - # other additional GET arguments (like `form_data`, eg). - key_args = list(args) - key_kwargs = kwargs.copy() - key_kwargs.update(request.args) - cache_key = wrapper.make_cache_key( # type: ignore - f, *key_args, **key_kwargs - ) - response = cache.get(cache_key) - except Exception: # pylint: disable=broad-except - if app.debug: - raise - logger.exception("Exception possibly due to cache backend.") - - # if no response was cached, compute it using the wrapped function - if response is None: - response = f(*args, **kwargs) - - # add headers for caching: Last Modified, Expires and ETag - response.cache_control.public = True - response.last_modified = datetime.utcnow() - expiration = max_age if max_age != 0 else FAR_FUTURE - response.expires = response.last_modified + timedelta( - seconds=expiration - ) - response.add_etag() - - # if we have a cache, store the response from the request - if cache: - try: - cache.set(cache_key, response, timeout=max_age) - except Exception: # pylint: disable=broad-except - if app.debug: - raise - logger.exception("Exception possibly due to cache backend.") - - return response.make_conditional(request) - - if cache: - wrapper.uncached = f # type: ignore - wrapper.cache_timeout = max_age # type: ignore - wrapper.make_cache_key = cache._memoize_make_cache_key( # type: ignore # pylint: disable=protected-access - make_name=None, timeout=max_age - ) - - return wrapper - - return decorator - - def arghash(args: Any, kwargs: Dict[str, Any]) -> int: """Simple argument hash with kwargs sorted.""" sorted_args = tuple( diff --git a/superset/utils/screenshots.py b/superset/utils/screenshots.py index 0609a659af99c..26043616f1268 100644 --- a/superset/utils/screenshots.py +++ b/superset/utils/screenshots.py @@ -160,7 +160,7 @@ def compute_and_cache( # pylint: disable=too-many-arguments 
logger.error("Failed at resizing thumbnail %s", ex) payload = None - if payload and cache: + if payload: logger.info("Caching thumbnail: %s", cache_key) cache.set(cache_key, payload) logger.info("Done caching thumbnail") diff --git a/superset/views/core.py b/superset/views/core.py index 0ac0fb19e61ce..fbfac7cb944e2 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -88,8 +88,8 @@ from superset.sql_validators import get_validator_by_name from superset.typing import FlaskResponse from superset.utils import core as utils +from superset.utils.cache import etag_cache from superset.utils.dates import now_as_float -from superset.utils.decorators import etag_cache from superset.views.base import ( api, BaseSupersetView, @@ -123,7 +123,6 @@ from superset.viz import BaseViz config = app.config -CACHE_DEFAULT_TIMEOUT = config["CACHE_DEFAULT_TIMEOUT"] SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT = config["SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT"] stats_logger = config["STATS_LOGGER"] DAR = DatasourceAccessRequest @@ -435,7 +434,7 @@ def generate_json( @api @has_access_api @expose("/slice_json/") - @etag_cache(CACHE_DEFAULT_TIMEOUT, check_perms=check_slice_perms) + @etag_cache(check_perms=check_slice_perms) def slice_json(self, slice_id: int) -> FlaskResponse: form_data, slc = get_form_data(slice_id, use_slice_data=True) if not slc: @@ -494,7 +493,7 @@ def annotation_json( # pylint: disable=no-self-use methods=EXPLORE_JSON_METHODS, ) @expose("/explore_json/", methods=EXPLORE_JSON_METHODS) - @etag_cache(CACHE_DEFAULT_TIMEOUT, check_perms=check_datasource_perms) + @etag_cache(check_perms=check_datasource_perms) def explore_json( self, datasource_type: Optional[str] = None, datasource_id: Optional[int] = None ) -> FlaskResponse: diff --git a/superset/viz.py b/superset/viz.py index 4badf1db889fe..0a8a7028ed968 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -53,7 +53,7 @@ from geopy.point import Point from pandas.tseries.frequencies import to_offset -from superset import 
app, cache, db, is_feature_enabled, security_manager +from superset import app, db, is_feature_enabled from superset.constants import NULL_STRING from superset.errors import ErrorLevel, SupersetError, SupersetErrorType from superset.exceptions import ( @@ -61,6 +61,7 @@ QueryObjectValidationError, SpatialException, ) +from superset.extensions import cache_manager, security_manager from superset.models.cache import CacheKey from superset.models.helpers import QueryResult from superset.typing import QueryObjectDict, VizData, VizPayload @@ -107,7 +108,7 @@ def set_and_log_cache( try: cache_value = dict(dttm=cached_dttm, df=df, query=query) stats_logger.incr("set_cache_key") - cache.set(cache_key, cache_value, timeout=cache_timeout) + cache_manager.data_cache.set(cache_key, cache_value, timeout=cache_timeout) if datasource_uid: ck = CacheKey( @@ -121,7 +122,7 @@ def set_and_log_cache( # the key is too large or whatever other reasons logger.warning("Could not cache key {}".format(cache_key)) logger.exception(ex) - cache.delete(cache_key) + cache_manager.data_cache.delete(cache_key) class BaseViz: @@ -430,6 +431,8 @@ def cache_timeout(self) -> int: and self.datasource.database.cache_timeout ) is not None: return self.datasource.database.cache_timeout + if config["DATA_CACHE_CONFIG"].get("CACHE_DEFAULT_TIMEOUT") is not None: + return config["DATA_CACHE_CONFIG"]["CACHE_DEFAULT_TIMEOUT"] return config["CACHE_DEFAULT_TIMEOUT"] def get_json(self) -> str: @@ -513,8 +516,8 @@ def get_df_payload( stacktrace = None df = None cached_dttm = datetime.utcnow().isoformat().split(".")[0] - if cache_key and cache and not self.force: - cache_value = cache.get(cache_key) + if cache_key and cache_manager.data_cache and not self.force: + cache_value = cache_manager.data_cache.get(cache_key) if cache_value: stats_logger.incr("loading_from_cache") try: @@ -582,12 +585,7 @@ def get_df_payload( self.status = utils.QueryStatus.FAILED stacktrace = utils.get_stacktrace() - if ( - is_loaded - and 
cache_key - and cache - and self.status != utils.QueryStatus.FAILED - ): + if is_loaded and cache_key and self.status != utils.QueryStatus.FAILED: set_and_log_cache( cache_key, df, diff --git a/superset/viz_sip38.py b/superset/viz_sip38.py index a1ab19b438623..600f44141a1c6 100644 --- a/superset/viz_sip38.py +++ b/superset/viz_sip38.py @@ -45,7 +45,7 @@ from geopy.point import Point from pandas.tseries.frequencies import to_offset -from superset import app, cache, security_manager +from superset import app from superset.constants import NULL_STRING from superset.errors import ErrorLevel, SupersetError, SupersetErrorType from superset.exceptions import ( @@ -53,6 +53,7 @@ QueryObjectValidationError, SpatialException, ) +from superset.extensions import cache_manager, security_manager from superset.models.helpers import QueryResult from superset.typing import QueryObjectDict, VizData, VizPayload from superset.utils import core as utils @@ -413,7 +414,7 @@ def cache_timeout(self): and self.datasource.database.cache_timeout ) is not None: return self.datasource.database.cache_timeout - return config["CACHE_DEFAULT_TIMEOUT"] + return cache_manager.data_cache.config["CACHE_DEFAULT_TIMEOUT"] def get_json(self): return json.dumps( @@ -475,8 +476,8 @@ def get_df_payload( stacktrace = None df = None cached_dttm = datetime.utcnow().isoformat().split(".")[0] - if cache_key and cache and not self.force: - cache_value = cache.get(cache_key) + if cache_key and cache_manager.data_cache and not self.force: + cache_value = cache_manager.data_cache.get(cache_key) if cache_value: stats_logger.incr("loading_from_cache") try: @@ -515,12 +516,7 @@ def get_df_payload( self.status = utils.QueryStatus.FAILED stacktrace = utils.get_stacktrace() - if ( - is_loaded - and cache_key - and cache - and self.status != utils.QueryStatus.FAILED - ): + if is_loaded and cache_key and self.status != utils.QueryStatus.FAILED: set_and_log_cache( cache_key, df, diff --git a/tests/cache_tests.py 
b/tests/cache_tests.py index 4f6581ca9b791..0b887622ef074 100644 --- a/tests/cache_tests.py +++ b/tests/cache_tests.py @@ -17,7 +17,8 @@ """Unit tests for Superset with caching""" import json -from superset import cache, db +from superset import app, db +from superset.extensions import cache_manager from superset.utils.core import QueryStatus from .base_tests import SupersetTestCase @@ -25,15 +26,42 @@ class TestCache(SupersetTestCase): def setUp(self): - cache.clear() + self.login(username="admin") + cache_manager.cache.clear() + cache_manager.data_cache.clear() def tearDown(self): - cache.clear() + cache_manager.cache.clear() + cache_manager.data_cache.clear() + + def test_no_data_cache(self): + app.config["DATA_CACHE_CONFIG"] = {"CACHE_TYPE": "null"} + cache_manager.init_app(app) - def test_cache_value(self): - self.login(username="admin") slc = self.get_slice("Girls", db.session) + json_endpoint = "/superset/explore_json/{}/{}/".format( + slc.datasource_type, slc.datasource_id + ) + resp = self.get_json_resp( + json_endpoint, {"form_data": json.dumps(slc.viz.form_data)} + ) + resp_from_cache = self.get_json_resp( + json_endpoint, {"form_data": json.dumps(slc.viz.form_data)} + ) + self.assertFalse(resp["is_cached"]) + self.assertFalse(resp_from_cache["is_cached"]) + + def test_slice_data_cache(self): + # Override cache config + app.config["CACHE_DEFAULT_TIMEOUT"] = 100 + app.config["DATA_CACHE_CONFIG"] = { + "CACHE_TYPE": "simple", + "CACHE_DEFAULT_TIMEOUT": 10, + "CACHE_KEY_PREFIX": "superset_data_cache", + } + cache_manager.init_app(app) + slc = self.get_slice("Boys", db.session) json_endpoint = "/superset/explore_json/{}/{}/".format( slc.datasource_type, slc.datasource_id ) @@ -45,6 +73,19 @@ def test_cache_value(self): ) self.assertFalse(resp["is_cached"]) self.assertTrue(resp_from_cache["is_cached"]) + # should fallback to default cache timeout + self.assertEqual(resp_from_cache["cache_timeout"], 10) self.assertEqual(resp_from_cache["status"], 
QueryStatus.SUCCESS) self.assertEqual(resp["data"], resp_from_cache["data"]) self.assertEqual(resp["query"], resp_from_cache["query"]) + # should exist in `data_cache` + self.assertEqual( + cache_manager.data_cache.get(resp_from_cache["cache_key"])["query"], + resp_from_cache["query"], + ) + # should not exist in `cache` + self.assertIsNone(cache_manager.cache.get(resp_from_cache["cache_key"])) + + # reset cache config + app.config["DATA_CACHE_CONFIG"] = {"CACHE_TYPE": "null"} + cache_manager.init_app(app) diff --git a/tests/databases/api_tests.py b/tests/databases/api_tests.py index 8e8d0930b2bbf..2b8504daf9e96 100644 --- a/tests/databases/api_tests.py +++ b/tests/databases/api_tests.py @@ -87,7 +87,7 @@ def test_get_items(self): "function_names", "id", ] - self.assertEqual(response["count"], 2) + self.assertGreater(response["count"], 0) self.assertEqual(list(response["result"][0].keys()), expected_columns) def test_get_items_filter(self): diff --git a/tests/superset_test_config.py b/tests/superset_test_config.py index 71d6eeb2d9162..6a380829fa731 100644 --- a/tests/superset_test_config.py +++ b/tests/superset_test_config.py @@ -75,22 +75,24 @@ def GET_FEATURE_FLAGS_FUNC(ff): PUBLIC_ROLE_LIKE = "Gamma" AUTH_ROLE_PUBLIC = "Public" EMAIL_NOTIFICATIONS = False -CACHE_CONFIG = {"CACHE_TYPE": "simple"} REDIS_HOST = os.environ.get("REDIS_HOST", "localhost") REDIS_PORT = os.environ.get("REDIS_PORT", "6379") REDIS_CELERY_DB = os.environ.get("REDIS_CELERY_DB", 2) REDIS_RESULTS_DB = os.environ.get("REDIS_RESULTS_DB", 3) REDIS_CACHE_DB = os.environ.get("REDIS_CACHE_DB", 4) +CACHE_DEFAULT_TIMEOUT = 600 + CACHE_CONFIG = { "CACHE_TYPE": "redis", - "CACHE_DEFAULT_TIMEOUT": 60 * 60 * 24, # 1 day default (in secs) + "CACHE_DEFAULT_TIMEOUT": 60, "CACHE_KEY_PREFIX": "superset_cache", "CACHE_REDIS_URL": f"redis://{REDIS_HOST}:{REDIS_PORT}/{REDIS_CACHE_DB}", } -TABLE_NAMES_CACHE_CONFIG = { +DATA_CACHE_CONFIG = { **CACHE_CONFIG, + "CACHE_DEFAULT_TIMEOUT": 30, "CACHE_KEY_PREFIX": 
"superset_data_cache", }