Skip to content

Commit

Permalink
Give query case sensitive treatment in query hash (#4254)
Browse files Browse the repository at this point in the history
Generating the query hash from the query text with no lowercasing of the query text
allows case-sensitive parameter values in the dashboard to have different cache entries.

Fixes #2137
  • Loading branch information
osule authored Jul 17, 2023
1 parent 095ac2e commit c8516d3
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""Make case sensitive hash of query text
Revision ID: 1038c2174f5d
Revises: fd4fc850d7ea
Create Date: 2023-07-16 23:10:12.885949
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import table

from redash.utils import gen_query_hash

# Revision identifiers, used by Alembic.
# `revision` is this migration's ID; `down_revision` is the migration it revises.
revision = '1038c2174f5d'
down_revision = 'fd4fc850d7ea'
# No branch labels or dependencies on other migrations are declared.
branch_labels = None
depends_on = None



def change_query_hash(conn, table, query_text_to):
    """Recompute and store ``query_hash`` for every row of ``table``.

    Args:
        conn: Connection used to run the SELECT and the per-row UPDATEs.
        table: Table construct exposing ``id``, ``query`` and ``query_hash``
            columns.
        query_text_to: Callable applied to each row's query text before it
            is hashed (e.g. ``str`` to keep case, ``str.lower`` to fold it).

    Rows whose query text is NULL are left untouched: ``str.lower(None)``
    raises ``TypeError`` and ``str(None)`` would hash the literal "None".
    """
    for record in conn.execute(table.select()):
        if record.query is None:
            # Nothing to hash; keep whatever hash the row already has.
            continue
        query_hash = gen_query_hash(query_text_to(record.query))
        conn.execute(
            table.update()
            .where(table.c.id == record.id)
            .values(query_hash=query_hash)
        )


def upgrade():
    """Rehash all rows of ``queries`` from their query text as stored.

    ``str`` is passed as the text transform, so the text reaches
    ``gen_query_hash`` with its original letter case.
    """
    # Lightweight table description, used only to build the SELECT/UPDATE
    # statements issued by change_query_hash.
    columns = (
        sa.Column('id', sa.Integer, primary_key=True),
        sa.Column('query', sa.Text),
        sa.Column('query_hash', sa.String(length=10)),
    )
    queries_table = table('queries', *columns)
    change_query_hash(op.get_bind(), queries_table, query_text_to=str)


def downgrade():
    """Rehash all rows of ``queries`` from their lower-cased query text.

    ``str.lower`` is passed as the text transform, so differences in letter
    case no longer affect the stored hash.
    """
    # Lightweight table description, used only to build the SELECT/UPDATE
    # statements issued by change_query_hash.
    columns = (
        sa.Column('id', sa.Integer, primary_key=True),
        sa.Column('query', sa.Text),
        sa.Column('query_hash', sa.String(length=10)),
    )
    queries_table = table('queries', *columns)
    change_query_hash(op.get_bind(), queries_table, query_text_to=str.lower)
6 changes: 3 additions & 3 deletions redash/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@ def slugify(s):

def gen_query_hash(sql):
    """Return the MD5 hex digest of the normalized query text.

    Text matched by ``COMMENTS_REGEX`` is dropped, then all whitespace
    (spaces, tabs, line breaks) is removed. Letter case is preserved, so
    case-sensitive literals and parameter values hash differently.

    The following queries will get different ids:
    1. SELECT 1 FROM table WHERE column='Value';
    2. SELECT 1 FROM table where column='value';
    """
    sql = COMMENTS_REGEX.sub("", sql)
    # split() with no argument splits on any whitespace run, so joining the
    # pieces with "" removes every whitespace character.
    sql = "".join(sql.split())
    return hashlib.md5(sql.encode("utf-8")).hexdigest()


Expand Down

0 comments on commit c8516d3

Please sign in to comment.