-
Notifications
You must be signed in to change notification settings - Fork 84
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix/4196 speed up search bundles #4273
Changes from 9 commits
cd21e01
a1e0980
26d1b71
8e49820
7b3202a
08e80b3
20073eb
801c1be
d1a649f
af43e96
c5e888b
fa702d3
a9510c1
adc63af
b91baca
fc66615
b2f129c
2c113a8
7595b01
8002843
7c439f0
1b75dfb
5b62589
af1a585
c770d7c
93c781c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
"""Remove is_superuser from User table | ||
|
||
Revision ID: 22b6c41cd29c | ||
Revises: f720aaefd0b2 | ||
Create Date: 2022-10-04 03:42:55.838245 | ||
|
||
""" | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = '22b6c41cd29c' | ||
down_revision = 'f720aaefd0b2' | ||
|
||
from alembic import op | ||
import sqlalchemy as sa | ||
from sqlalchemy.dialects import mysql | ||
|
||
def upgrade(): | ||
# ### commands auto generated by Alembic - please adjust! ### | ||
op.drop_column('user', 'is_superuser') | ||
# ### end Alembic commands ### | ||
|
||
|
||
def downgrade(): | ||
# ### commands auto generated by Alembic - please adjust! ### | ||
op.add_column('user', sa.Column('is_superuser', mysql.TINYINT(display_width=1), autoincrement=False, nullable=False)) | ||
# ### end Alembic commands ### |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,8 +13,10 @@ | |
from dateutil import parser | ||
from uuid import uuid4 | ||
|
||
import sqlalchemy | ||
from sqlalchemy import and_, or_, not_, select, union, desc, func | ||
from sqlalchemy.sql.expression import literal, true | ||
from sqlalchemy.orm import aliased | ||
|
||
from codalab.bundles import get_bundle_subclass | ||
from codalab.bundles.run_bundle import RunBundle | ||
|
@@ -420,8 +422,16 @@ def make_condition(key, field, value): | |
return field == value | ||
return None | ||
|
||
def add_join(join_table, join_condition): | ||
join_tables.append(join_table) | ||
join_conditions.append(join_condition) | ||
|
||
shortcuts = {'type': 'bundle_type', 'size': 'data_size', 'worksheet': 'host_worksheet'} | ||
|
||
join_tables = [] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add type annotations? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added for the two arrays I'm using (joins and where_clause_conjuncts). When I try to add type annotations for some of the other variables (like limit), I get a strange error from mypy: So, I've left those un-typed for now, but I'll look into this. |
||
join_conditions = [] | ||
where_clauses = [] | ||
|
||
for keyword in keywords: | ||
keyword = keyword.replace('.*', '%') | ||
# Sugar | ||
|
@@ -443,7 +453,7 @@ def make_condition(key, field, value): | |
) | ||
) | ||
clause = not_(cl_bundle.c.uuid.in_(with_hosts)) | ||
clauses.append(clause) | ||
where_clauses.append(clause) | ||
continue | ||
|
||
m = SEARCH_KEYWORD_REGEX.match(keyword) # key=value | ||
|
@@ -455,7 +465,7 @@ def make_condition(key, field, value): | |
else: | ||
key, value = 'uuid_name', keyword | ||
|
||
clause = None | ||
where_clause = None | ||
# Special functions | ||
if key == '.offset': | ||
offset = int(value) | ||
|
@@ -465,71 +475,54 @@ def make_condition(key, field, value): | |
format_func = value | ||
# Bundle fields | ||
elif key in ('bundle_type', 'id', 'uuid', 'data_hash', 'state', 'command', 'owner_id'): | ||
clause = make_condition(key, getattr(cl_bundle.c, key), value) | ||
where_clause = make_condition(key, getattr(cl_bundle.c, key), value) | ||
elif key == '.shared': # shared with any group I am in with read permission | ||
clause = cl_bundle.c.uuid.in_( | ||
select([cl_group_bundle_permission.c.object_uuid]).where( | ||
and_( | ||
cl_group_bundle_permission.c.group_uuid.in_( | ||
alias( | ||
select([cl_user_group.c.group_uuid]).where( | ||
cl_user_group.c.user_id == user_id | ||
) | ||
) | ||
), | ||
cl_group_bundle_permission.c.permission >= GROUP_OBJECT_PERMISSION_READ, | ||
) | ||
) | ||
add_join(cl_group_bundle_permission, cl_bundle.c.uuid == cl_group_bundle_permission.c.object_uuid) | ||
add_join(cl_user_group, cl_group_bundle_permission.c.group_uuid == cl_user_group.c.group_uuid) | ||
|
||
where_clause = and_( | ||
cl_user_group.c.user_id == user_id, | ||
cl_group_bundle_permission.c.permission >= GROUP_OBJECT_PERMISSION_READ | ||
) | ||
elif key == 'group': # shared with group with read permission | ||
add_join(cl_group_bundle_permission, cl_bundle.c.uuid == cl_group_bundle_permission.c.object_uuid) | ||
group_uuid = get_group_info(value, False)['uuid'] | ||
clause = cl_bundle.c.uuid.in_( | ||
select([cl_group_bundle_permission.c.object_uuid]).where( | ||
and_( | ||
where_clause = and_( | ||
cl_group_bundle_permission.c.group_uuid == group_uuid, | ||
cl_group_bundle_permission.c.permission >= GROUP_OBJECT_PERMISSION_READ, | ||
) | ||
) | ||
) | ||
# Special fields | ||
elif key == 'dependency': | ||
# Match uuid of dependency | ||
condition = make_condition(key, cl_bundle_dependency.c.parent_uuid, value) | ||
if condition is None: # top-level | ||
clause = cl_bundle_dependency.c.child_uuid == cl_bundle.c.uuid | ||
where_clause = cl_bundle_dependency.c.child_uuid == cl_bundle.c.uuid | ||
else: # embedded | ||
clause = cl_bundle.c.uuid.in_( | ||
alias(select([cl_bundle_dependency.c.child_uuid]).where(condition)) | ||
) | ||
add_join(cl_bundle_dependency, cl_bundle.c.uuid == cl_bundle_dependency.c.child_uuid) | ||
where_clause = condition | ||
elif key.startswith('dependency/'): | ||
_, name = key.split('/', 1) | ||
condition = make_condition(key, cl_bundle_dependency.c.parent_uuid, value) | ||
if condition is None: # top-level | ||
clause = and_( | ||
where_clause = and_( | ||
cl_bundle_dependency.c.child_uuid == cl_bundle.c.uuid, # Join constraint | ||
cl_bundle_dependency.c.child_path | ||
== name, # Match the 'type' of dependent (child_path) | ||
) | ||
else: # embedded | ||
clause = cl_bundle.c.uuid.in_( | ||
alias( | ||
select([cl_bundle_dependency.c.child_uuid]).where( | ||
and_( | ||
cl_bundle_dependency.c.child_path | ||
== name, # Match the 'type' of dependent (child_path) | ||
condition, | ||
) | ||
) | ||
) | ||
add_join(cl_bundle_dependency, cl_bundle.c.uuid == cl_bundle_dependency.c.child_uuid) | ||
where_clause = and_( | ||
cl_bundle_dependency.c.child_path | ||
== name, # Match the 'type' of dependent (child_path) | ||
condition, | ||
) | ||
elif key == 'host_worksheet': | ||
condition = make_condition(key, cl_worksheet_item.c.worksheet_uuid, value) | ||
if condition is None: # top-level | ||
clause = cl_worksheet_item.c.bundle_uuid == cl_bundle.c.uuid # Join constraint | ||
else: | ||
clause = cl_bundle.c.uuid.in_( | ||
alias(select([cl_worksheet_item.c.bundle_uuid]).where(condition)) | ||
) | ||
add_join(cl_worksheet_item, cl_bundle.c.uuid == cl_worksheet_item.c.bundle_uuid) | ||
where_clause = condition | ||
elif key in ('.before', '.after'): | ||
try: | ||
target_datetime = parser.isoparse(value) | ||
|
@@ -539,78 +532,68 @@ def make_condition(key, field, value): | |
) | ||
|
||
subclause = None | ||
aliased_bundle_metadata = aliased(cl_bundle_metadata) | ||
if key == '.before': | ||
subclause = cl_bundle_metadata.c.metadata_value <= int( | ||
subclause = aliased_bundle_metadata.c.metadata_value <= int( | ||
target_datetime.timestamp() | ||
) | ||
if key == '.after': | ||
subclause = cl_bundle_metadata.c.metadata_value >= int( | ||
subclause = aliased_bundle_metadata.c.metadata_value >= int( | ||
target_datetime.timestamp() | ||
) | ||
|
||
clause = cl_bundle.c.uuid.in_( | ||
alias( | ||
select([cl_bundle_metadata.c.bundle_uuid]).where( | ||
and_(cl_bundle_metadata.c.metadata_key == 'created', subclause) | ||
) | ||
) | ||
add_join(aliased_bundle_metadata, cl_bundle.c.uuid == aliased_bundle_metadata.c.bundle_uuid) | ||
where_clause = and_( | ||
aliased_bundle_metadata.c.metadata_key == 'created', subclause | ||
) | ||
elif key == 'uuid_name': # Search uuid and name by default | ||
clause = [] | ||
clause.append(cl_bundle.c.uuid.like('%' + value + '%')) | ||
clause.append( | ||
cl_bundle.c.uuid.in_( | ||
alias( | ||
select([cl_bundle_metadata.c.bundle_uuid]).where( | ||
and_( | ||
cl_bundle_metadata.c.metadata_key == 'name', | ||
cl_bundle_metadata.c.metadata_value.like('%' + value + '%'), | ||
) | ||
) | ||
) | ||
) | ||
) | ||
clause = or_(*clause) | ||
aliased_bundle_metadata = aliased(cl_bundle_metadata) | ||
add_join(aliased_bundle_metadata, cl_bundle.c.uuid == aliased_bundle_metadata.c.bundle_uuid) | ||
|
||
where_clause = [] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To be more accurate, I'd call this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! (I think they're actually conjuncts in this case since we use and_ to connect them in line 623, but please correct me if I'm wrong!) |
||
where_clause.append(cl_bundle.c.uuid.like('%' + value + '%')) | ||
where_clause.append(and_( | ||
aliased_bundle_metadata.c.metadata_key == 'name', | ||
aliased_bundle_metadata.c.metadata_value.like('%' + value + '%'), | ||
)) | ||
where_clause = or_(*where_clause) | ||
elif key == '': # Match any field | ||
clause = [] | ||
clause.append(cl_bundle.c.uuid.like('%' + value + '%')) | ||
clause.append(cl_bundle.c.command.like('%' + value + '%')) | ||
clause.append( | ||
cl_bundle.c.uuid.in_( | ||
alias( | ||
select([cl_bundle_metadata.c.bundle_uuid]).where( | ||
cl_bundle_metadata.c.metadata_value.like('%' + value + '%') | ||
) | ||
) | ||
) | ||
aliased_bundle_metadata = aliased(cl_bundle_metadata) | ||
add_join(aliased_bundle_metadata, cl_bundle.c.uuid == aliased_bundle_metadata.c.bundle_uuid) | ||
|
||
where_clause = [] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. seems like you can just write There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
where_clause.append(cl_bundle.c.uuid.like('%' + value + '%')) | ||
where_clause.append(cl_bundle.c.command.like('%' + value + '%')) | ||
where_clause.append( | ||
aliased_bundle_metadata.c.metadata_value.like('%' + value + '%') | ||
) | ||
clause = or_(*clause) | ||
where_clause = or_(*where_clause) | ||
# Otherwise, assume metadata. | ||
else: | ||
condition = make_condition(key, cl_bundle_metadata.c.metadata_value, value) | ||
aliased_bundle_metadata = aliased(cl_bundle_metadata) | ||
condition = make_condition(key, aliased_bundle_metadata.c.metadata_value, value) | ||
if condition is None: # top-level | ||
clause = and_( | ||
cl_bundle.c.uuid == cl_bundle_metadata.c.bundle_uuid, | ||
cl_bundle_metadata.c.metadata_key == key, | ||
where_clause = and_( | ||
cl_bundle.c.uuid == aliased_bundle_metadata.c.bundle_uuid, | ||
aliased_bundle_metadata.c.metadata_key == key, | ||
) | ||
else: # embedded | ||
clause = cl_bundle.c.uuid.in_( | ||
select([cl_bundle_metadata.c.bundle_uuid]).where( | ||
and_(cl_bundle_metadata.c.metadata_key == key, condition) | ||
) | ||
add_join(aliased_bundle_metadata, cl_bundle.c.uuid == aliased_bundle_metadata.c.bundle_uuid) | ||
|
||
where_clause = and_( | ||
aliased_bundle_metadata.c.metadata_key == key, | ||
condition | ||
) | ||
|
||
if clause is not None: | ||
clauses.append(clause) | ||
if where_clause is not None: | ||
where_clauses.append(where_clause) | ||
|
||
clause = and_(*clauses) | ||
where_clause = and_(*where_clauses) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might be confusing to reuse There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
|
||
if user_id != self.root_user_id: | ||
# Restrict to the bundles that we have access to. | ||
add_join(cl_group_bundle_permission, cl_bundle.c.uuid == cl_group_bundle_permission.c.object_uuid) | ||
access_via_owner = cl_bundle.c.owner_id == user_id | ||
access_via_group = cl_bundle.c.uuid.in_( | ||
select([cl_group_bundle_permission.c.object_uuid]).where( | ||
and_( | ||
access_via_group = and_( | ||
or_( # Join constraint (group) | ||
cl_group_bundle_permission.c.group_uuid | ||
== self.public_group_uuid, # Public group | ||
|
@@ -624,28 +607,25 @@ def make_condition(key, field, value): | |
), | ||
cl_group_bundle_permission.c.permission | ||
>= GROUP_OBJECT_PERMISSION_READ, # Match the uuid of the parent | ||
) | ||
) | ||
) | ||
clause = and_(clause, or_(access_via_owner, access_via_group)) | ||
|
||
where_clause = and_(where_clause, or_(access_via_owner, access_via_group)) | ||
|
||
|
||
table = cl_bundle | ||
for join_table, join_condition in zip(join_tables, join_conditions): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of using parallel arrays, which might be error prone, consider making a new There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
table = table.join(join_table, join_condition) | ||
# Aggregate (sum) | ||
if sum_key[0] is not None: | ||
# Construct a table with only the uuid and the num (and make sure it's distinct!) | ||
query = alias( | ||
select([cl_bundle.c.uuid, sum_key[0].label('num')]).distinct().where(clause) | ||
select([cl_bundle.c.uuid, sum_key[0].label('num')]).select_from(table).distinct().where(where_clause) | ||
) | ||
# Sum the numbers | ||
query = select([func.sum(query.c.num)]) | ||
else: | ||
query = ( | ||
select([cl_bundle.c.uuid] + aux_fields) | ||
.distinct() | ||
.where(clause) | ||
.offset(offset) | ||
.limit(limit) | ||
) | ||
|
||
query = select([cl_bundle.c.uuid] + aux_fields).select_from(table) | ||
query = query.distinct().where(where_clause).offset(offset).limit(limit) | ||
# Sort | ||
if sort_key[0] is not None: | ||
query = query.order_by(sort_key[0]) | ||
|
@@ -2443,7 +2423,6 @@ def add_user( | |
"date_joined": now, | ||
"has_access": has_access, | ||
"is_verified": is_verified, | ||
"is_superuser": False, | ||
"password": User.encode_password(password, crypt_util.get_random_string()), | ||
"time_quota": self.default_user_info['time_quota'], | ||
"parallel_run_quota": self.default_user_info['parallel_run_quota'], | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need docker_config files? Or should we remove them? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just removed them. Sorry about that! |
||
"compose-versions": { | ||
"2.6.1": { | ||
"active-percentage": 1, | ||
"include-pro-users": true, | ||
"default": 0.00 | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"composeV2": "" | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not part of speeding up search bundles, right? Ideally, would split the superuser thing into a different PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shoot. It is in a different PR that I submitted Thursday (still hasn't been merged b/c needs review). I must have branched off from my local branch I used for that PR when I created the branch for this PR. Sorry about that! Perhaps I should revert those changes in this branch?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe you can get that PR in first, since it's pretty straightforward, and then you don't have to undo anything.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, I'll ping the chat for a review. Thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it would be easiest to revert those changes in this branch, so it doesn't have to depend on that PR. Up to you though.