Skip to content

Part 1 : Adds RLS and CLS control Policies #2048

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -349,4 +349,24 @@ public static void enforceFeatureEnabledOrThrow(
+ "it is still possible to enforce the uniqueness of table locations within a catalog.")
.defaultValue(false)
.buildFeatureConfiguration();

/**
 * Feature flag for managing fine grained access control policies: per its description, when set
 * to {@code true} such policies can be created, updated and deleted. Defaults to {@code false}.
 */
public static final FeatureConfiguration<Boolean> FINE_GRAINED_ACCESS_CONTROL_POLICIES =
PolarisConfiguration.<Boolean>builder()
.key("FINE_GRAINED_ACCESS_CONTROL_POLICIES")
.catalogConfig("polaris.config.fine-grained-access-control-policies.enabled")
.description(
"If set to true, fine grained access control policies can be created, updated, deleted.")
.defaultValue(false)
.buildFeatureConfiguration();

/**
 * Feature flag for attaching fine grained access control policies to entities: per its
 * description, attachment is allowed when set to {@code true}. Defaults to {@code false}.
 *
 * <p>NOTE(review): the configuration key carries an {@code _ENABLED} suffix that the constant
 * name does not have - confirm this is intentional.
 */
public static final FeatureConfiguration<Boolean>
ALLOW_ATTACHING_FINE_GRAINED_POLICIES_TO_ENTITIES =
PolarisConfiguration.<Boolean>builder()
.key("ALLOW_ATTACHING_FINE_GRAINED_POLICIES_TO_ENTITIES_ENABLED")
.catalogConfig(
"polaris.config.allow-attaching-fine-grained-policies-to-entities.enabled")
.description(
"If set to true, fine grained access control policies can be attached to entities.")
.defaultValue(false)
.buildFeatureConfiguration();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.polaris.core.policy;

import static org.apache.polaris.core.policy.PredefinedPolicyTypes.ACCESS_CONTROL;

import com.google.common.collect.Lists;
import java.util.List;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.ExpressionVisitors;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.UnboundPredicate;
import org.apache.polaris.core.auth.AuthenticatedPolarisPrincipal;
import org.apache.polaris.core.policy.content.AccessControlPolicyContent;
import org.apache.polaris.core.policy.validator.InvalidPolicyException;

/**
 * Static helpers that tailor {@code ACCESS_CONTROL} policies to the calling principal: they
 * resolve the {@code $current_principal} and {@code $current_principal_role} context variables
 * inside row filters and decide whether a policy is applicable to the caller.
 */
public class AccessControlPolicyUtil {
  private AccessControlPolicyUtil() {}

  // context variables used in access control policies
  public static final String CURRENT_PRINCIPAL_ROLE = "$current_principal_role";
  public static final String CURRENT_PRINCIPAL = "$current_principal";

  /**
   * Rewrites the row filters of an access control policy so that equality comparisons against the
   * context variables are replaced by constant {@code true}/{@code false} expressions evaluated
   * for the given principal. The policy's principal role is cleared in the returned content.
   *
   * @param content serialized policy content
   * @param policyType type of the policy; anything other than {@code ACCESS_CONTROL} is returned
   *     unchanged
   * @param authenticatedPrincipal principal whose name and activated roles are used for evaluation
   * @return the rewritten policy content as a JSON string, or {@code content} itself for other
   *     policy types
   * @throws InvalidPolicyException if the content cannot be parsed, evaluated or serialized
   */
  public static String replaceContextVariable(
      String content, PolicyType policyType, AuthenticatedPolarisPrincipal authenticatedPrincipal) {
    if (policyType != ACCESS_CONTROL) {
      return content;
    }

    try {
      AccessControlPolicyContent policyContent = AccessControlPolicyContent.fromString(content);

      // A policy may legitimately carry only column projections, in which case there are no row
      // filters to rewrite; iterating a null list here would wrongly reject a valid policy.
      List<Expression> rowFilters = policyContent.getRowFilters();
      if (rowFilters != null) {
        ContextVariableReplacementVisitor visitor =
            new ContextVariableReplacementVisitor(authenticatedPrincipal);
        List<Expression> evaluatedRowFilterExpressions = Lists.newArrayList();
        for (Expression rowFilterExpression : rowFilters) {
          evaluatedRowFilterExpressions.add(
              ExpressionVisitors.visit(rowFilterExpression, visitor));
        }
        policyContent.setRowFilters(evaluatedRowFilterExpressions);
      }

      // also nullify the principal role.
      policyContent.setPrincipalRole(null);
      return AccessControlPolicyContent.toString(policyContent);
    } catch (Exception e) {
      throw new InvalidPolicyException(
          "Invalid access control policy content: " + e.getMessage(), e);
    }
  }

  /**
   * Tells whether a policy should be considered for the given principal.
   *
   * <p>Policies of any type other than {@code ACCESS_CONTROL} are always applicable. An access
   * control policy is applicable when it names no principal role, when the principal has no
   * activated role names, or when the named role is among the principal's activated role names.
   *
   * @param policyEntity the policy to test
   * @param authenticatedPrincipal the calling principal
   * @return {@code true} if the policy applies to the principal
   */
  public static boolean filterApplicablePolicy(
      PolicyEntity policyEntity, AuthenticatedPolarisPrincipal authenticatedPrincipal) {
    // Identity comparison, consistent with replaceContextVariable; also tolerates a null type.
    if (policyEntity.getPolicyType() != ACCESS_CONTROL) {
      return true;
    }

    AccessControlPolicyContent content =
        AccessControlPolicyContent.fromString(policyEntity.getContent());
    String applicableRole = content.getPrincipalRole();
    return applicableRole == null
        || authenticatedPrincipal.getActivatedPrincipalRoleNames().isEmpty()
        || authenticatedPrincipal.getActivatedPrincipalRoleNames().contains(applicableRole);
  }

  /** Expression visitor that replaces context variables with evaluated expressions */
  private static class ContextVariableReplacementVisitor
      extends ExpressionVisitors.ExpressionVisitor<Expression> {
    private final AuthenticatedPolarisPrincipal authenticatedPrincipal;

    public ContextVariableReplacementVisitor(AuthenticatedPolarisPrincipal authenticatedPrincipal) {
      this.authenticatedPrincipal = authenticatedPrincipal;
    }

    @Override
    public <T> Expression predicate(UnboundPredicate<T> pred) {
      String refName = pred.ref().name();

      if (CURRENT_PRINCIPAL_ROLE.equals(refName)) {
        return evaluateCurrentPrincipalRole(pred);
      } else if (CURRENT_PRINCIPAL.equals(refName)) {
        return evaluateCurrentPrincipal(pred);
      }

      // Return the original predicate if it doesn't reference context variables
      return pred;
    }

    @Override
    public Expression alwaysTrue() {
      return Expressions.alwaysTrue();
    }

    @Override
    public Expression alwaysFalse() {
      return Expressions.alwaysFalse();
    }

    @Override
    public Expression not(Expression result) {
      return Expressions.not(result);
    }

    @Override
    public Expression and(Expression leftResult, Expression rightResult) {
      return Expressions.and(leftResult, rightResult);
    }

    @Override
    public Expression or(Expression leftResult, Expression rightResult) {
      return Expressions.or(leftResult, rightResult);
    }

    /** Resolves a predicate on {@code $current_principal_role} against the activated role names. */
    private Expression evaluateCurrentPrincipalRole(UnboundPredicate<?> pred) {
      if (!isEqualityComparison(pred)) {
        // For other operations, return the original predicate
        return pred;
      }
      boolean containsRole =
          authenticatedPrincipal.getActivatedPrincipalRoleNames().contains(literalAsString(pred));
      return getExpression(pred, containsRole);
    }

    /** Resolves a predicate on {@code $current_principal} against the principal's name. */
    private Expression evaluateCurrentPrincipal(UnboundPredicate<?> pred) {
      if (!isEqualityComparison(pred)) {
        // For other operations, return the original predicate
        return pred;
      }
      boolean principalMatches = authenticatedPrincipal.getName().equals(literalAsString(pred));
      return getExpression(pred, principalMatches);
    }

    /**
     * Only EQ / NOT_EQ are evaluated. The operation is checked before touching the literal because
     * predicates of other shapes (for example unary or set predicates) may not expose a single
     * literal at all.
     */
    private static boolean isEqualityComparison(UnboundPredicate<?> pred) {
      Expression.Operation op = pred.op();
      return op == Expression.Operation.EQ || op == Expression.Operation.NOT_EQ;
    }

    /** Extracts the predicate's literal as text, rejecting non-textual literals explicitly. */
    private static String literalAsString(UnboundPredicate<?> pred) {
      Object value = pred.literal().value();
      if (!(value instanceof CharSequence)) {
        throw new IllegalArgumentException(
            "Context variable "
                + pred.ref().name()
                + " must be compared to a string literal, but got: "
                + value);
      }
      return value.toString();
    }

    private Expression getExpression(UnboundPredicate<?> pred, boolean principalMatches) {
      if (pred.op().equals(Expression.Operation.EQ)) {
        return principalMatches ? Expressions.alwaysTrue() : Expressions.alwaysFalse();
      } else if (pred.op().equals(Expression.Operation.NOT_EQ)) {
        return principalMatches ? Expressions.alwaysFalse() : Expressions.alwaysTrue();
      } else {
        // For other operations, return the original predicate
        return pred;
      }
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ public enum PredefinedPolicyTypes implements PolicyType {
DATA_COMPACTION(0, "system.data-compaction", true),
METADATA_COMPACTION(1, "system.metadata-compaction", true),
ORPHAN_FILE_REMOVAL(2, "system.orphan-file-removal", true),
SNAPSHOT_EXPIRY(3, "system.snapshot-expiry", true);
SNAPSHOT_EXPIRY(3, "system.snapshot-expiry", true),
ACCESS_CONTROL(4, "system.access-control", false);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The term ACCESS_CONTROL is too generic IMHO. How about TABLE_DATA_ACCESS_EXPRESSIONS?

The "expression" part relates to the fact that this policy uses Iceberg expressions to represent filters.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC we'll use the same policy to also have non-expression based filtering, but I think that something like TABLE_ACCESS or TABLE_DATA_ACCESS is a good idea.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How can the same policy type support different contents? What is the approach to processing different contents within the same policy type?

Copy link
Contributor Author

@singhpk234 singhpk234 Jul 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree, I modeled all 3 policies to be represented in a single policy spec
columnProjections

  • column hiding (only authorized against RBAC)
  • column projections (DDM -> to be applied with the UDF's)
  • rowFilter expressions (iceberg expression which can contain UDF references)

please let me know if you prefer it otherwise ?

Copy link
Contributor

@dimas-b dimas-b Jul 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not have a firm opinion on the specific form of these policies (yet).

However, I'd like to make this system extensible. That is, if we have another kind of policy for row filtering, assigning the generic ACCESS_CONTROL name to the current one will make the new policy kind of marginal. This is why I propose for policy type names to be more specific up front.


private final int code;
private final String name;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.polaris.core.policy.content;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
import com.google.common.base.Strings;
import java.util.List;
import java.util.Set;
import org.apache.iceberg.expressions.Expression;
import org.apache.polaris.core.policy.validator.InvalidPolicyException;

public class AccessControlPolicyContent implements PolicyContent {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(design) It may be seen as a preference, but the overall language community seems to be moving towards immutable objects as data carriers (like Java records), and I wonder if this is something we should adopt here as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can have a separate discussion in the community about adopting immutable objects in the future. For this PR, I think being consistent with the existing data structures is fine.


// Optional; if present, the policy is applicable only to this principal role
private String principalRole;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does this policy apply to a role? What is the mechanism? I could not find this in the linked doc 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is purely to support people who only have role-based access control (providers like MS, AWS, etc.): they can just store their row filters / projections against the role and the applicable policy.

I go into the details of this here - https://docs.google.com/document/d/12nhS0GX1U1PqEBKp74bIBZsL9kB5duDlN9diHJAhJsM/edit?tab=t.0#bookmark=id.ij6iuno9gsic

Copy link
Contributor

@dimas-b dimas-b Jul 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I mean is: Is this role a Polaris Principal role? If we have multiple policies, how do we find the set of applicable policies? (I'm not sure I saw details on that in the doc 😅 )

More broadly: Is the binding to roles actually part of the policy?

This is not an objection... more of a point to think about.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we have multiple policies, how do we find the set of applicable policies? (I'm not sure I saw details on that in the doc 😅 )

This is an existing feature of the policy store, you can retrieve all policies applicable to a given entity. Each policy can correspond to one role.


// TODO: model them as iceberg transforms
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a plan to redo this policy definition after merging or before merging this PR?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can amend them in another version if a release goes out with this one; policies support versioning: https://polaris.apache.org/in-dev/unreleased/policy/


The only reason for not using Iceberg transforms right now is that they are currently limited. Yes, the existing column projection can be modeled as an Iceberg identity transform, but if we want to support data masking, the transforms would need to reference Iceberg UDFs, and that support is not there yet, hence I refrained. I'm open to it if we want to model them like that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SGTM 👍

private List<String> columnProjections;

// Iceberg expressions without context functions for now.
// Use a custom deserializer for the list of Iceberg Expressions
@JsonDeserialize(using = IcebergExpressionListDeserializer.class)
@JsonSerialize(using = IcebergExpressionListSerializer.class)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This binds Polaris API to internal serialization code in Iceberg. Iceberg changes in Expression serialization will affect Polaris APIs. I'd like to avoid this dependency.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems fine to me given that Expression is also defined in the IRC spec: https://github.com/apache/iceberg/blob/main/open-api/rest-catalog-open-api.yaml#L2162. So the Iceberg side needs to make sure any changes there won't violate the spec. We also depend on CatalogHandler, RestResponse, TableIdentifier, and other classes/models from Iceberg. We can change to a custom one later if we need some customizations.

private List<Expression> rowFilters;
Copy link
Contributor

@dimas-b dimas-b Jul 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How can this work "without context functions" (code comment above)? How will Polaris code interface with these expressions?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They're used to generate a view, right?


private static final String DEFAULT_POLICY_SCHEMA_VERSION = "2025-02-03";

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this date represent?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like for other policies, this is a sort of policy "version"

private static final Set<String> POLICY_SCHEMA_VERSIONS = Set.of(DEFAULT_POLICY_SCHEMA_VERSION);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unused?


/**
 * Parses serialized policy content into an {@link AccessControlPolicyContent}.
 *
 * @param content the serialized policy, read with {@code PolicyContentUtil.MAPPER}
 * @return the parsed policy, holding at least one column projection or one row filter
 * @throws InvalidPolicyException if {@code content} is null or empty, if parsing fails, or if
 *     both {@code columnProjections} and {@code rowFilters} are missing or empty
 */
public static AccessControlPolicyContent fromString(String content) {
if (Strings.isNullOrEmpty(content)) {
throw new InvalidPolicyException("Policy is empty");
}

AccessControlPolicyContent policy;
try {
policy = PolicyContentUtil.MAPPER.readValue(content, AccessControlPolicyContent.class);
// Any Exception raised while parsing is reported as an invalid policy, keeping the cause.
} catch (Exception e) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(design) Not all exceptions are worth catching? Should a RuntimeException like OutOfMemoryException for example be transformed into an InvalidPolicyException instance?

throw new InvalidPolicyException(e);
}

// A null list and an empty list are treated the same way: nothing was specified.
boolean isProjectionsEmpty =
policy.getColumnProjections() == null || policy.getColumnProjections().isEmpty();

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(design) Maybe we can instruct Jackson to treat null values as empty collection and avoid null checks?

boolean isRowFilterEmpty = policy.getRowFilters() == null || policy.getRowFilters().isEmpty();
// Either part alone is sufficient; only a policy with neither is rejected.
if (isProjectionsEmpty && isRowFilterEmpty) {
throw new InvalidPolicyException("Policy must contain 'columnProjections' or 'rowFilters'.");
}

return policy;
}

/**
 * Serializes policy content with {@code PolicyContentUtil.MAPPER}.
 *
 * @param content the policy content to serialize; may be {@code null}
 * @return the serialized string, or {@code null} when {@code content} is {@code null}
 * @throws InvalidPolicyException if serialization fails with a {@link JsonProcessingException}
 */
public static String toString(AccessControlPolicyContent content) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(design) Why a static method vs an instance method like toJsonString()? (you would not need to check if content is null for example)

// Null in, null out: callers are not forced to guard against absent content.
if (content == null) {
return null;
}
try {
return PolicyContentUtil.MAPPER.writeValueAsString(content);
} catch (JsonProcessingException e) {
throw new InvalidPolicyException("Failed to convert policy content to JSON string", e);
}
}

// Constructors, getters, and setters
/**
 * Creates an empty policy content with all fields unset.
 *
 * <p>NOTE(review): presumably kept public and argument-free so Jackson can instantiate the class
 * during deserialization - confirm before changing.
 */
public AccessControlPolicyContent() {}

/**
 * Returns the principal role stored on this policy.
 *
 * @return the principal role, or {@code null} when none is set
 */
public String getPrincipalRole() {
  return this.principalRole;
}

/**
 * Stores the principal role on this policy.
 *
 * @param principalRole the role to store; {@code null} clears it
 */
public void setPrincipalRole(String principalRole) {
this.principalRole = principalRole;
}

/**
 * Returns the column projections stored on this policy.
 *
 * @return the stored list itself (not a copy), or {@code null} when none is set
 */
public List<String> getColumnProjections() {
  return this.columnProjections;
}

/**
 * Stores the column projections on this policy. This is the setter whose name matches the
 * {@code columnProjections} property exposed by {@code getColumnProjections()}.
 *
 * @param columnProjections the projections to store; may be {@code null}
 */
public void setColumnProjections(List<String> columnProjections) {
  this.columnProjections = columnProjections;
}

/**
 * Stores the column projections on this policy. Retained under its original name for existing
 * callers; it behaves exactly like {@code setColumnProjections(List)}, which should be preferred
 * because its name matches the property.
 *
 * @param columnProjections the projections to store; may be {@code null}
 */
public void setAllowedColumns(List<String> columnProjections) {
  this.columnProjections = columnProjections;
}

/**
 * Returns the row filter expressions stored on this policy.
 *
 * @return the stored list itself (not a copy), or {@code null} when none is set
 */
public List<Expression> getRowFilters() {
  return this.rowFilters;
}

/**
 * Stores the row filter expressions on this policy.
 *
 * @param rowFilters the expressions to store; the list is kept by reference, not copied
 */
public void setRowFilters(List<Expression> rowFilters) {
this.rowFilters = rowFilters;
}

/**
 * Renders the three fields of this policy in a single-line, brace-delimited form intended for
 * diagnostics; use the static {@code toString(AccessControlPolicyContent)} for JSON output.
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder("AccessControlPolicyContent{");
  text.append("principalRole='").append(principalRole).append('\'');
  text.append(", columnProjections=").append(columnProjections);
  text.append(", rowFilters=").append(rowFilters);
  text.append('}');
  return text.toString();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.polaris.core.policy.content;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonDeserializer;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.ExpressionParser;

public class IcebergExpressionListDeserializer extends JsonDeserializer<List<Expression>> {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(design) I wonder why a serializer/deserializer pair for the list is needed vs. having it for the element type (Expression)?

/**
 * Reads a JSON array and converts each element into an Iceberg {@link Expression}.
 *
 * @param p parser positioned at the value to read; its codec is used to build the JSON tree
 * @param ctxt deserialization context (not used here)
 * @return one expression per array element, in order; an empty list when the value is not an
 *     array
 * @throws IOException if reading the tree or re-serializing an element fails
 */
@Override
public List<Expression> deserialize(JsonParser p, DeserializationContext ctxt)
throws IOException {
// NOTE(review): this cast fails with ClassCastException if the parser's codec is not an
// ObjectMapper; the mapper is needed below for writeValueAsString.
ObjectMapper mapper = (ObjectMapper) p.getCodec();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This cast assumes a Jackson implementation detail and can likely break with Jackson updates.
Therefore please remove this case.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As ObjectCodec does have a readTree(JsonParser) method, is the cast even necessary?

JsonNode node = mapper.readTree(p);

List<Expression> expressions = new ArrayList<>();
// Anything other than an array is ignored and produces an empty list.
if (node.isArray()) {
for (JsonNode element : node) {
// Convert each JSON element back to a string and pass it to ExpressionParser.fromJson
expressions.add(ExpressionParser.fromJson(mapper.writeValueAsString(element)));
}
}
Comment on lines +40 to +46
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this necessary at all?

return expressions;
}
}
Loading