diff --git a/.github/workflows/check-datahub-jars.yml b/.github/workflows/check-datahub-jars.yml index 7a49f32729ec1f..dc770f7fc83a61 100644 --- a/.github/workflows/check-datahub-jars.yml +++ b/.github/workflows/check-datahub-jars.yml @@ -5,12 +5,12 @@ on: branches: - master paths: - - "metadata-integration" + - "metadata-integration/**" pull_request: branches: - "**" paths: - - "metadata-integration" + - "metadata-integration/**" release: types: [published] @@ -28,15 +28,22 @@ jobs: runs-on: ubuntu-latest steps: - uses: acryldata/sane-checkout-action@v3 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + - uses: actions/cache@v4 + with: + path: | + ~/.cache/uv + key: ${{ runner.os }}-uv-${{ hashFiles('**/requirements.txt') }} + - name: Install dependencies + run: ./metadata-ingestion/scripts/install_deps.sh - name: Set up JDK 17 uses: actions/setup-java@v4 with: distribution: "zulu" java-version: 17 - uses: gradle/actions/setup-gradle@v3 - - uses: actions/setup-python@v5 - with: - python-version: "3.10" - name: check ${{ matrix.command }} jar run: | ./gradlew :metadata-integration:java:${{ matrix.command }}:build --info diff --git a/build.gradle b/build.gradle index 6893a2ca93d365..a3d807a7333494 100644 --- a/build.gradle +++ b/build.gradle @@ -373,6 +373,7 @@ configure(subprojects.findAll {! it.name.startsWith('spark-lineage')}) { exclude group: "org.slf4j", module: "slf4j-log4j12" exclude group: "org.slf4j", module: "slf4j-nop" exclude group: "org.slf4j", module: "slf4j-ext" + exclude group: "org.codehaus.jackson", module: "jackson-mapper-asl" resolutionStrategy.force externalDependency.antlr4Runtime resolutionStrategy.force externalDependency.antlr4 diff --git a/datahub-frontend/app/auth/AuthModule.java b/datahub-frontend/app/auth/AuthModule.java index 7fa99ab3cb2621..b95515684f01fc 100644 --- a/datahub-frontend/app/auth/AuthModule.java +++ b/datahub-frontend/app/auth/AuthModule.java @@ -27,6 +27,7 @@ import io.datahubproject.metadata.context.EntityRegistryContext; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.OperationContextConfig; +import io.datahubproject.metadata.context.RetrieverContext; import io.datahubproject.metadata.context.SearchContext; import io.datahubproject.metadata.context.ValidationContext; import java.nio.charset.StandardCharsets; @@ -195,6 +196,7 @@ protected OperationContext provideOperationContext( .searchContext(SearchContext.EMPTY) .entityRegistryContext(EntityRegistryContext.builder().build(EmptyEntityRegistry.EMPTY)) .validationContext(ValidationContext.builder().alternateValidation(false).build()) + .retrieverContext(RetrieverContext.EMPTY) .build(systemAuthentication); } diff --git a/datahub-frontend/conf/logback.xml b/datahub-frontend/conf/logback.xml index 78da231b4a71c5..de37c56cba38a7 100644 --- a/datahub-frontend/conf/logback.xml +++ b/datahub-frontend/conf/logback.xml @@ -61,7 +61,7 @@ - + diff --git a/datahub-upgrade/build.gradle b/datahub-upgrade/build.gradle index 372b0eb0570b98..a3b2e9ad6b3e22 100644 --- a/datahub-upgrade/build.gradle +++ b/datahub-upgrade/build.gradle @@ -60,7 +60,7 @@ dependencies { // mock internal schema registry implementation externalDependency.kafkaAvroSerde implementation externalDependency.kafkaAvroSerializer - implementation "org.apache.kafka:kafka_2.12:3.7.1" + implementation "org.apache.kafka:kafka_2.13:3.7.2" implementation externalDependency.slf4jApi compileOnly externalDependency.lombok diff --git 
a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java index 661717c6309cfc..fdd84da6044f73 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java @@ -13,6 +13,7 @@ import com.linkedin.gms.factory.kafka.common.TopicConventionFactory; import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.config.kafka.KafkaConfiguration; import com.linkedin.metadata.dao.producer.KafkaEventProducer; import com.linkedin.metadata.dao.producer.KafkaHealthChecker; @@ -186,6 +187,7 @@ protected OperationContext javaSystemOperationContext( components.getIndexConvention(), RetrieverContext.builder() .aspectRetriever(entityServiceAspectRetriever) + .cachingAspectRetriever(CachingAspectRetriever.EMPTY) .graphRetriever(systemGraphRetriever) .searchRetriever(searchServiceSearchRetriever) .build(), diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java index 4d53b603c1eaff..1e5cd6cdb24174 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java @@ -180,7 +180,7 @@ private void readerExecutable(ReaderWrapper reader, UpgradeContext context) { try { aspectRecord = EntityUtils.toSystemAspect( - context.opContext().getRetrieverContext().get(), aspect.toEntityAspect()) + context.opContext().getRetrieverContext(), aspect.toEntityAspect()) .get() .getRecordTemplate(); } catch (Exception e) { diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java index cd7947ce3c11aa..56feffd211bcd7 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java @@ -113,8 +113,7 @@ public Function executable() { List, SystemAspect>> futures; futures = EntityUtils.toSystemAspectFromEbeanAspects( - opContext.getRetrieverContext().get(), - batch.collect(Collectors.toList())) + opContext.getRetrieverContext(), batch.collect(Collectors.toList())) .stream() .map( systemAspect -> { diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtil.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtil.java index 4cc3edff3eb52d..5b807c6c450afb 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtil.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtil.java @@ -100,8 +100,8 @@ static AspectsBatch generateAspectBatch( .collect(Collectors.toList()); return AspectsBatchImpl.builder() - .mcps(mcps, auditStamp, opContext.getRetrieverContext().get()) - .retrieverContext(opContext.getRetrieverContext().get()) + .mcps(mcps, auditStamp, 
opContext.getRetrieverContext()) + .retrieverContext(opContext.getRetrieverContext()) .build(); } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/dataprocessinstances/BackfillDataProcessInstancesHasRunEventsStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/dataprocessinstances/BackfillDataProcessInstancesHasRunEventsStep.java index 55cdcae931ab5b..1bdea10123999a 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/dataprocessinstances/BackfillDataProcessInstancesHasRunEventsStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/dataprocessinstances/BackfillDataProcessInstancesHasRunEventsStep.java @@ -2,6 +2,8 @@ import static com.linkedin.metadata.Constants.*; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.base.Throwables; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.upgrade.UpgradeContext; @@ -23,8 +25,6 @@ import java.util.Set; import java.util.function.Function; import lombok.extern.slf4j.Slf4j; -import org.codehaus.jackson.node.JsonNodeFactory; -import org.codehaus.jackson.node.ObjectNode; import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; import org.opensearch.client.RequestOptions; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/schemafield/GenerateSchemaFieldsFromSchemaMetadataStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/schemafield/GenerateSchemaFieldsFromSchemaMetadataStep.java index 55bc8edbf6a768..de03538907432f 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/schemafield/GenerateSchemaFieldsFromSchemaMetadataStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/schemafield/GenerateSchemaFieldsFromSchemaMetadataStep.java @@ -168,13 +168,13 @@ public Function executable() { AspectsBatch aspectsBatch = AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items( batch .flatMap( ebeanAspectV2 -> EntityUtils.toSystemAspectFromEbeanAspects( - opContext.getRetrieverContext().get(), + opContext.getRetrieverContext(), Set.of(ebeanAspectV2)) .stream()) .map( @@ -189,11 +189,7 @@ public Function executable() { .auditStamp(systemAspect.getAuditStamp()) .systemMetadata( withAppSource(systemAspect.getSystemMetadata())) - .build( - opContext - .getRetrieverContext() - .get() - .getAspectRetriever())) + .build(opContext.getAspectRetriever())) .collect(Collectors.toList())) .build(); diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/schemafield/GenerateSchemaFieldsFromSchemaMetadataStepTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/schemafield/GenerateSchemaFieldsFromSchemaMetadataStepTest.java index 3a2728b4e1d3d6..04b1095e770e0e 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/schemafield/GenerateSchemaFieldsFromSchemaMetadataStepTest.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/schemafield/GenerateSchemaFieldsFromSchemaMetadataStepTest.java @@ -22,7 +22,6 @@ import com.linkedin.upgrade.DataHubUpgradeState; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RetrieverContext; -import java.util.Optional; import java.util.stream.Stream; import 
org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -48,7 +47,7 @@ public void setup() { step = new GenerateSchemaFieldsFromSchemaMetadataStep( mockOpContext, mockEntityService, mockAspectDao, 10, 100, 1000); - when(mockOpContext.getRetrieverContext()).thenReturn(Optional.of(mockRetrieverContext)); + when(mockOpContext.getRetrieverContext()).thenReturn(mockRetrieverContext); } /** Test to verify the correct step ID is returned. */ diff --git a/datahub-web-react/src/app/entity/schemaField/SchemaFieldPropertiesEntity.tsx b/datahub-web-react/src/app/entity/schemaField/SchemaFieldPropertiesEntity.tsx index 2c59c476195d0b..fdc0e33d77a057 100644 --- a/datahub-web-react/src/app/entity/schemaField/SchemaFieldPropertiesEntity.tsx +++ b/datahub-web-react/src/app/entity/schemaField/SchemaFieldPropertiesEntity.tsx @@ -35,11 +35,9 @@ export class SchemaFieldPropertiesEntity implements Entity { // Currently unused. getPathName = () => 'schemaField'; - // Currently unused. - getEntityName = () => 'schemaField'; + getEntityName = () => 'Column'; - // Currently unused. - getCollectionName = () => 'schemaFields'; + getCollectionName = () => 'Columns'; // Currently unused. renderProfile = (_: string) => <>; diff --git a/datahub-web-react/src/app/entity/shared/components/styled/DeprecationPill.tsx b/datahub-web-react/src/app/entity/shared/components/styled/DeprecationPill.tsx index 08e9636f760de5..613264709ac23c 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/DeprecationPill.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/DeprecationPill.tsx @@ -19,8 +19,6 @@ const DeprecatedContainer = styled.div` justify-content: center; align-items: center; color: #cd0d24; - margin-left: 0px; - margin-right: 8px; padding-top: 8px; padding-bottom: 8px; padding-right: 4px; diff --git a/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/MultipleStringInput.tsx b/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/MultipleOpenEndedInput.tsx similarity index 87% rename from datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/MultipleStringInput.tsx rename to datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/MultipleOpenEndedInput.tsx index fe6c0bbb99ce22..fe6cd1115419ae 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/MultipleStringInput.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/MultipleOpenEndedInput.tsx @@ -4,6 +4,8 @@ import React from 'react'; import styled from 'styled-components'; import { ANTD_GRAY_V2 } from '../../../constants'; +const MultiStringWrapper = styled.div``; + const StyledInput = styled(Input)` width: 75%; min-width: 350px; @@ -29,10 +31,11 @@ const DeleteButton = styled(Button)` interface Props { selectedValues: any[]; + inputType?: string; updateSelectedValues: (values: any[]) => void; } -export default function MultipleStringInput({ selectedValues, updateSelectedValues }: Props) { +export default function MultipleOpenEndedInput({ selectedValues, updateSelectedValues, inputType = 'text' }: Props) { function updateInput(text: string, index: number) { const updatedValues = selectedValues.length > 0 ? selectedValues.map((value, i) => (i === index ? text : value)) : [text]; @@ -53,14 +56,14 @@ export default function MultipleStringInput({ selectedValues, updateSelectedValu } return ( -
+ {selectedValues.length > 1 && selectedValues.map((selectedValue, index) => { const key = `${index}`; return ( updateInput(e.target.value, index)} /> @@ -70,7 +73,7 @@ export default function MultipleStringInput({ selectedValues, updateSelectedValu })} {selectedValues.length <= 1 && ( updateInput(e.target.value, 0)} /> @@ -78,6 +81,6 @@ export default function MultipleStringInput({ selectedValues, updateSelectedValu + Add More -
+ ); } diff --git a/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/NumberInput.tsx b/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/NumberInput.tsx index c56d85db7ef712..f4cedc4cf80ee5 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/NumberInput.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/NumberInput.tsx @@ -1,7 +1,9 @@ import { Input } from 'antd'; import React, { ChangeEvent } from 'react'; import styled from 'styled-components'; +import { PropertyCardinality } from '@src/types.generated'; import { ANTD_GRAY_V2 } from '../../../constants'; +import MultipleOpenEndedInput from './MultipleOpenEndedInput'; const StyledInput = styled(Input)` border: 1px solid ${ANTD_GRAY_V2[6]}; @@ -10,15 +12,31 @@ const StyledInput = styled(Input)` interface Props { selectedValues: any[]; + cardinality?: PropertyCardinality | null; updateSelectedValues: (values: string[] | number[]) => void; } -export default function NumberInput({ selectedValues, updateSelectedValues }: Props) { +export default function NumberInput({ selectedValues, cardinality, updateSelectedValues }: Props) { function updateInput(event: ChangeEvent) { const number = Number(event.target.value); updateSelectedValues([number]); } + function updateMultipleValues(values: string[] | number[]) { + const numbers = values.map((v) => Number(v)); + updateSelectedValues(numbers); + } + + if (cardinality === PropertyCardinality.Multiple) { + return ( + + ); + } + return ( ; + return ; } - return ; + return ( + + ); } diff --git a/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/StructuredPropertyInput.tsx b/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/StructuredPropertyInput.tsx index 894a304335b0f6..305347ee0bce80 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/StructuredPropertyInput.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/StructuredPropertyInput.tsx @@ -60,7 +60,11 @@ export default function StructuredPropertyInput({ )} {!allowedValues && valueType.info.type === StdDataType.Number && ( - + )} {!allowedValues && valueType.info.type === StdDataType.Urn && ( { + it('should not return parent rows when there are none', () => { + const propertyRows = [ + { displayName: 'test1', qualifiedName: 'test1' }, + { displayName: 'test2', qualifiedName: 'test2' }, + ]; + expect(identifyAndAddParentRows(propertyRows)).toMatchObject([]); + }); + + it('should not return parent rows when another row starts with the same letters but is a different token', () => { + const propertyRows = [ + { displayName: 'test1', qualifiedName: 'testing.one' }, + { displayName: 'test2', qualifiedName: 'testingAgain.two' }, + ]; + expect(identifyAndAddParentRows(propertyRows)).toMatchObject([]); + }); + + it('should return parent rows properly', () => { + const propertyRows = [ + { displayName: 'test1', qualifiedName: 'testing.one' }, + { displayName: 'test2', qualifiedName: 'testing.two' }, + { displayName: 'test3', qualifiedName: 'testing.three' }, + ]; + expect(identifyAndAddParentRows(propertyRows)).toMatchObject([ + { displayName: 'testing', qualifiedName: 'testing', childrenCount: 3 }, + ]); + }); + + it('should return parent rows properly with multiple layers of nesting', () => { + const propertyRows = [ + { displayName: 'test1', qualifiedName: 'testing.one.two.a.1' }, + { displayName: 
'test1', qualifiedName: 'testing.one.two.a.2' }, + { displayName: 'test1', qualifiedName: 'testing.one.two.b' }, + { displayName: 'test1', qualifiedName: 'testing.one.three' }, + { displayName: 'test2', qualifiedName: 'testing.two.c.d' }, + { displayName: 'test3', qualifiedName: 'testing.three' }, + { displayName: 'test3', qualifiedName: 'testParent' }, + ]; + expect(identifyAndAddParentRows(propertyRows)).toMatchObject([ + { displayName: 'testing', qualifiedName: 'testing', isParentRow: true, childrenCount: 6 }, + { displayName: 'testing.one', qualifiedName: 'testing.one', isParentRow: true, childrenCount: 4 }, + { displayName: 'testing.one.two', qualifiedName: 'testing.one.two', isParentRow: true, childrenCount: 3 }, + { + displayName: 'testing.one.two.a', + qualifiedName: 'testing.one.two.a', + isParentRow: true, + childrenCount: 2, + }, + ]); + }); + + it('should return parent rows properly with multiple layers of nesting regardless of order', () => { + const propertyRows = [ + { displayName: 'test1', qualifiedName: 'testing.one.two.a.1' }, + { displayName: 'test3', qualifiedName: 'testParent' }, + { displayName: 'test1', qualifiedName: 'testing.one.three' }, + { displayName: 'test2', qualifiedName: 'testing.two.c.d' }, + { displayName: 'test1', qualifiedName: 'testing.one.two.b' }, + { displayName: 'test3', qualifiedName: 'testing.three' }, + { displayName: 'test1', qualifiedName: 'testing.one.two.a.2' }, + ]; + expect(identifyAndAddParentRows(propertyRows)).toMatchObject([ + { displayName: 'testing', qualifiedName: 'testing', isParentRow: true, childrenCount: 6 }, + { displayName: 'testing.one', qualifiedName: 'testing.one', isParentRow: true, childrenCount: 4 }, + { displayName: 'testing.one.two', qualifiedName: 'testing.one.two', isParentRow: true, childrenCount: 3 }, + { + displayName: 'testing.one.two.a', + qualifiedName: 'testing.one.two.a', + isParentRow: true, + childrenCount: 2, + }, + ]); + }); + + it('should return parent rows properly with simpler layers of nesting', () => { + const propertyRows = [ + { displayName: 'test2', qualifiedName: 'testing.two.c.d' }, + { displayName: 'test3', qualifiedName: 'testing.three' }, + { displayName: 'test3', qualifiedName: 'testParent' }, + ]; + expect(identifyAndAddParentRows(propertyRows)).toMatchObject([ + { displayName: 'testing', qualifiedName: 'testing', isParentRow: true, childrenCount: 2 }, + ]); + }); +}); diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/useStructuredProperties.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/useStructuredProperties.tsx index 18ee6bb18da3d3..60d0aac30eb4ce 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Properties/useStructuredProperties.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/useStructuredProperties.tsx @@ -122,10 +122,10 @@ export function identifyAndAddParentRows(rows?: Array): Array name.startsWith(token)).length; + const currentCount = qualifiedNames.filter((name) => name.startsWith(`${token}.`)).length; - // If we're at the beginning of the path and there is no nesting, break - if (index === 0 && currentCount === 1) { + // If there's only one child, don't nest it + if (currentCount === 1) { break; } diff --git a/datahub-web-react/src/app/govern/structuredProperties/AllowedValuesDrawer.tsx b/datahub-web-react/src/app/govern/structuredProperties/AllowedValuesDrawer.tsx index f1dccb6db0c22c..16c07e8257cd9b 100644 --- a/datahub-web-react/src/app/govern/structuredProperties/AllowedValuesDrawer.tsx +++ 
b/datahub-web-react/src/app/govern/structuredProperties/AllowedValuesDrawer.tsx @@ -127,6 +127,7 @@ const AllowedValuesDrawer = ({ setTimeout(() => scrollToBottom(), 0); }} color="violet" + type="button" > Add diff --git a/datahub-web-react/src/app/govern/structuredProperties/DisplayPreferences.tsx b/datahub-web-react/src/app/govern/structuredProperties/DisplayPreferences.tsx index 260c91ef93207c..95823de0f27c40 100644 --- a/datahub-web-react/src/app/govern/structuredProperties/DisplayPreferences.tsx +++ b/datahub-web-react/src/app/govern/structuredProperties/DisplayPreferences.tsx @@ -153,7 +153,8 @@ const DisplayPreferences = ({ clickable={false} />  is already being shown on asset previews, but only one property is allowed at a time. - Do you want to replace the current property? This will hide PropVal on all asset previews. + Do you want to replace the current property? This will hide {getDisplayName(badgeProperty)}{' '} + on all asset previews.
} /> diff --git a/datahub-web-react/src/app/govern/structuredProperties/StructuredPropsDrawer.tsx b/datahub-web-react/src/app/govern/structuredProperties/StructuredPropsDrawer.tsx index 4b2bbaaf96826b..debffeac7d583c 100644 --- a/datahub-web-react/src/app/govern/structuredProperties/StructuredPropsDrawer.tsx +++ b/datahub-web-react/src/app/govern/structuredProperties/StructuredPropsDrawer.tsx @@ -192,6 +192,7 @@ const StructuredPropsDrawer = ({ form.validateFields().then(() => { const createInput = { ...form.getFieldsValue(), + qualifiedName: form.getFieldValue('qualifiedName') || undefined, valueType: valueTypes.find((type) => type.value === form.getFieldValue('valueType'))?.urn, allowedValues, cardinality, diff --git a/datahub-web-react/src/app/govern/structuredProperties/cacheUtils.ts b/datahub-web-react/src/app/govern/structuredProperties/cacheUtils.ts index 590189d06e6b16..c8052784c6972a 100644 --- a/datahub-web-react/src/app/govern/structuredProperties/cacheUtils.ts +++ b/datahub-web-react/src/app/govern/structuredProperties/cacheUtils.ts @@ -17,7 +17,6 @@ const addToCache = (existingProperties, newProperty) => { allowedValues: newProperty.definition.allowedValues, created: newProperty.definition.created, lastModified: newProperty.definition.lastModified, - filterStatus: newProperty.definition.filterStatus, }, settings: { isHidden: newProperty.settings.isHidden, diff --git a/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx b/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx index 4c8948a6664e07..a19862e83ae510 100644 --- a/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx +++ b/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx @@ -68,6 +68,7 @@ const TitleContainer = styled.div` const EntityTitleContainer = styled.div` display: flex; align-items: center; + gap: 8px; `; const EntityTitle = styled(Typography.Text)<{ $titleSizePx?: number }>` @@ -77,7 +78,6 @@ const EntityTitle = styled(Typography.Text)<{ $titleSizePx?: number }>` } &&& { - margin-right 8px; font-size: ${(props) => props.$titleSizePx || 16}px; font-weight: 600; vertical-align: middle; diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql index ce0fde27f4c425..58c9a51f3d7e90 100644 --- a/datahub-web-react/src/graphql/search.graphql +++ b/datahub-web-react/src/graphql/search.graphql @@ -963,6 +963,7 @@ fragment facetFields on FacetMetadata { entity { urn type + ...entityDisplayNameFields ... on Tag { name properties { diff --git a/docker/kafka-setup/Dockerfile b/docker/kafka-setup/Dockerfile index a11f823f5efa55..324357b942e8e1 100644 --- a/docker/kafka-setup/Dockerfile +++ b/docker/kafka-setup/Dockerfile @@ -22,7 +22,7 @@ ARG ALPINE_REPO_URL ARG APACHE_DOWNLOAD_URL ARG GITHUB_REPO_URL -ENV KAFKA_VERSION=3.7.1 +ENV KAFKA_VERSION=3.7.2 ENV SCALA_VERSION=2.13 LABEL name="kafka" version=${KAFKA_VERSION} diff --git a/docs-website/vercel-setup.sh b/docs-website/vercel-setup.sh index 4bb40eaddf4775..e9ba87b75be779 100755 --- a/docs-website/vercel-setup.sh +++ b/docs-website/vercel-setup.sh @@ -5,8 +5,8 @@ set -euxo pipefail ./metadata-ingestion/scripts/install_deps.sh # Set up java version for gradle -yum install java-17-amazon-corretto -y -java --version +yum install java-17-amazon-corretto-devel -y +javac --version # Build python from source. 
# Amazon Linux 2 has Python 3.8, but it's version of OpenSSL is super old and hence it diff --git a/docs/cli.md b/docs/cli.md index c633b7f4a38ad3..1c38077d0d12ef 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -115,6 +115,19 @@ datahub ingest -c ./examples/recipes/example_to_datahub_rest.dhub.yaml --dry-run datahub ingest -c ./examples/recipes/example_to_datahub_rest.dhub.yaml -n ``` +#### ingest --list-source-runs + +The `--list-source-runs` option of the `ingest` command lists the previous runs, displaying their run ID, source name, +start time, status, and source URN. This command allows you to filter results using the `--urn` option for URN-based +filtering or the `--source` option to filter by source name (partial or complete matches are supported). + +```shell +# List all ingestion runs +datahub ingest --list-source-runs +# Filter runs by a source name containing "demo" +datahub ingest --list-source-runs --source "demo" +``` + #### ingest --preview The `--preview` option of the `ingest` command performs all of the ingestion steps, but limits the processing to only the first 10 workunits produced by the source. diff --git a/docs/how/delete-metadata.md b/docs/how/delete-metadata.md index f720a66ce57652..e36940bf398356 100644 --- a/docs/how/delete-metadata.md +++ b/docs/how/delete-metadata.md @@ -4,7 +4,7 @@ To follow this guide, you'll need the [DataHub CLI](../cli.md). ::: -There are a two ways to delete metadata from DataHub: +There are two ways to delete metadata from DataHub: 1. Delete metadata attached to entities by providing a specific urn or filters that identify a set of urns (delete CLI). 2. Delete metadata created by a single ingestion run (rollback). @@ -233,7 +233,13 @@ To view the ids of the most recent set of ingestion batches, execute datahub ingest list-runs ``` -That will print out a table of all the runs. Once you have an idea of which run you want to roll back, run +That will print out a table of all the runs. To see run statuses or to filter runs by URN or source, run + +```shell +datahub ingest list-source-runs +``` + +Once you have an idea of which run you want to roll back, run ```shell datahub ingest show --run-id <run_id> diff --git a/docs/lineage/airflow.md b/docs/lineage/airflow.md index 72b5cbf57592d3..345213a0672d37 100644 --- a/docs/lineage/airflow.md +++ b/docs/lineage/airflow.md @@ -339,6 +339,37 @@ TypeError: on_task_instance_success() missing 3 required positional arguments: ' The solution is to upgrade `acryl-datahub-airflow-plugin>=0.12.0.4` or upgrade `pluggy>=1.2.0`. See this [PR](https://github.com/datahub-project/datahub/pull/9365) for details. +### Disabling the DataHub Plugin v2 + +There are two ways to disable the DataHub Plugin v2: + +#### 1. Disable via Configuration + +Set the `datahub.enabled` configuration property to `False` in the `airflow.cfg` file and restart the Airflow environment to reload the configuration and disable the plugin. + +```ini title="airflow.cfg" +[datahub] +enabled = False +``` + +#### 2. Disable via Airflow Variable (Kill-Switch) + +If a restart is not possible and you need a faster way to disable the plugin, you can use the kill-switch. Create and set the `datahub_airflow_plugin_disable_listener` Airflow variable to `true`. This ensures that the listener won't process anything. + +#### Command Line + +```shell +airflow variables set datahub_airflow_plugin_disable_listener true +``` + +#### Airflow UI + +1. Go to Admin -> Variables. +2. Click the "+" symbol to create a new variable. +3. 
Set the key to `datahub_airflow_plugin_disable_listener` and the value to `true`. + +This will immediately disable the plugin without requiring a restart. + ## Compatibility We no longer officially support Airflow <2.3. However, you can use older versions of `acryl-datahub-airflow-plugin` with older versions of Airflow. diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java index 77e799f752455c..375dd8cf8911e1 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java @@ -1,4 +1,38 @@ package com.linkedin.metadata.aspect; +import com.linkedin.common.urn.Urn; +import com.linkedin.entity.Aspect; +import com.linkedin.metadata.models.registry.EmptyEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import javax.annotation.Nonnull; + /** Responses can be cached based on application.yaml caching configuration for the EntityClient */ -public interface CachingAspectRetriever extends AspectRetriever {} +public interface CachingAspectRetriever extends AspectRetriever { + + CachingAspectRetriever EMPTY = new EmptyAspectRetriever(); + + class EmptyAspectRetriever implements CachingAspectRetriever { + @Nonnull + @Override + public Map> getLatestAspectObjects( + Set urns, Set aspectNames) { + return Collections.emptyMap(); + } + + @Nonnull + @Override + public Map> getLatestSystemAspects( + Map> urnAspectNames) { + return Collections.emptyMap(); + } + + @Nonnull + @Override + public EntityRegistry getEntityRegistry() { + return EmptyEntityRegistry.EMPTY; + } + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java index f6858e7da4ba63..30a2c1eb9df8c1 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java @@ -4,6 +4,7 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.RelationshipFilter; import com.linkedin.metadata.query.filter.SortCriterion; +import java.util.Collections; import java.util.List; import java.util.function.Function; import javax.annotation.Nonnull; @@ -97,4 +98,26 @@ default void consumeRelatedEntities( } } } + + GraphRetriever EMPTY = new EmptyGraphRetriever(); + + class EmptyGraphRetriever implements GraphRetriever { + + @Nonnull + @Override + public RelatedEntitiesScrollResult scrollRelatedEntities( + @Nullable List sourceTypes, + @Nonnull Filter sourceEntityFilter, + @Nullable List destinationTypes, + @Nonnull Filter destinationEntityFilter, + @Nonnull List relationshipTypes, + @Nonnull RelationshipFilter relationshipFilter, + @Nonnull List sortCriterion, + @Nullable String scrollId, + int count, + @Nullable Long startTimeMillis, + @Nullable Long endTimeMillis) { + return new RelatedEntitiesScrollResult(0, 0, null, Collections.emptyList()); + } + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DataJobInputOutputPatchBuilder.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DataJobInputOutputPatchBuilder.java index 6fffb17521ddb7..14fc92a1bf3c86 100644 --- 
a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DataJobInputOutputPatchBuilder.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DataJobInputOutputPatchBuilder.java @@ -15,6 +15,8 @@ import com.linkedin.metadata.aspect.patch.PatchOperationType; import com.linkedin.metadata.graph.LineageDirection; import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.ImmutableTriple; public class DataJobInputOutputPatchBuilder @@ -24,6 +26,7 @@ public class DataJobInputOutputPatchBuilder private static final String OUTPUT_DATASET_EDGES_PATH_START = "/outputDatasetEdges/"; private static final String INPUT_DATASET_FIELDS_PATH_START = "/inputDatasetFields/"; private static final String OUTPUT_DATASET_FIELDS_PATH_START = "/outputDatasetFields/"; + private static final String FINE_GRAINED_PATH_START = "/fineGrainedLineages/"; // Simplified with just Urn public DataJobInputOutputPatchBuilder addInputDatajobEdge(@Nonnull DataJobUrn dataJobUrn) { @@ -136,6 +139,103 @@ public DataJobInputOutputPatchBuilder addEdge( return this; } + /** + * Adds a field as a fine grained upstream + * + * @param upstreamSchemaField a schema field to be marked as upstream, format: + * urn:li:schemaField(DATASET_URN, COLUMN NAME) + * @param confidenceScore optional, confidence score for the lineage edge. Defaults to 1.0 for + * full confidence + * @param transformationOperation string operation type that describes the transformation + * operation happening in the lineage edge + * @param downstreamSchemaField the downstream schema field this upstream is derived from, format: + * urn:li:schemaField(DATASET_URN, COLUMN NAME) + * @param queryUrn query urn the relationship is derived from + * @return this builder + */ + public DataJobInputOutputPatchBuilder addFineGrainedUpstreamField( + @Nonnull Urn upstreamSchemaField, + @Nullable Float confidenceScore, + @Nonnull String transformationOperation, + @Nonnull Urn downstreamSchemaField, + @Nullable Urn queryUrn) { + Float finalConfidenceScore = getConfidenceScoreOrDefault(confidenceScore); + String finalQueryUrn; + if (queryUrn == null || StringUtils.isBlank(queryUrn.toString())) { + finalQueryUrn = "NONE"; + } else { + finalQueryUrn = queryUrn.toString(); + } + + ObjectNode fineGrainedLineageNode = instance.objectNode(); + fineGrainedLineageNode.put("confidenceScore", instance.numberNode(finalConfidenceScore)); + pathValues.add( + ImmutableTriple.of( + PatchOperationType.ADD.getValue(), + FINE_GRAINED_PATH_START + + transformationOperation + + "/" + + encodeValueUrn(downstreamSchemaField) + + "/" + + finalQueryUrn + + "/" + + encodeValueUrn(upstreamSchemaField), + fineGrainedLineageNode)); + + return this; + } + + private Float getConfidenceScoreOrDefault(@Nullable Float confidenceScore) { + float finalConfidenceScore; + if (confidenceScore != null && confidenceScore > 0 && confidenceScore <= 1.0f) { + finalConfidenceScore = confidenceScore; + } else { + finalConfidenceScore = 1.0f; + } + + return finalConfidenceScore; + } + + /** + * Removes a field as a fine grained upstream + * + * @param upstreamSchemaField a schema field to be marked as upstream, format: + * urn:li:schemaField(DATASET_URN, COLUMN NAME) + * @param transformationOperation string operation type that describes the transformation + * operation happening in the lineage edge + * @param downstreamSchemaField the downstream schema field this upstream is derived from, 
format: + * urn:li:schemaField(DATASET_URN, COLUMN NAME) + * @param queryUrn query urn the relationship is derived from + * @return this builder + */ + public DataJobInputOutputPatchBuilder removeFineGrainedUpstreamField( + @Nonnull Urn upstreamSchemaField, + @Nonnull String transformationOperation, + @Nonnull Urn downstreamSchemaField, + @Nullable Urn queryUrn) { + + String finalQueryUrn; + if (queryUrn == null || StringUtils.isBlank(queryUrn.toString())) { + finalQueryUrn = "NONE"; + } else { + finalQueryUrn = queryUrn.toString(); + } + pathValues.add( + ImmutableTriple.of( + PatchOperationType.REMOVE.getValue(), + FINE_GRAINED_PATH_START + + transformationOperation + + "/" + + encodeValueUrn(downstreamSchemaField) + + "/" + + finalQueryUrn + + "/" + + encodeValueUrn(upstreamSchemaField), + null)); + + return this; + } + public DataJobInputOutputPatchBuilder removeEdge( @Nonnull Edge edge, @Nonnull LineageDirection direction) { String path = getEdgePath(edge, direction); diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/UpstreamLineagePatchBuilder.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/UpstreamLineagePatchBuilder.java index 08182761aeb03f..d0a46a35d51820 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/UpstreamLineagePatchBuilder.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/UpstreamLineagePatchBuilder.java @@ -142,7 +142,7 @@ public UpstreamLineagePatchBuilder removeFineGrainedUpstreamField( FINE_GRAINED_PATH_START + transformationOperation + "/" - + downstreamSchemaField + + encodeValueUrn(downstreamSchemaField) + "/" + finalQueryUrn + "/" diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/FineGrainedLineageTemplateHelper.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/FineGrainedLineageTemplateHelper.java new file mode 100644 index 00000000000000..1f6a58c52ba248 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/FineGrainedLineageTemplateHelper.java @@ -0,0 +1,282 @@ +package com.linkedin.metadata.aspect.patch.template; + +import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*; +import static com.linkedin.metadata.Constants.*; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.Streams; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; +import javax.annotation.Nullable; +import org.codehaus.plexus.util.StringUtils; + +public class FineGrainedLineageTemplateHelper { + + private static final String FINE_GRAINED_UPSTREAM_TYPE = "upstreamType"; + private static final String FINE_GRAINED_UPSTREAMS = "upstreams"; + private static final String FINE_GRAINED_DOWNSTREAM_TYPE = "downstreamType"; + private static final String FINE_GRAINED_DOWNSTREAMS = "downstreams"; + private static final String FINE_GRAINED_TRANSFORMATION_OPERATION = "transformOperation"; + private static final String FINE_GRAINED_CONFIDENCE_SCORE = "confidenceScore"; + private static final String FINE_GRAINED_QUERY_ID = "query"; + + // Template support + private static final String NONE_TRANSFORMATION_TYPE = "NONE"; + private static final Float DEFAULT_CONFIDENCE_SCORE = 1.0f; + private static 
final String DEFAULT_QUERY_ID = "NONE"; + + /** + * Combines fine grained lineage array into a map using upstream and downstream types as keys, + * defaulting when not present. Due to this construction, patches will look like: path: + * /fineGrainedLineages/TRANSFORMATION_OPERATION/DOWNSTREAM_FIELD_URN/QUERY_ID/UPSTREAM_FIELD_URN, + * op: ADD/REMOVE, value: float (confidenceScore) Due to the way FineGrainedLineage was designed + * it doesn't necessarily have a consistent key we can reference, so this specialized method + * mimics the arrayFieldToMap of the super class with the specialization that it does not put the + * full value of the aspect at the end of the key, just the particular array. This prevents + * unintended overwrites through improper MCP construction that is technically allowed by the + * schema when combining under fields that form the natural key. + * + * @param fineGrainedLineages the fine grained lineage array node + * @return the modified {@link JsonNode} with array fields transformed to maps + */ + public static JsonNode combineAndTransformFineGrainedLineages( + @Nullable JsonNode fineGrainedLineages) { + ObjectNode mapNode = instance.objectNode(); + if (!(fineGrainedLineages instanceof ArrayNode) || fineGrainedLineages.isEmpty()) { + return mapNode; + } + JsonNode lineageCopy = fineGrainedLineages.deepCopy(); + + lineageCopy + .elements() + .forEachRemaining( + node -> { + JsonNode nodeClone = node.deepCopy(); + String transformationOperation = + nodeClone.has(FINE_GRAINED_TRANSFORMATION_OPERATION) + ? nodeClone.get(FINE_GRAINED_TRANSFORMATION_OPERATION).asText() + : NONE_TRANSFORMATION_TYPE; + + if (!mapNode.has(transformationOperation)) { + mapNode.set(transformationOperation, instance.objectNode()); + } + ObjectNode transformationOperationNode = + (ObjectNode) mapNode.get(transformationOperation); + + ArrayNode downstreams = + nodeClone.has(FINE_GRAINED_DOWNSTREAMS) + ? (ArrayNode) nodeClone.get(FINE_GRAINED_DOWNSTREAMS) + : null; + + if (downstreams == null || downstreams.size() != 1) { + throw new UnsupportedOperationException( + "Patching not supported on fine grained lineages with not" + + " exactly one downstream. Current fine grained lineage implementation is downstream derived and " + + "patches are keyed on the root of this derivation."); + } + + Float confidenceScore = + nodeClone.has(FINE_GRAINED_CONFIDENCE_SCORE) + ? nodeClone.get(FINE_GRAINED_CONFIDENCE_SCORE).floatValue() + : DEFAULT_CONFIDENCE_SCORE; + + String upstreamType = + nodeClone.has(FINE_GRAINED_UPSTREAM_TYPE) + ? nodeClone.get(FINE_GRAINED_UPSTREAM_TYPE).asText() + : null; + String downstreamType = + nodeClone.has(FINE_GRAINED_DOWNSTREAM_TYPE) + ? nodeClone.get(FINE_GRAINED_DOWNSTREAM_TYPE).asText() + : null; + ArrayNode upstreams = + nodeClone.has(FINE_GRAINED_UPSTREAMS) + ? (ArrayNode) nodeClone.get(FINE_GRAINED_UPSTREAMS) + : null; + + String queryId = + nodeClone.has(FINE_GRAINED_QUERY_ID) + ? nodeClone.get(FINE_GRAINED_QUERY_ID).asText() + : DEFAULT_QUERY_ID; + + if (upstreamType == null) { + // Determine default type + Urn upstreamUrn = + upstreams != null ? 
UrnUtils.getUrn(upstreams.get(0).asText()) : null; + if (upstreamUrn != null + && DATASET_ENTITY_NAME.equals(upstreamUrn.getEntityType())) { + upstreamType = FINE_GRAINED_LINEAGE_DATASET_TYPE; + } else { + upstreamType = FINE_GRAINED_LINEAGE_FIELD_SET_TYPE; + } + } + + if (downstreamType == null) { + // Always use FIELD type, only support patches for single field downstream + downstreamType = FINE_GRAINED_LINEAGE_FIELD_TYPE; + } + + String downstreamRoot = downstreams.get(0).asText(); + if (!transformationOperationNode.has(downstreamRoot)) { + transformationOperationNode.set(downstreamRoot, instance.objectNode()); + } + ObjectNode downstreamRootNode = + (ObjectNode) transformationOperationNode.get(downstreamRoot); + if (!downstreamRootNode.has(queryId)) { + downstreamRootNode.set(queryId, instance.objectNode()); + } + ObjectNode queryNode = (ObjectNode) downstreamRootNode.get(queryId); + if (upstreams != null) { + addUrnsToParent( + queryNode, upstreams, confidenceScore, upstreamType, downstreamType); + } + }); + return mapNode; + } + + private static void addUrnsToParent( + JsonNode parentNode, + ArrayNode urnsList, + Float confidenceScore, + String upstreamType, + String downstreamType) { + // Will overwrite repeat urns with different confidence scores with the most recently seen + ((ObjectNode) parentNode) + .setAll( + Streams.stream(urnsList.elements()) + .map(JsonNode::asText) + .distinct() + .collect( + Collectors.toMap( + urn -> urn, + urn -> + mapToLineageValueNode(confidenceScore, upstreamType, downstreamType)))); + } + + private static JsonNode mapToLineageValueNode( + Float confidenceScore, String upstreamType, String downstreamType) { + ObjectNode objectNode = instance.objectNode(); + objectNode.set(FINE_GRAINED_CONFIDENCE_SCORE, instance.numberNode(confidenceScore)); + objectNode.set(FINE_GRAINED_UPSTREAM_TYPE, instance.textNode(upstreamType)); + objectNode.set(FINE_GRAINED_DOWNSTREAM_TYPE, instance.textNode(downstreamType)); + return objectNode; + } + + /** + * Takes the transformed fine grained lineages map from pre-processing and reconstructs an array + * of FineGrainedLineages Avoids producing side effects by copying nodes, use resulting node and + * not the original + * + * @param transformedFineGrainedLineages the transformed fine grained lineage map + * @return the modified {@link JsonNode} formatted consistent with the original schema + */ + public static ArrayNode reconstructFineGrainedLineages(JsonNode transformedFineGrainedLineages) { + if (transformedFineGrainedLineages instanceof ArrayNode) { + // We already have an ArrayNode, no need to transform. 
This happens during `replace` + // operations + return (ArrayNode) transformedFineGrainedLineages; + } + ObjectNode mapNode = (ObjectNode) transformedFineGrainedLineages; + ArrayNode fineGrainedLineages = instance.arrayNode(); + + mapNode + .fieldNames() + .forEachRemaining( + transformationOperation -> { + final ObjectNode transformationOperationNode = + (ObjectNode) mapNode.get(transformationOperation); + transformationOperationNode + .fieldNames() + .forEachRemaining( + downstreamName -> { + final ObjectNode downstreamNode = + (ObjectNode) transformationOperationNode.get(downstreamName); + downstreamNode + .fieldNames() + .forEachRemaining( + queryId -> + buildFineGrainedLineage( + downstreamName, + downstreamNode, + queryId, + transformationOperation, + fineGrainedLineages)); + }); + }); + + return fineGrainedLineages; + } + + private static void buildFineGrainedLineage( + final String downstreamName, + final ObjectNode downstreamNode, + final String queryId, + final String transformationOperation, + final ArrayNode fineGrainedLineages) { + final ObjectNode fineGrainedLineage = instance.objectNode(); + final ObjectNode queryNode = (ObjectNode) downstreamNode.get(queryId); + if (queryNode.isEmpty()) { + // Short circuit if no upstreams left + return; + } + ArrayNode downstream = instance.arrayNode(); + downstream.add(instance.textNode(downstreamName)); + // Set defaults, if found in sub nodes override, for confidenceScore take lowest + AtomicReference minimumConfidenceScore = new AtomicReference<>(DEFAULT_CONFIDENCE_SCORE); + AtomicReference upstreamType = + new AtomicReference<>(FINE_GRAINED_LINEAGE_FIELD_SET_TYPE); + AtomicReference downstreamType = new AtomicReference<>(FINE_GRAINED_LINEAGE_FIELD_TYPE); + ArrayNode upstreams = instance.arrayNode(); + queryNode + .fieldNames() + .forEachRemaining( + upstream -> + processUpstream( + queryNode, + upstream, + minimumConfidenceScore, + upstreamType, + downstreamType, + upstreams)); + fineGrainedLineage.set(FINE_GRAINED_DOWNSTREAMS, downstream); + fineGrainedLineage.set(FINE_GRAINED_UPSTREAMS, upstreams); + if (StringUtils.isNotBlank(queryId) && !DEFAULT_QUERY_ID.equals(queryId)) { + fineGrainedLineage.set(FINE_GRAINED_QUERY_ID, instance.textNode(queryId)); + } + fineGrainedLineage.set(FINE_GRAINED_UPSTREAM_TYPE, instance.textNode(upstreamType.get())); + fineGrainedLineage.set(FINE_GRAINED_DOWNSTREAM_TYPE, instance.textNode(downstreamType.get())); + fineGrainedLineage.set( + FINE_GRAINED_CONFIDENCE_SCORE, instance.numberNode(minimumConfidenceScore.get())); + fineGrainedLineage.set( + FINE_GRAINED_TRANSFORMATION_OPERATION, instance.textNode(transformationOperation)); + fineGrainedLineages.add(fineGrainedLineage); + } + + private static void processUpstream( + final ObjectNode queryNode, + final String upstream, + final AtomicReference minimumConfidenceScore, + final AtomicReference upstreamType, + final AtomicReference downstreamType, + final ArrayNode upstreams) { + final ObjectNode upstreamNode = (ObjectNode) queryNode.get(upstream); + if (upstreamNode.has(FINE_GRAINED_CONFIDENCE_SCORE)) { + Float scoreValue = upstreamNode.get(FINE_GRAINED_CONFIDENCE_SCORE).floatValue(); + if (scoreValue <= minimumConfidenceScore.get()) { + minimumConfidenceScore.set(scoreValue); + } + } + // Set types to last encountered, should never change, but this at least tries to support + // other types being specified. 
+ if (upstreamNode.has(FINE_GRAINED_UPSTREAM_TYPE)) { + upstreamType.set(upstreamNode.get(FINE_GRAINED_UPSTREAM_TYPE).asText()); + } + if (upstreamNode.has(FINE_GRAINED_DOWNSTREAM_TYPE)) { + downstreamType.set(upstreamNode.get(FINE_GRAINED_DOWNSTREAM_TYPE).asText()); + } + upstreams.add(instance.textNode(upstream)); + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/TemplateUtil.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/TemplateUtil.java index 2423e37e6d5419..23879ad1c2e353 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/TemplateUtil.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/TemplateUtil.java @@ -84,7 +84,7 @@ public static JsonNode populateTopLevelKeys(JsonNode transformedNode, JsonPatch // Skip first as it will always be blank due to path starting with / for (int i = 1; i < endIdx; i++) { String decodedKey = decodeValue(keys[i]); - if (parent.get(keys[i]) == null) { + if (parent.get(decodedKey) == null) { ((ObjectNode) parent).set(decodedKey, instance.objectNode()); } parent = parent.get(decodedKey); diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/datajob/DataJobInputOutputTemplate.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/datajob/DataJobInputOutputTemplate.java index 3d398d97b50c38..ef26eed2f814f8 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/datajob/DataJobInputOutputTemplate.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/datajob/DataJobInputOutputTemplate.java @@ -1,6 +1,10 @@ package com.linkedin.metadata.aspect.patch.template.datajob; +import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*; +import static com.linkedin.metadata.Constants.*; + import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; import com.linkedin.common.DataJobUrnArray; import com.linkedin.common.DatasetUrnArray; import com.linkedin.common.EdgeArray; @@ -9,6 +13,7 @@ import com.linkedin.datajob.DataJobInputOutput; import com.linkedin.dataset.FineGrainedLineageArray; import com.linkedin.metadata.aspect.patch.template.ArrayMergingTemplate; +import com.linkedin.metadata.aspect.patch.template.FineGrainedLineageTemplateHelper; import java.util.Collections; import javax.annotation.Nonnull; @@ -23,6 +28,8 @@ public class DataJobInputOutputTemplate implements ArrayMergingTemplate { @@ -27,18 +19,6 @@ public class UpstreamLineageTemplate extends CompoundKeyTemplate { - JsonNode nodeClone = node.deepCopy(); - String transformationOperation = - nodeClone.has(FINE_GRAINED_TRANSFORMATION_OPERATION) - ? nodeClone.get(FINE_GRAINED_TRANSFORMATION_OPERATION).asText() - : NONE_TRANSFORMATION_TYPE; - - if (!mapNode.has(transformationOperation)) { - mapNode.set(transformationOperation, instance.objectNode()); - } - ObjectNode transformationOperationNode = - (ObjectNode) mapNode.get(transformationOperation); - - ArrayNode downstreams = - nodeClone.has(FINE_GRAINED_DOWNSTREAMS) - ? (ArrayNode) nodeClone.get(FINE_GRAINED_DOWNSTREAMS) - : null; - - if (downstreams == null || downstreams.size() != 1) { - throw new UnsupportedOperationException( - "Patching not supported on fine grained lineages with not" - + " exactly one downstream. 
Current fine grained lineage implementation is downstream derived and " - + "patches are keyed on the root of this derivation."); - } - - Float confidenceScore = - nodeClone.has(FINE_GRAINED_CONFIDENCE_SCORE) - ? nodeClone.get(FINE_GRAINED_CONFIDENCE_SCORE).floatValue() - : DEFAULT_CONFIDENCE_SCORE; - - String upstreamType = - nodeClone.has(FINE_GRAINED_UPSTREAM_TYPE) - ? nodeClone.get(FINE_GRAINED_UPSTREAM_TYPE).asText() - : null; - String downstreamType = - nodeClone.has(FINE_GRAINED_DOWNSTREAM_TYPE) - ? nodeClone.get(FINE_GRAINED_DOWNSTREAM_TYPE).asText() - : null; - ArrayNode upstreams = - nodeClone.has(FINE_GRAINED_UPSTREAMS) - ? (ArrayNode) nodeClone.get(FINE_GRAINED_UPSTREAMS) - : null; - - String queryId = - nodeClone.has(FINE_GRAINED_QUERY_ID) - ? nodeClone.get(FINE_GRAINED_QUERY_ID).asText() - : DEFAULT_QUERY_ID; - - if (upstreamType == null) { - // Determine default type - Urn upstreamUrn = - upstreams != null ? UrnUtils.getUrn(upstreams.get(0).asText()) : null; - if (upstreamUrn != null - && DATASET_ENTITY_NAME.equals(upstreamUrn.getEntityType())) { - upstreamType = FINE_GRAINED_LINEAGE_DATASET_TYPE; - } else { - upstreamType = FINE_GRAINED_LINEAGE_FIELD_SET_TYPE; - } - } - - if (downstreamType == null) { - // Always use FIELD type, only support patches for single field downstream - downstreamType = FINE_GRAINED_LINEAGE_FIELD_TYPE; - } - - String downstreamRoot = downstreams.get(0).asText(); - if (!transformationOperationNode.has(downstreamRoot)) { - transformationOperationNode.set(downstreamRoot, instance.objectNode()); - } - ObjectNode downstreamRootNode = - (ObjectNode) transformationOperationNode.get(downstreamRoot); - if (!downstreamRootNode.has(queryId)) { - downstreamRootNode.set(queryId, instance.objectNode()); - } - ObjectNode queryNode = (ObjectNode) downstreamRootNode.get(queryId); - if (upstreams != null) { - addUrnsToParent( - queryNode, upstreams, confidenceScore, upstreamType, downstreamType); - } - }); - return mapNode; - } - - private void addUrnsToParent( - JsonNode parentNode, - ArrayNode urnsList, - Float confidenceScore, - String upstreamType, - String downstreamType) { - // Will overwrite repeat urns with different confidence scores with the most recently seen - ((ObjectNode) parentNode) - .setAll( - Streams.stream(urnsList.elements()) - .map(JsonNode::asText) - .distinct() - .collect( - Collectors.toMap( - urn -> urn, - urn -> - mapToLineageValueNode(confidenceScore, upstreamType, downstreamType)))); - } - - private JsonNode mapToLineageValueNode( - Float confidenceScore, String upstreamType, String downstreamType) { - ObjectNode objectNode = instance.objectNode(); - objectNode.set(FINE_GRAINED_CONFIDENCE_SCORE, instance.numberNode(confidenceScore)); - objectNode.set(FINE_GRAINED_UPSTREAM_TYPE, instance.textNode(upstreamType)); - objectNode.set(FINE_GRAINED_DOWNSTREAM_TYPE, instance.textNode(downstreamType)); - return objectNode; - } - - /** - * Takes the transformed fine grained lineages map from pre-processing and reconstructs an array - * of FineGrainedLineages Avoids producing side effects by copying nodes, use resulting node and - * not the original - * - * @param transformedFineGrainedLineages the transformed fine grained lineage map - * @return the modified {@link JsonNode} formatted consistent with the original schema - */ - private ArrayNode reconstructFineGrainedLineages(JsonNode transformedFineGrainedLineages) { - if (transformedFineGrainedLineages instanceof ArrayNode) { - // We already have an ArrayNode, no need to transform. 
This happens during `replace` - // operations - return (ArrayNode) transformedFineGrainedLineages; - } - ObjectNode mapNode = (ObjectNode) transformedFineGrainedLineages; - ArrayNode fineGrainedLineages = instance.arrayNode(); - - mapNode - .fieldNames() - .forEachRemaining( - transformationOperation -> { - final ObjectNode transformationOperationNode = - (ObjectNode) mapNode.get(transformationOperation); - transformationOperationNode - .fieldNames() - .forEachRemaining( - downstreamName -> { - final ObjectNode downstreamNode = - (ObjectNode) transformationOperationNode.get(downstreamName); - downstreamNode - .fieldNames() - .forEachRemaining( - queryId -> - buildFineGrainedLineage( - downstreamName, - downstreamNode, - queryId, - transformationOperation, - fineGrainedLineages)); - }); - }); - - return fineGrainedLineages; - } - - private void buildFineGrainedLineage( - final String downstreamName, - final ObjectNode downstreamNode, - final String queryId, - final String transformationOperation, - final ArrayNode fineGrainedLineages) { - final ObjectNode fineGrainedLineage = instance.objectNode(); - final ObjectNode queryNode = (ObjectNode) downstreamNode.get(queryId); - if (queryNode.isEmpty()) { - // Short circuit if no upstreams left - return; - } - ArrayNode downstream = instance.arrayNode(); - downstream.add(instance.textNode(downstreamName)); - // Set defaults, if found in sub nodes override, for confidenceScore take lowest - AtomicReference minimumConfidenceScore = new AtomicReference<>(DEFAULT_CONFIDENCE_SCORE); - AtomicReference upstreamType = - new AtomicReference<>(FINE_GRAINED_LINEAGE_FIELD_SET_TYPE); - AtomicReference downstreamType = new AtomicReference<>(FINE_GRAINED_LINEAGE_FIELD_TYPE); - ArrayNode upstreams = instance.arrayNode(); - queryNode - .fieldNames() - .forEachRemaining( - upstream -> - processUpstream( - queryNode, - upstream, - minimumConfidenceScore, - upstreamType, - downstreamType, - upstreams)); - fineGrainedLineage.set(FINE_GRAINED_DOWNSTREAMS, downstream); - fineGrainedLineage.set(FINE_GRAINED_UPSTREAMS, upstreams); - if (StringUtils.isNotBlank(queryId) && !DEFAULT_QUERY_ID.equals(queryId)) { - fineGrainedLineage.set(FINE_GRAINED_QUERY_ID, instance.textNode(queryId)); - } - fineGrainedLineage.set(FINE_GRAINED_UPSTREAM_TYPE, instance.textNode(upstreamType.get())); - fineGrainedLineage.set(FINE_GRAINED_DOWNSTREAM_TYPE, instance.textNode(downstreamType.get())); - fineGrainedLineage.set( - FINE_GRAINED_CONFIDENCE_SCORE, instance.numberNode(minimumConfidenceScore.get())); - fineGrainedLineage.set( - FINE_GRAINED_TRANSFORMATION_OPERATION, instance.textNode(transformationOperation)); - fineGrainedLineages.add(fineGrainedLineage); - } - - private void processUpstream( - final ObjectNode queryNode, - final String upstream, - final AtomicReference minimumConfidenceScore, - final AtomicReference upstreamType, - final AtomicReference downstreamType, - final ArrayNode upstreams) { - final ObjectNode upstreamNode = (ObjectNode) queryNode.get(upstream); - if (upstreamNode.has(FINE_GRAINED_CONFIDENCE_SCORE)) { - Float scoreValue = upstreamNode.get(FINE_GRAINED_CONFIDENCE_SCORE).floatValue(); - if (scoreValue <= minimumConfidenceScore.get()) { - minimumConfidenceScore.set(scoreValue); - } - } - // Set types to last encountered, should never change, but this at least tries to support - // other types being specified. 
- if (upstreamNode.has(FINE_GRAINED_UPSTREAM_TYPE)) { - upstreamType.set(upstreamNode.get(FINE_GRAINED_UPSTREAM_TYPE).asText()); - } - if (upstreamNode.has(FINE_GRAINED_DOWNSTREAM_TYPE)) { - downstreamType.set(upstreamNode.get(FINE_GRAINED_DOWNSTREAM_TYPE).asText()); - } - upstreams.add(instance.textNode(upstream)); - } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java index eaa106b8d1f638..d4894c97015f8f 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java @@ -2,6 +2,7 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchEntityArray; import java.util.List; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -21,4 +22,22 @@ ScrollResult scroll( @Nullable Filter filters, @Nullable String scrollId, int count); + + SearchRetriever EMPTY = new EmptySearchRetriever(); + + class EmptySearchRetriever implements SearchRetriever { + + @Override + public ScrollResult scroll( + @Nonnull List entities, + @Nullable Filter filters, + @Nullable String scrollId, + int count) { + ScrollResult empty = new ScrollResult(); + empty.setEntities(new SearchEntityArray()); + empty.setNumEntities(0); + empty.setPageSize(0); + return empty; + } + } } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/DataJobInputOutputTemplateTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/DataJobInputOutputTemplateTest.java new file mode 100644 index 00000000000000..d2a26221a3bb9f --- /dev/null +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/DataJobInputOutputTemplateTest.java @@ -0,0 +1,255 @@ +package com.linkedin.metadata.aspect.patch.template; + +import static com.linkedin.metadata.utils.GenericRecordUtils.*; +import static org.testng.Assert.*; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.common.UrnArray; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.DataMap; +import com.linkedin.datajob.DataJobInputOutput; +import com.linkedin.dataset.FineGrainedLineage; +import com.linkedin.dataset.FineGrainedLineageDownstreamType; +import com.linkedin.dataset.FineGrainedLineageUpstreamType; +import com.linkedin.metadata.aspect.patch.template.datajob.DataJobInputOutputTemplate; +import jakarta.json.Json; +import jakarta.json.JsonObjectBuilder; +import jakarta.json.JsonPatch; +import jakarta.json.JsonPatchBuilder; +import jakarta.json.JsonValue; +import org.testng.annotations.Test; + +public class DataJobInputOutputTemplateTest { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @Test + public void testPatchUpstream() throws Exception { + DataJobInputOutputTemplate dataJobInputOutputTemplate = new DataJobInputOutputTemplate(); + DataJobInputOutput dataJobInputOutput = dataJobInputOutputTemplate.getDefault(); + JsonPatchBuilder jsonPatchBuilder = Json.createPatchBuilder(); + + JsonObjectBuilder fineGrainedLineageNode = Json.createObjectBuilder(); + JsonValue upstreamConfidenceScore = Json.createValue(1.0f); + fineGrainedLineageNode.add("confidenceScore", upstreamConfidenceScore); + jsonPatchBuilder.add( + 
"/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)//urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)", + fineGrainedLineageNode.build()); + + // Initial population test + DataJobInputOutput result = + dataJobInputOutputTemplate.applyPatch(dataJobInputOutput, jsonPatchBuilder.build()); + // Hack because Jackson parses values to doubles instead of floats + DataMap dataMap = new DataMap(); + dataMap.put("confidenceScore", 1.0); + FineGrainedLineage fineGrainedLineage = new FineGrainedLineage(dataMap); + UrnArray urns = new UrnArray(); + Urn urn1 = + UrnUtils.getUrn( + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)"); + urns.add(urn1); + UrnArray upstreams = new UrnArray(); + Urn upstreamUrn = + UrnUtils.getUrn( + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)"); + upstreams.add(upstreamUrn); + fineGrainedLineage.setDownstreams(urns); + fineGrainedLineage.setUpstreams(upstreams); + fineGrainedLineage.setTransformOperation("CREATE"); + fineGrainedLineage.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET); + fineGrainedLineage.setDownstreamType(FineGrainedLineageDownstreamType.FIELD); + assertEquals(result.getFineGrainedLineages().get(0), fineGrainedLineage); + + // Test non-overwrite upstreams and correct confidence score and types w/ overwrite + JsonObjectBuilder finegrainedLineageNode2 = Json.createObjectBuilder(); + finegrainedLineageNode2.add( + "upstreamType", Json.createValue(FineGrainedLineageUpstreamType.FIELD_SET.name())); + finegrainedLineageNode2.add("confidenceScore", upstreamConfidenceScore); + finegrainedLineageNode2.add( + "downstreamType", Json.createValue(FineGrainedLineageDownstreamType.FIELD.name())); + + JsonPatchBuilder patchOperations2 = Json.createPatchBuilder(); + patchOperations2.add( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)", + finegrainedLineageNode2.build()); + + JsonValue upstreamConfidenceScore2 = Json.createValue(0.1f); + JsonObjectBuilder finegrainedLineageNode3 = Json.createObjectBuilder(); + finegrainedLineageNode3.add( + "upstreamType", Json.createValue(FineGrainedLineageUpstreamType.DATASET.name())); + finegrainedLineageNode3.add("confidenceScore", upstreamConfidenceScore2); + finegrainedLineageNode3.add( + "downstreamType", Json.createValue(FineGrainedLineageDownstreamType.FIELD_SET.name())); + + patchOperations2.add( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)", + finegrainedLineageNode3.build()); + + JsonPatch jsonPatch2 = patchOperations2.build(); + + DataJobInputOutput result2 = dataJobInputOutputTemplate.applyPatch(result, jsonPatch2); + // Hack because Jackson parses values to doubles instead of floats + DataMap dataMap2 = new DataMap(); + dataMap2.put("confidenceScore", 0.1); + FineGrainedLineage fineGrainedLineage2 = new FineGrainedLineage(dataMap2); + UrnArray urns2 = new UrnArray(); + Urn urn2 = + UrnUtils.getUrn( + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"); + urns2.add(urn2); + Urn downstreamUrn2 = + 
UrnUtils.getUrn( + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)"); + UrnArray downstreams2 = new UrnArray(); + downstreams2.add(downstreamUrn2); + fineGrainedLineage2.setUpstreams(urns2); + fineGrainedLineage2.setDownstreams(downstreams2); + fineGrainedLineage2.setTransformOperation("CREATE"); + fineGrainedLineage2.setUpstreamType(FineGrainedLineageUpstreamType.DATASET); + fineGrainedLineage2.setDownstreamType(FineGrainedLineageDownstreamType.FIELD_SET); + fineGrainedLineage2.setQuery(UrnUtils.getUrn("urn:li:query:someQuery")); + assertEquals(result2.getFineGrainedLineages().get(1), fineGrainedLineage2); + + // Check different queries + JsonObjectBuilder finegrainedLineageNode4 = Json.createObjectBuilder(); + finegrainedLineageNode4.add( + "upstreamType", Json.createValue(FineGrainedLineageUpstreamType.FIELD_SET.name())); + finegrainedLineageNode4.add("confidenceScore", upstreamConfidenceScore); + finegrainedLineageNode4.add( + "downstreamType", Json.createValue(FineGrainedLineageDownstreamType.FIELD.name())); + + JsonPatchBuilder patchOperations3 = Json.createPatchBuilder(); + patchOperations3.add( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)", + finegrainedLineageNode4.build()); + + JsonPatch jsonPatch3 = patchOperations3.build(); + DataJobInputOutput result3 = dataJobInputOutputTemplate.applyPatch(result2, jsonPatch3); + // Hack because Jackson parses values to doubles instead of floats + DataMap dataMap3 = new DataMap(); + dataMap3.put("confidenceScore", 1.0); + FineGrainedLineage fineGrainedLineage3 = new FineGrainedLineage(dataMap3); + UrnArray urns3 = new UrnArray(); + Urn urn3 = + UrnUtils.getUrn( + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)"); + urns3.add(urn3); + + Urn upstreamUrn3 = + UrnUtils.getUrn( + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"); + UrnArray upstreamUrns3 = new UrnArray(); + upstreamUrns3.add(upstreamUrn3); + fineGrainedLineage3.setDownstreams(urns3); + fineGrainedLineage3.setUpstreams(upstreamUrns3); + fineGrainedLineage3.setTransformOperation("CREATE"); + fineGrainedLineage3.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET); + fineGrainedLineage3.setDownstreamType(FineGrainedLineageDownstreamType.FIELD); + fineGrainedLineage3.setQuery(UrnUtils.getUrn("urn:li:query:anotherQuery")); + // Splits into two for different types + assertEquals(result3.getFineGrainedLineages().get(2), fineGrainedLineage3); + + // Check different transform types + JsonObjectBuilder finegrainedLineageNode5 = Json.createObjectBuilder(); + finegrainedLineageNode5.add( + "upstreamType", Json.createValue(FineGrainedLineageUpstreamType.FIELD_SET.name())); + finegrainedLineageNode5.add("confidenceScore", upstreamConfidenceScore); + finegrainedLineageNode5.add( + "downstreamType", Json.createValue(FineGrainedLineageDownstreamType.FIELD.name())); + + JsonPatchBuilder patchOperations4 = Json.createPatchBuilder(); + patchOperations4.add( + "/fineGrainedLineages/TRANSFORM/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)", + finegrainedLineageNode5.build()); + JsonPatch jsonPatch4 = 
patchOperations4.build(); + + DataJobInputOutput result4 = dataJobInputOutputTemplate.applyPatch(result3, jsonPatch4); + // Hack because Jackson parses values to doubles instead of floats + DataMap dataMap4 = new DataMap(); + dataMap4.put("confidenceScore", 1.0); + FineGrainedLineage fineGrainedLineage4 = new FineGrainedLineage(dataMap4); + fineGrainedLineage4.setUpstreams(upstreamUrns3); + fineGrainedLineage4.setDownstreams(urns3); + fineGrainedLineage4.setTransformOperation("TRANSFORM"); + fineGrainedLineage4.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET); + fineGrainedLineage4.setDownstreamType(FineGrainedLineageDownstreamType.FIELD); + fineGrainedLineage4.setQuery(UrnUtils.getUrn("urn:li:query:anotherQuery")); + // New entry in array because of new transformation type + assertEquals(result4.getFineGrainedLineages().get(3), fineGrainedLineage4); + + // Remove + JsonPatchBuilder removeOperations = Json.createPatchBuilder(); + removeOperations.remove( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)"); + removeOperations.remove( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"); + removeOperations.remove( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"); + removeOperations.remove( + "/fineGrainedLineages/TRANSFORM/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"); + + JsonPatch removePatch = removeOperations.build(); + DataJobInputOutput finalResult = dataJobInputOutputTemplate.applyPatch(result4, removePatch); + assertEquals(finalResult, dataJobInputOutputTemplate.getDefault()); + } + + @Test + public void testPatchWithFieldWithForwardSlash() throws JsonProcessingException { + + String downstreamUrn = + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)"; + String unescapedUpstreamUrn = + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),slash/column)"; + String escapedUpstreamUrn = + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),slash~1column)"; + String lineagePath = downstreamUrn + "//" + escapedUpstreamUrn; + + DataJobInputOutputTemplate dataJobInputOutputTemplate = new DataJobInputOutputTemplate(); + DataJobInputOutput dataJobInputOutput = dataJobInputOutputTemplate.getDefault(); + JsonPatchBuilder jsonPatchBuilder = Json.createPatchBuilder(); + + JsonObjectBuilder fineGrainedLineageNode = Json.createObjectBuilder(); + JsonValue upstreamConfidenceScore = Json.createValue(1.0f); + fineGrainedLineageNode.add("confidenceScore", upstreamConfidenceScore); + + jsonPatchBuilder.add(lineagePath, fineGrainedLineageNode.build()); + + // Initial population test + DataJobInputOutput result = + dataJobInputOutputTemplate.applyPatch(dataJobInputOutput, jsonPatchBuilder.build()); + + assertEquals( + 
result.getFineGrainedLineages().get(0).getUpstreams().get(0).toString(), + unescapedUpstreamUrn); + } + + @Test + public void testPatchWithFieldWithTilde() throws JsonProcessingException { + + String downstreamUrn = + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)"; + String unescapedUpstreamUrn = + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),tilde~column)"; + String escapedUpstreamUrn = + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),tilde~0column)"; + String lineagePath = downstreamUrn + "//" + escapedUpstreamUrn; + + DataJobInputOutputTemplate dataJobInputOutputTemplate = new DataJobInputOutputTemplate(); + DataJobInputOutput dataJobInputOutput = dataJobInputOutputTemplate.getDefault(); + JsonPatchBuilder jsonPatchBuilder = Json.createPatchBuilder(); + + JsonObjectBuilder fineGrainedLineageNode = Json.createObjectBuilder(); + JsonValue upstreamConfidenceScore = Json.createValue(1.0f); + fineGrainedLineageNode.add("confidenceScore", upstreamConfidenceScore); + + jsonPatchBuilder.add(lineagePath, fineGrainedLineageNode.build()); + + // Initial population test + DataJobInputOutput result = + dataJobInputOutputTemplate.applyPatch(dataJobInputOutput, jsonPatchBuilder.build()); + assertEquals( + result.getFineGrainedLineages().get(0).getUpstreams().get(0).toString(), + unescapedUpstreamUrn); + } +} diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/UpstreamLineageTemplateTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/UpstreamLineageTemplateTest.java index f934dd8961ca37..ab0e7f960251c4 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/UpstreamLineageTemplateTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/UpstreamLineageTemplateTest.java @@ -221,6 +221,7 @@ public void testPatchUpstream() throws Exception { JsonPatch removePatch = removeOperations.build(); UpstreamLineage finalResult = upstreamLineageTemplate.applyPatch(result4, removePatch); + assertEquals(finalResult, upstreamLineageTemplate.getDefault()); } @@ -337,4 +338,39 @@ public void testPatchWithFieldWithTilde() throws JsonProcessingException { result.getFineGrainedLineages().get(0).getUpstreams().get(0).toString(), unescapedUpstreamUrn); } + + @Test + public void testPatchRemoveWithFields() throws JsonProcessingException { + + String downstreamUrn = + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,~1tmp~1test.parquet,PROD),c1)"; + String upstreamUrn = + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)"; + String upstreamUrn2 = + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)"; + + String lineagePath1 = downstreamUrn + "/NONE/" + upstreamUrn; + String lineagePath2 = downstreamUrn + "/NONE/" + upstreamUrn2; + + UpstreamLineageTemplate upstreamLineageTemplate = new UpstreamLineageTemplate(); + UpstreamLineage upstreamLineage = upstreamLineageTemplate.getDefault(); + JsonPatchBuilder jsonPatchBuilder = Json.createPatchBuilder(); + + JsonObjectBuilder fineGrainedLineageNode = Json.createObjectBuilder(); + JsonValue upstreamConfidenceScore = Json.createValue(1.0f); + fineGrainedLineageNode.add("confidenceScore", upstreamConfidenceScore); + + jsonPatchBuilder.add(lineagePath1, 
fineGrainedLineageNode.build()); + jsonPatchBuilder.add(lineagePath2, fineGrainedLineageNode.build()); + + // Initial population test + UpstreamLineage result = + upstreamLineageTemplate.applyPatch(upstreamLineage, jsonPatchBuilder.build()); + assertEquals( + result.getFineGrainedLineages().get(0).getUpstreams().get(0).toString(), upstreamUrn); + assertEquals( + result.getFineGrainedLineages().get(0).getUpstreams().get(1).toString(), upstreamUrn2); + + assertEquals(result.getFineGrainedLineages().get(0).getUpstreams().size(), 2); + } } diff --git a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java index 65705f15022b6b..98a6d59004a92a 100644 --- a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java +++ b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java @@ -5,7 +5,7 @@ import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.Aspect; -import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.mxe.SystemMetadata; @@ -22,7 +22,7 @@ import javax.annotation.Nonnull; import org.mockito.Mockito; -public class MockAspectRetriever implements AspectRetriever { +public class MockAspectRetriever implements CachingAspectRetriever { private final Map> data; private final Map> systemData = new HashMap<>(); diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index ff6a79108600a3..09f873ebf7bc96 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -409,6 +409,8 @@ public class Constants { /** User Status */ public static final String CORP_USER_STATUS_ACTIVE = "ACTIVE"; + public static final String CORP_USER_STATUS_SUSPENDED = "SUSPENDED"; + /** Task Runs */ public static final String DATA_PROCESS_INSTANCE_ENTITY_NAME = "dataProcessInstance"; diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py index aa7b3108f64f1e..640991a90a1d28 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py @@ -9,6 +9,7 @@ import airflow import datahub.emitter.mce_builder as builder +from airflow.models import Variable from airflow.models.serialized_dag import SerializedDagModel from datahub.api.entities.datajob import DataJob from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult @@ -78,6 +79,8 @@ def hookimpl(f: _F) -> _F: # type: ignore[misc] # noqa: F811 ) _DATAHUB_CLEANUP_DAG = "Datahub_Cleanup" +KILL_SWITCH_VARIABLE_NAME = "datahub_airflow_plugin_disable_listener" + def get_airflow_plugin_listener() -> Optional["DataHubListener"]: # Using globals instead of functools.lru_cache to make testing easier. 
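Note on the kill-switch hunks that follow: the task-instance and DAG-run hooks in the Airflow listener now consult an Airflow Variable before doing any work, so operators can disable the plugin without redeploying it. A minimal sketch of the same check outside the plugin, assuming only the Variable name introduced by this patch (the helper name is illustrative, not part of the patch):

from airflow.models import Variable

KILL_SWITCH_VARIABLE_NAME = "datahub_airflow_plugin_disable_listener"


def listener_disabled() -> bool:
    # Variable.get returns the supplied default when the Variable is unset,
    # so the listener stays enabled unless the switch is explicitly "true".
    return Variable.get(KILL_SWITCH_VARIABLE_NAME, "false").lower() == "true"

Setting the Variable (for example with `airflow variables set datahub_airflow_plugin_disable_listener true`) makes each of the patched hooks below return early.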
@@ -364,6 +367,12 @@ def _extract_lineage( redact_with_exclusions(v) ) + def check_kill_switch(self): + if Variable.get(KILL_SWITCH_VARIABLE_NAME, "false").lower() == "true": + logger.debug("DataHub listener disabled by kill switch") + return True + return False + @hookimpl @run_in_thread def on_task_instance_running( @@ -372,6 +381,8 @@ def on_task_instance_running( task_instance: "TaskInstance", session: "Session", # This will always be QUEUED ) -> None: + if self.check_kill_switch(): + return self._set_log_level() # This if statement mirrors the logic in https://github.com/OpenLineage/OpenLineage/pull/508. @@ -454,6 +465,9 @@ def on_task_instance_running( f"DataHub listener finished processing notification about task instance start for {task_instance.task_id}" ) + self.materialize_iolets(datajob) + + def materialize_iolets(self, datajob: DataJob) -> None: if self.config.materialize_iolets: for outlet in datajob.outlets: reported_time: int = int(time.time() * 1000) @@ -541,6 +555,9 @@ def on_task_instance_finish( def on_task_instance_success( self, previous_state: None, task_instance: "TaskInstance", session: "Session" ) -> None: + if self.check_kill_switch(): + return + self._set_log_level() logger.debug( @@ -556,6 +573,9 @@ def on_task_instance_success( def on_task_instance_failed( self, previous_state: None, task_instance: "TaskInstance", session: "Session" ) -> None: + if self.check_kill_switch(): + return + self._set_log_level() logger.debug( @@ -696,6 +716,9 @@ def on_dag_start(self, dag_run: "DagRun") -> None: @hookimpl @run_in_thread def on_dag_run_running(self, dag_run: "DagRun", msg: str) -> None: + if self.check_kill_switch(): + return + self._set_log_level() logger.debug( diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 415871d30175f8..c6994dd6d5aa65 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -76,7 +76,7 @@ # now provide prebuilt wheels for most platforms, including M1 Macs and # Linux aarch64 (e.g. Docker's linux/arm64). Installing confluent_kafka # from source remains a pain. - "confluent_kafka>=1.9.0", + "confluent_kafka[schemaregistry]>=1.9.0", # We currently require both Avro libraries. The codegen uses avro-python3 (above) # schema parsers at runtime for generating and reading JSON into Python objects. # At the same time, we use Kafka's AvroSerializer, which internally relies on @@ -101,7 +101,7 @@ # We heavily monkeypatch sqlglot. 
# Prior to the patching, we originally maintained an acryl-sqlglot fork: # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:main?expand=1 - "sqlglot[rs]==25.26.0", + "sqlglot[rs]==25.32.1", "patchy==2.8.0", } @@ -741,7 +741,7 @@ "hive-metastore = datahub.ingestion.source.sql.hive_metastore:HiveMetastoreSource", "json-schema = datahub.ingestion.source.schema.json_schema:JsonSchemaSource", "kafka = datahub.ingestion.source.kafka.kafka:KafkaSource", - "kafka-connect = datahub.ingestion.source.kafka.kafka_connect:KafkaConnectSource", + "kafka-connect = datahub.ingestion.source.kafka_connect.kafka_connect:KafkaConnectSource", "ldap = datahub.ingestion.source.ldap:LDAPSource", "looker = datahub.ingestion.source.looker.looker_source:LookerDashboardSource", "lookml = datahub.ingestion.source.looker.lookml_source:LookMLSource", diff --git a/metadata-ingestion/sink_docs/metadata-file.md b/metadata-ingestion/sink_docs/metadata-file.md index 49ca3c75397af4..36c868828070ed 100644 --- a/metadata-ingestion/sink_docs/metadata-file.md +++ b/metadata-ingestion/sink_docs/metadata-file.md @@ -25,7 +25,7 @@ source: sink: type: file config: - path: ./path/to/mce/file.json + filename: ./path/to/mce/file.json ``` ## Config details diff --git a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py index fd3fe7ca098ecb..619f69b016262d 100644 --- a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py +++ b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py @@ -9,27 +9,18 @@ from datahub.configuration.common import ConfigModel from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.api.global_context import get_graph_context, set_graph_context -from datahub.ingestion.graph.client import DataHubGraph, get_default_graph +from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import ( PropertyValueClass, StructuredPropertyDefinitionClass, ) -from datahub.utilities.urns.urn import Urn +from datahub.metadata.urns import DataTypeUrn, StructuredPropertyUrn, Urn +from datahub.utilities.urns._urn_base import URN_TYPES logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -class StructuredPropertiesConfig: - """Configuration class to hold the graph client""" - - @classmethod - def get_graph_required(cls) -> DataHubGraph: - """Get the current graph, falling back to default if none set""" - return get_graph_context() or get_default_graph() - - class AllowedTypes(Enum): STRING = "string" RICH_TEXT = "rich_text" @@ -51,29 +42,28 @@ class AllowedValue(ConfigModel): description: Optional[str] = None -VALID_ENTITY_TYPES_PREFIX_STRING = ", ".join( - [ - f"urn:li:entityType:datahub.{x}" - for x in ["dataset", "dashboard", "dataFlow", "schemaField"] - ] -) -VALID_ENTITY_TYPES_STRING = f"Valid entity type urns are {VALID_ENTITY_TYPES_PREFIX_STRING}, etc... Ensure that the entity type is valid." +VALID_ENTITY_TYPE_URNS = [ + Urn.make_entity_type_urn(entity_type) for entity_type in URN_TYPES.keys() +] +_VALID_ENTITY_TYPES_STRING = f"Valid entity type urns are {', '.join(VALID_ENTITY_TYPE_URNS)}, etc... Ensure that the entity type is valid." + + +def _validate_entity_type_urn(v: str) -> str: + urn = Urn.make_entity_type_urn(v) + if urn not in VALID_ENTITY_TYPE_URNS: + raise ValueError( + f"Input {v} is not a valid entity type urn. 
{_VALID_ENTITY_TYPES_STRING}" + ) + v = str(urn) + return v class TypeQualifierAllowedTypes(ConfigModel): allowed_types: List[str] - @validator("allowed_types", each_item=True) - def validate_allowed_types(cls, v): - if v: - graph = StructuredPropertiesConfig.get_graph_required() - validated_urn = Urn.make_entity_type_urn(v) - if not graph.exists(validated_urn): - raise ValueError( - f"Input {v} is not a valid entity type urn. {VALID_ENTITY_TYPES_STRING}" - ) - v = str(validated_urn) - return v + _check_allowed_types = validator("allowed_types", each_item=True, allow_reuse=True)( + _validate_entity_type_urn + ) class StructuredProperties(ConfigModel): @@ -90,22 +80,42 @@ class StructuredProperties(ConfigModel): type_qualifier: Optional[TypeQualifierAllowedTypes] = None immutable: Optional[bool] = False - @validator("entity_types", each_item=True) - def validate_entity_types(cls, v): - if v: - graph = StructuredPropertiesConfig.get_graph_required() - validated_urn = Urn.make_entity_type_urn(v) - if not graph.exists(validated_urn): - raise ValueError( - f"Input {v} is not a valid entity type urn. {VALID_ENTITY_TYPES_STRING}" - ) - v = str(validated_urn) - return v + _check_entity_types = validator("entity_types", each_item=True, allow_reuse=True)( + _validate_entity_type_urn + ) + + @validator("type") + def validate_type(cls, v: str) -> str: + # This logic is somewhat hacky, since we need to deal with + # 1. fully qualified urns + # 2. raw data types, that need to get the datahub namespace prefix + # While keeping the user-facing interface and error messages clean. + + if not v.startswith("urn:li:") and not v.islower(): + # Convert to lowercase if needed + v = v.lower() + logger.warning( + f"Structured property type should be lowercase. Updated to {v}" + ) + + urn = Urn.make_data_type_urn(v) + + # Check if type is allowed + data_type_urn = DataTypeUrn.from_string(urn) + unqualified_data_type = data_type_urn.id + if unqualified_data_type.startswith("datahub."): + unqualified_data_type = unqualified_data_type[len("datahub.") :] + if not AllowedTypes.check_allowed_type(unqualified_data_type): + raise ValueError( + f"Type {unqualified_data_type} is not allowed. Allowed types are {AllowedTypes.values()}" + ) + + return urn @property def fqn(self) -> str: assert self.urn is not None - id = Urn.create_from_string(self.urn).get_entity_id()[0] + id = StructuredPropertyUrn.from_string(self.urn).id if self.qualified_name is not None: # ensure that qualified name and ID match assert ( @@ -122,101 +132,90 @@ def urn_must_be_present(cls, v, values): return v @staticmethod - def create(file: str, graph: Optional[DataHubGraph] = None) -> None: - with set_graph_context(graph): - graph = StructuredPropertiesConfig.get_graph_required() - - with open(file) as fp: - structuredproperties: List[dict] = yaml.safe_load(fp) - for structuredproperty_raw in structuredproperties: - structuredproperty = StructuredProperties.parse_obj( - structuredproperty_raw - ) - - if not structuredproperty.type.islower(): - structuredproperty.type = structuredproperty.type.lower() - logger.warning( - f"Structured property type should be lowercase. Updated to {structuredproperty.type}" - ) - if not AllowedTypes.check_allowed_type(structuredproperty.type): - raise ValueError( - f"Type {structuredproperty.type} is not allowed. 
Allowed types are {AllowedTypes.values()}" - ) - mcp = MetadataChangeProposalWrapper( - entityUrn=structuredproperty.urn, - aspect=StructuredPropertyDefinitionClass( - qualifiedName=structuredproperty.fqn, - valueType=Urn.make_data_type_urn(structuredproperty.type), - displayName=structuredproperty.display_name, - description=structuredproperty.description, - entityTypes=[ - Urn.make_entity_type_urn(entity_type) - for entity_type in structuredproperty.entity_types or [] - ], - cardinality=structuredproperty.cardinality, - immutable=structuredproperty.immutable, - allowedValues=( - [ - PropertyValueClass( - value=v.value, description=v.description - ) - for v in structuredproperty.allowed_values - ] - if structuredproperty.allowed_values - else None - ), - typeQualifier=( - { - "allowedTypes": structuredproperty.type_qualifier.allowed_types - } - if structuredproperty.type_qualifier - else None - ), - ), - ) - graph.emit_mcp(mcp) - - logger.info(f"Created structured property {structuredproperty.urn}") - - @classmethod - def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties": - with set_graph_context(graph): - structured_property: Optional[ - StructuredPropertyDefinitionClass - ] = graph.get_aspect(urn, StructuredPropertyDefinitionClass) - if structured_property is None: - raise Exception( - "StructuredPropertyDefinition aspect is None. Unable to create structured property." - ) - return StructuredProperties( - urn=urn, - qualified_name=structured_property.qualifiedName, - display_name=structured_property.displayName, - type=structured_property.valueType, - description=structured_property.description, - entity_types=structured_property.entityTypes, - cardinality=structured_property.cardinality, - allowed_values=( + def from_yaml(file: str) -> List["StructuredProperties"]: + with open(file) as fp: + structuredproperties: List[dict] = yaml.safe_load(fp) + + result: List[StructuredProperties] = [] + for structuredproperty_raw in structuredproperties: + result.append(StructuredProperties.parse_obj(structuredproperty_raw)) + return result + + def generate_mcps(self) -> List[MetadataChangeProposalWrapper]: + mcp = MetadataChangeProposalWrapper( + entityUrn=self.urn, + aspect=StructuredPropertyDefinitionClass( + qualifiedName=self.fqn, + valueType=Urn.make_data_type_urn(self.type), + displayName=self.display_name, + description=self.description, + entityTypes=[ + Urn.make_entity_type_urn(entity_type) + for entity_type in self.entity_types or [] + ], + cardinality=self.cardinality, + immutable=self.immutable, + allowedValues=( [ - AllowedValue( - value=av.value, - description=av.description, - ) - for av in structured_property.allowedValues or [] + PropertyValueClass(value=v.value, description=v.description) + for v in self.allowed_values ] - if structured_property.allowedValues is not None + if self.allowed_values else None ), - type_qualifier=( - { - "allowed_types": structured_property.typeQualifier.get( - "allowedTypes" - ) - } - if structured_property.typeQualifier + typeQualifier=( + {"allowedTypes": self.type_qualifier.allowed_types} + if self.type_qualifier else None ), + ), + ) + return [mcp] + + @staticmethod + def create(file: str, graph: DataHubGraph) -> None: + # TODO: Deprecate this method. 
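# Editorial sketch (not part of this patch): with the refactor above, YAML loading and
# MCP generation are reusable on their own, and create() stays only as a thin wrapper.
# The file path below is a hypothetical example.
from datahub.api.entities.structuredproperties.structuredproperties import (
    StructuredProperties,
)
from datahub.ingestion.graph.client import get_default_graph

with get_default_graph() as graph:
    for prop in StructuredProperties.from_yaml("structured_properties.yaml"):
        for mcp in prop.generate_mcps():
            graph.emit_mcp(mcp)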
+ structuredproperties = StructuredProperties.from_yaml(file) + for structuredproperty in structuredproperties: + for mcp in structuredproperty.generate_mcps(): + graph.emit_mcp(mcp) + + logger.info(f"Created structured property {structuredproperty.urn}") + + @classmethod + def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties": + structured_property: Optional[ + StructuredPropertyDefinitionClass + ] = graph.get_aspect(urn, StructuredPropertyDefinitionClass) + if structured_property is None: + raise Exception( + "StructuredPropertyDefinition aspect is None. Unable to create structured property." ) + return StructuredProperties( + urn=urn, + qualified_name=structured_property.qualifiedName, + display_name=structured_property.displayName, + type=structured_property.valueType, + description=structured_property.description, + entity_types=structured_property.entityTypes, + cardinality=structured_property.cardinality, + allowed_values=( + [ + AllowedValue( + value=av.value, + description=av.description, + ) + for av in structured_property.allowedValues or [] + ] + if structured_property.allowedValues is not None + else None + ), + type_qualifier=( + {"allowed_types": structured_property.typeQualifier.get("allowedTypes")} + if structured_property.typeQualifier + else None + ), + ) def to_yaml( self, diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index 51f095751f7dd9..fcab07a1c2aaf6 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -27,6 +27,7 @@ logger = logging.getLogger(__name__) +INGEST_SRC_TABLE_COLUMNS = ["runId", "source", "startTime", "status", "URN"] RUNS_TABLE_COLUMNS = ["runId", "rows", "created at"] RUN_TABLE_COLUMNS = ["urn", "aspect name", "created at"] @@ -437,6 +438,115 @@ def mcps(path: str) -> None: sys.exit(ret) +@ingest.command() +@click.argument("page_offset", type=int, default=0) +@click.argument("page_size", type=int, default=100) +@click.option("--urn", type=str, default=None, help="Filter by ingestion source URN.") +@click.option( + "--source", type=str, default=None, help="Filter by ingestion source name." +) +@upgrade.check_upgrade +@telemetry.with_telemetry() +def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) -> None: + """List ingestion source runs with their details, optionally filtered by URN or source.""" + + query = """ + query listIngestionRuns($input: ListIngestionSourcesInput!) 
{ + listIngestionSources(input: $input) { + ingestionSources { + urn + name + executions { + executionRequests { + id + result { + startTimeMs + status + } + } + } + } + } + } + """ + + # filter by urn and/or source using CONTAINS + filters = [] + if urn: + filters.append({"field": "urn", "values": [urn], "condition": "CONTAIN"}) + if source: + filters.append({"field": "name", "values": [source], "condition": "CONTAIN"}) + + variables = { + "input": { + "start": page_offset, + "count": page_size, + "filters": filters, + } + } + + client = get_default_graph() + session = client._session + gms_host = client.config.server + + url = f"{gms_host}/api/graphql" + try: + response = session.post(url, json={"query": query, "variables": variables}) + response.raise_for_status() + except Exception as e: + click.echo(f"Error fetching data: {str(e)}") + return + + try: + data = response.json() + except ValueError: + click.echo("Failed to parse JSON response from server.") + return + + if not data: + click.echo("No response received from the server.") + return + + # when urn or source filter does not match, exit gracefully + if ( + not isinstance(data.get("data"), dict) + or "listIngestionSources" not in data["data"] + ): + click.echo("No matching ingestion sources found. Please check your filters.") + return + + ingestion_sources = data["data"]["listIngestionSources"]["ingestionSources"] + if not ingestion_sources: + click.echo("No ingestion sources or executions found.") + return + + rows = [] + for ingestion_source in ingestion_sources: + urn = ingestion_source.get("urn", "N/A") + name = ingestion_source.get("name", "N/A") + + executions = ingestion_source.get("executions", {}).get("executionRequests", []) + for execution in executions: + execution_id = execution.get("id", "N/A") + start_time = execution.get("result", {}).get("startTimeMs", "N/A") + start_time = ( + datetime.fromtimestamp(start_time / 1000).strftime("%Y-%m-%d %H:%M:%S") + if start_time != "N/A" + else "N/A" + ) + status = execution.get("result", {}).get("status", "N/A") + + rows.append([execution_id, name, start_time, status, urn]) + + click.echo( + tabulate( + rows, + headers=INGEST_SRC_TABLE_COLUMNS, + tablefmt="grid", + ) + ) + + @ingest.command() @click.argument("page_offset", type=int, default=0) @click.argument("page_size", type=int, default=100) diff --git a/metadata-ingestion/src/datahub/cli/specific/structuredproperties_cli.py b/metadata-ingestion/src/datahub/cli/specific/structuredproperties_cli.py index 4162d44b9b0ea8..42285cf13a5ddc 100644 --- a/metadata-ingestion/src/datahub/cli/specific/structuredproperties_cli.py +++ b/metadata-ingestion/src/datahub/cli/specific/structuredproperties_cli.py @@ -31,7 +31,8 @@ def properties() -> None: def upsert(file: Path) -> None: """Upsert structured properties in DataHub.""" - StructuredProperties.create(str(file)) + with get_default_graph() as graph: + StructuredProperties.create(str(file), graph) @properties.command( diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py index d237cd9ddd306c..e7e9bfd43adca5 100644 --- a/metadata-ingestion/src/datahub/configuration/git.py +++ b/metadata-ingestion/src/datahub/configuration/git.py @@ -24,7 +24,11 @@ class GitReference(ConfigModel): "main", description="Branch on which your files live by default. Typically main or master. 
This can also be a commit hash.", ) - + url_subdir: Optional[str] = Field( + default=None, + description="Prefix to prepend when generating URLs for files - useful when files are in a subdirectory. " + "Only affects URL generation, not git operations.", + ) url_template: Optional[str] = Field( None, description=f"Template for generating a URL to a file in the repo e.g. '{_GITHUB_URL_TEMPLATE}'. We can infer this for GitHub and GitLab repos, and it is otherwise required." @@ -68,6 +72,8 @@ def infer_url_template(cls, url_template: Optional[str], values: dict) -> str: def get_url_for_file_path(self, file_path: str) -> str: assert self.url_template + if self.url_subdir: + file_path = f"{self.url_subdir}/{file_path}" return self.url_template.format( repo_url=self.repo, branch=self.branch, file_path=file_path ) diff --git a/metadata-ingestion/src/datahub/configuration/source_common.py b/metadata-ingestion/src/datahub/configuration/source_common.py index 44c737f1bd13d4..8e41e9fb917878 100644 --- a/metadata-ingestion/src/datahub/configuration/source_common.py +++ b/metadata-ingestion/src/datahub/configuration/source_common.py @@ -63,3 +63,16 @@ class DatasetLineageProviderConfigBase(EnvConfigMixin): default=None, description="A holder for platform -> platform_instance mappings to generate correct dataset urns", ) + + +class PlatformDetail(ConfigModel): + platform_instance: Optional[str] = Field( + default=None, + description="DataHub platform instance name. To generate correct urn for upstream dataset, this should match " + "with platform instance name used in ingestion " + "recipe of other datahub sources.", + ) + env: str = Field( + default=DEFAULT_ENV, + description="The environment that all assets produced by DataHub platform ingestion source belong to", + ) diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index c80da04e481a9f..c3638635b19aac 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -184,6 +184,7 @@ def infos(self) -> LossyList[StructuredLogEntry]: @dataclass class SourceReport(Report): + event_not_produced_warn: bool = True events_produced: int = 0 events_produced_per_sec: int = 0 diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py index 0c86e1cf47203f..7791ea2797be34 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py @@ -150,7 +150,7 @@ def auto_workunit_reporter(report: "SourceReport", stream: Iterable[T]) -> Itera report.report_workunit(wu) yield wu - if report.events_produced == 0: + if report.event_not_produced_warn and report.events_produced == 0: report.warning( title="No metadata was produced by the source", message="Please check the source configuration, filters, and permissions.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py index faa281097de4cd..80906ca63115f5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py @@ -147,6 +147,47 @@ def query(self) -> str: version """ + def execute_server_cursor( + self, query: str, params: Dict[str, Any] + ) -> Iterable[Dict[str, Any]]: + 
with self.engine.connect() as conn: + if self.engine.dialect.name == "postgresql": + with conn.begin(): # Transaction required for PostgreSQL server-side cursor + conn = conn.execution_options( + stream_results=True, + yield_per=self.config.database_query_batch_size, + ) + result = conn.execute(query, params) + for row in result: + yield dict(row) + elif self.engine.dialect.name == "mysql": # MySQL + import MySQLdb + + with contextlib.closing( + conn.connection.cursor(MySQLdb.cursors.SSCursor) + ) as cursor: + logger.debug(f"Using Cursor type: {cursor.__class__.__name__}") + cursor.execute(query, params) + + columns = [desc[0] for desc in cursor.description] + while True: + rows = cursor.fetchmany(self.config.database_query_batch_size) + if not rows: + break # Use break instead of return in generator + for row in rows: + yield dict(zip(columns, row)) + else: + raise ValueError(f"Unsupported dialect: {self.engine.dialect.name}") + + def _get_rows( + self, from_createdon: datetime, stop_time: datetime + ) -> Iterable[Dict[str, Any]]: + params = { + "exclude_aspects": list(self.config.exclude_aspects), + "since_createdon": from_createdon.strftime(DATETIME_FORMAT), + } + yield from self.execute_server_cursor(self.query, params) + def get_aspects( self, from_createdon: datetime, stop_time: datetime ) -> Iterable[Tuple[MetadataChangeProposalWrapper, datetime]]: @@ -159,27 +200,6 @@ def get_aspects( if mcp: yield mcp, row["createdon"] - def _get_rows( - self, from_createdon: datetime, stop_time: datetime - ) -> Iterable[Dict[str, Any]]: - with self.engine.connect() as conn: - with contextlib.closing(conn.connection.cursor()) as cursor: - cursor.execute( - self.query, - { - "exclude_aspects": list(self.config.exclude_aspects), - "since_createdon": from_createdon.strftime(DATETIME_FORMAT), - }, - ) - - columns = [desc[0] for desc in cursor.description] - while True: - rows = cursor.fetchmany(self.config.database_query_batch_size) - if not rows: - return - for row in rows: - yield dict(zip(columns, row)) - def get_soft_deleted_rows(self) -> Iterable[Dict[str, Any]]: """ Fetches all soft-deleted entities from the database. 
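The hunk above replaces the buffered client-side cursor with dialect-aware server-side streaming: PostgreSQL uses `stream_results`/`yield_per` inside an explicit transaction, while MySQL uses `MySQLdb.cursors.SSCursor` with `fetchmany`. A minimal sketch of the PostgreSQL branch against a standalone SQLAlchemy engine, where the connection string, query, and batch size are placeholders:

from sqlalchemy import create_engine, text

engine = create_engine("postgresql+psycopg2://user:pass@localhost:5432/datahub")

with engine.connect() as conn:
    # Server-side cursors in PostgreSQL require an open transaction.
    with conn.begin():
        streaming_conn = conn.execution_options(stream_results=True, yield_per=2000)
        result = streaming_conn.execute(
            text("SELECT urn, aspect, createdon FROM metadata_aspect_v2")
        )
        for row in result:
            # Rows arrive in batches of yield_per instead of all at once.
            print(row.urn)

The MySQL branch in the hunk achieves the same effect with an unbuffered `SSCursor` and repeated `fetchmany` calls.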
diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py index 63cea45f75864b..cb72441344088c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py @@ -1,5 +1,5 @@ import logging -from datetime import datetime, timezone +from datetime import datetime, timedelta, timezone from functools import partial from typing import Dict, Iterable, List, Optional @@ -26,6 +26,7 @@ StatefulIngestionSourceBase, ) from datahub.metadata.schema_classes import ChangeTypeClass +from datahub.utilities.progress_timer import ProgressTimer logger = logging.getLogger(__name__) @@ -105,11 +106,17 @@ def _get_database_workunits( self, from_createdon: datetime, reader: DataHubDatabaseReader ) -> Iterable[MetadataWorkUnit]: logger.info(f"Fetching database aspects starting from {from_createdon}") + progress = ProgressTimer(report_every=timedelta(seconds=60)) mcps = reader.get_aspects(from_createdon, self.report.stop_time) for i, (mcp, createdon) in enumerate(mcps): if not self.urn_pattern.allowed(str(mcp.entityUrn)): continue + if progress.should_report(): + logger.info( + f"Ingested {i} database aspects so far, currently at {createdon}" + ) + yield mcp.as_workunit() self.report.num_database_aspects_ingested += 1 diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py b/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py index 814f65ecb45cf0..4eecbb4d9d7177 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py @@ -65,18 +65,18 @@ class DataHubGcSourceConfig(ConfigModel): description="Sleep between truncation monitoring.", ) - dataprocess_cleanup: Optional[DataProcessCleanupConfig] = Field( - default=None, + dataprocess_cleanup: DataProcessCleanupConfig = Field( + default_factory=DataProcessCleanupConfig, description="Configuration for data process cleanup", ) - soft_deleted_entities_cleanup: Optional[SoftDeletedEntitiesCleanupConfig] = Field( - default=None, + soft_deleted_entities_cleanup: SoftDeletedEntitiesCleanupConfig = Field( + default_factory=SoftDeletedEntitiesCleanupConfig, description="Configuration for soft deleted entities cleanup", ) - execution_request_cleanup: Optional[DatahubExecutionRequestCleanupConfig] = Field( - default=None, + execution_request_cleanup: DatahubExecutionRequestCleanupConfig = Field( + default_factory=DatahubExecutionRequestCleanupConfig, description="Configuration for execution request cleanup", ) @@ -108,28 +108,22 @@ def __init__(self, ctx: PipelineContext, config: DataHubGcSourceConfig): self.ctx = ctx self.config = config self.report = DataHubGcSourceReport() + self.report.event_not_produced_warn = False self.graph = ctx.require_graph("The DataHubGc source") - self.dataprocess_cleanup: Optional[DataProcessCleanup] = None - self.soft_deleted_entities_cleanup: Optional[SoftDeletedEntitiesCleanup] = None - self.execution_request_cleanup: Optional[DatahubExecutionRequestCleanup] = None - - if self.config.dataprocess_cleanup: - self.dataprocess_cleanup = DataProcessCleanup( - ctx, self.config.dataprocess_cleanup, self.report, self.config.dry_run - ) - if self.config.soft_deleted_entities_cleanup: - self.soft_deleted_entities_cleanup = SoftDeletedEntitiesCleanup( - ctx, - self.config.soft_deleted_entities_cleanup, - self.report, - 
self.config.dry_run, - ) - if self.config.execution_request_cleanup: - self.execution_request_cleanup = DatahubExecutionRequestCleanup( - config=self.config.execution_request_cleanup, - graph=self.graph, - report=self.report, - ) + self.dataprocess_cleanup = DataProcessCleanup( + ctx, self.config.dataprocess_cleanup, self.report, self.config.dry_run + ) + self.soft_deleted_entities_cleanup = SoftDeletedEntitiesCleanup( + ctx, + self.config.soft_deleted_entities_cleanup, + self.report, + self.config.dry_run, + ) + self.execution_request_cleanup = DatahubExecutionRequestCleanup( + config=self.config.execution_request_cleanup, + graph=self.graph, + report=self.report, + ) @classmethod def create(cls, config_dict, ctx): @@ -153,19 +147,19 @@ def get_workunits_internal( self.truncate_indices() except Exception as e: self.report.failure("While trying to truncate indices ", exc=e) - if self.soft_deleted_entities_cleanup: + if self.config.soft_deleted_entities_cleanup.enabled: try: self.soft_deleted_entities_cleanup.cleanup_soft_deleted_entities() except Exception as e: self.report.failure( "While trying to cleanup soft deleted entities ", exc=e ) - if self.execution_request_cleanup: + if self.config.execution_request_cleanup.enabled: try: self.execution_request_cleanup.run() except Exception as e: self.report.failure("While trying to cleanup execution request ", exc=e) - if self.dataprocess_cleanup: + if self.config.dataprocess_cleanup.enabled: try: yield from self.dataprocess_cleanup.get_workunits_internal() except Exception as e: diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py index 8aacf13cdb00fb..6d16aaab2d7980 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py @@ -98,6 +98,9 @@ class DataProcessCleanupConfig(ConfigModel): + enabled: bool = Field( + default=True, description="Whether to do data process cleanup." 
+ ) retention_days: Optional[int] = Field( 10, description="Number of days to retain metadata in DataHub", @@ -371,17 +374,26 @@ def get_data_flows(self) -> Iterable[DataFlowEntity]: previous_scroll_id: Optional[str] = None while True: - result = self.ctx.graph.execute_graphql( - DATAFLOW_QUERY, - { - "query": "*", - "scrollId": scroll_id if scroll_id else None, - "batchSize": self.config.batch_size, - }, - ) + result = None + try: + result = self.ctx.graph.execute_graphql( + DATAFLOW_QUERY, + { + "query": "*", + "scrollId": scroll_id if scroll_id else None, + "batchSize": self.config.batch_size, + }, + ) + except Exception as e: + self.report.failure( + f"While trying to get dataflows with {scroll_id}", exc=e + ) + break + scrollAcrossEntities = result.get("scrollAcrossEntities") if not scrollAcrossEntities: raise ValueError("Missing scrollAcrossEntities in response") + logger.info(f"Got {scrollAcrossEntities.get('count')} DataFlow entities") scroll_id = scrollAcrossEntities.get("nextScrollId") for flow in scrollAcrossEntities.get("searchResults"): @@ -398,6 +410,8 @@ def get_data_flows(self) -> Iterable[DataFlowEntity]: previous_scroll_id = scroll_id def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: + if not self.config.enabled: + return [] assert self.ctx.graph dataFlows: Dict[str, DataFlowEntity] = {} @@ -411,14 +425,20 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: deleted_jobs: int = 0 while True: - result = self.ctx.graph.execute_graphql( - DATAJOB_QUERY, - { - "query": "*", - "scrollId": scroll_id if scroll_id else None, - "batchSize": self.config.batch_size, - }, - ) + try: + result = self.ctx.graph.execute_graphql( + DATAJOB_QUERY, + { + "query": "*", + "scrollId": scroll_id if scroll_id else None, + "batchSize": self.config.batch_size, + }, + ) + except Exception as e: + self.report.failure( + f"While trying to get data jobs with {scroll_id}", exc=e + ) + break scrollAcrossEntities = result.get("scrollAcrossEntities") if not scrollAcrossEntities: raise ValueError("Missing scrollAcrossEntities in response") diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py index bb4ab753543b7b..93f004ab675edc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py @@ -20,6 +20,9 @@ class SoftDeletedEntitiesCleanupConfig(ConfigModel): + enabled: bool = Field( + default=True, description="Whether to do soft deletion cleanup." 
+ ) retention_days: Optional[int] = Field( 10, description="Number of days to retain metadata in DataHub", @@ -156,6 +159,8 @@ def delete_soft_deleted_entity(self, urn: str) -> None: self.delete_entity(urn) def cleanup_soft_deleted_entities(self) -> None: + if not self.config.enabled: + return assert self.ctx.graph start_time = time.time() diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka_connect.py deleted file mode 100644 index 23a99ccb310e13..00000000000000 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka_connect.py +++ /dev/null @@ -1,1468 +0,0 @@ -import logging -import re -from dataclasses import dataclass, field -from typing import Dict, Iterable, List, Optional, Tuple - -import jpype -import jpype.imports -import requests -from pydantic.fields import Field -from sqlalchemy.engine.url import make_url - -import datahub.emitter.mce_builder as builder -import datahub.metadata.schema_classes as models -from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.source_common import ( - DatasetLineageProviderConfigBase, - PlatformInstanceConfigMixin, -) -from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.api.common import PipelineContext -from datahub.ingestion.api.decorators import ( - SourceCapability, - SupportStatus, - capability, - config_class, - platform_name, - support_status, -) -from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source -from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( - get_platform_from_sqlalchemy_uri, -) -from datahub.ingestion.source.state.stale_entity_removal_handler import ( - StaleEntityRemovalHandler, - StaleEntityRemovalSourceReport, - StatefulStaleMetadataRemovalConfig, -) -from datahub.ingestion.source.state.stateful_ingestion_base import ( - StatefulIngestionConfigBase, - StatefulIngestionSourceBase, -) - -logger = logging.getLogger(__name__) - -KAFKA = "kafka" -SOURCE = "source" -SINK = "sink" -CONNECTOR_CLASS = "connector.class" - - -class ProvidedConfig(ConfigModel): - provider: str - path_key: str - value: str - - -class GenericConnectorConfig(ConfigModel): - connector_name: str - source_dataset: str - source_platform: str - - -class KafkaConnectSourceConfig( - PlatformInstanceConfigMixin, - DatasetLineageProviderConfigBase, - StatefulIngestionConfigBase, -): - # See the Connect REST Interface for details - # https://docs.confluent.io/platform/current/connect/references/restapi.html# - connect_uri: str = Field( - default="http://localhost:8083/", description="URI to connect to." - ) - username: Optional[str] = Field(default=None, description="Kafka Connect username.") - password: Optional[str] = Field(default=None, description="Kafka Connect password.") - cluster_name: Optional[str] = Field( - default="connect-cluster", description="Cluster to ingest from." 
- ) - # convert lineage dataset's urns to lowercase - convert_lineage_urns_to_lowercase: bool = Field( - default=False, - description="Whether to convert the urns of ingested lineage dataset to lowercase", - ) - connector_patterns: AllowDenyPattern = Field( - default=AllowDenyPattern.allow_all(), - description="regex patterns for connectors to filter for ingestion.", - ) - provided_configs: Optional[List[ProvidedConfig]] = Field( - default=None, description="Provided Configurations" - ) - connect_to_platform_map: Optional[Dict[str, Dict[str, str]]] = Field( - default=None, - description='Platform instance mapping when multiple instances for a platform is available. Entry for a platform should be in either `platform_instance_map` or `connect_to_platform_map`. e.g.`connect_to_platform_map: { "postgres-connector-finance-db": "postgres": "core_finance_instance" }`', - ) - platform_instance_map: Optional[Dict[str, str]] = Field( - default=None, - description='Platform instance mapping to use when constructing URNs. e.g.`platform_instance_map: { "hive": "warehouse" }`', - ) - generic_connectors: List[GenericConnectorConfig] = Field( - default=[], - description="Provide lineage graph for sources connectors other than Confluent JDBC Source Connector, Debezium Source Connector, and Mongo Source Connector", - ) - - stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None - - -@dataclass -class KafkaConnectSourceReport(StaleEntityRemovalSourceReport): - connectors_scanned: int = 0 - filtered: List[str] = field(default_factory=list) - - def report_connector_scanned(self, connector: str) -> None: - self.connectors_scanned += 1 - - def report_dropped(self, connector: str) -> None: - self.filtered.append(connector) - - -@dataclass -class KafkaConnectLineage: - """Class to store Kafka Connect lineage mapping, Each instance is potential DataJob""" - - source_platform: str - target_dataset: str - target_platform: str - job_property_bag: Optional[Dict[str, str]] = None - source_dataset: Optional[str] = None - - -@dataclass -class ConnectorManifest: - """Each instance is potential DataFlow""" - - name: str - type: str - config: Dict - tasks: Dict - url: Optional[str] = None - flow_property_bag: Optional[Dict[str, str]] = None - lineages: List[KafkaConnectLineage] = field(default_factory=list) - topic_names: Iterable[str] = field(default_factory=list) - - -def remove_prefix(text: str, prefix: str) -> str: - if text.startswith(prefix): - index = len(prefix) - return text[index:] - return text - - -def unquote( - string: str, leading_quote: str = '"', trailing_quote: Optional[str] = None -) -> str: - """ - If string starts and ends with a quote, unquote it - """ - trailing_quote = trailing_quote if trailing_quote else leading_quote - if string.startswith(leading_quote) and string.endswith(trailing_quote): - string = string[1:-1] - return string - - -def get_dataset_name( - database_name: Optional[str], - source_table: str, -) -> str: - if database_name: - dataset_name = database_name + "." 
+ source_table - else: - dataset_name = source_table - - return dataset_name - - -def get_platform_instance( - config: KafkaConnectSourceConfig, connector_name: str, platform: str -) -> Optional[str]: - instance_name = None - if ( - config.connect_to_platform_map - and config.connect_to_platform_map.get(connector_name) - and config.connect_to_platform_map[connector_name].get(platform) - ): - instance_name = config.connect_to_platform_map[connector_name][platform] - if config.platform_instance_map and config.platform_instance_map.get(platform): - logger.warning( - f"Same source platform {platform} configured in both platform_instance_map and connect_to_platform_map." - "Will prefer connector specific platform instance from connect_to_platform_map." - ) - elif config.platform_instance_map and config.platform_instance_map.get(platform): - instance_name = config.platform_instance_map[platform] - logger.info( - f"Instance name assigned is: {instance_name} for Connector Name {connector_name} and platform {platform}" - ) - return instance_name - - -@dataclass -class ConfluentJDBCSourceConnector: - connector_manifest: ConnectorManifest - report: KafkaConnectSourceReport - - def __init__( - self, - connector_manifest: ConnectorManifest, - config: KafkaConnectSourceConfig, - report: KafkaConnectSourceReport, - ) -> None: - self.connector_manifest = connector_manifest - self.config = config - self.report = report - self._extract_lineages() - - REGEXROUTER = "org.apache.kafka.connect.transforms.RegexRouter" - KNOWN_TOPICROUTING_TRANSFORMS = [REGEXROUTER] - # https://kafka.apache.org/documentation/#connect_included_transformation - KAFKA_NONTOPICROUTING_TRANSFORMS = [ - "InsertField", - "InsertField$Key", - "InsertField$Value", - "ReplaceField", - "ReplaceField$Key", - "ReplaceField$Value", - "MaskField", - "MaskField$Key", - "MaskField$Value", - "ValueToKey", - "ValueToKey$Key", - "ValueToKey$Value", - "HoistField", - "HoistField$Key", - "HoistField$Value", - "ExtractField", - "ExtractField$Key", - "ExtractField$Value", - "SetSchemaMetadata", - "SetSchemaMetadata$Key", - "SetSchemaMetadata$Value", - "Flatten", - "Flatten$Key", - "Flatten$Value", - "Cast", - "Cast$Key", - "Cast$Value", - "HeadersFrom", - "HeadersFrom$Key", - "HeadersFrom$Value", - "TimestampConverter", - "Filter", - "InsertHeader", - "DropHeaders", - ] - # https://docs.confluent.io/platform/current/connect/transforms/overview.html - CONFLUENT_NONTOPICROUTING_TRANSFORMS = [ - "Drop", - "Drop$Key", - "Drop$Value", - "Filter", - "Filter$Key", - "Filter$Value", - "TombstoneHandler", - ] - KNOWN_NONTOPICROUTING_TRANSFORMS = ( - KAFKA_NONTOPICROUTING_TRANSFORMS - + [ - f"org.apache.kafka.connect.transforms.{t}" - for t in KAFKA_NONTOPICROUTING_TRANSFORMS - ] - + CONFLUENT_NONTOPICROUTING_TRANSFORMS - + [ - f"io.confluent.connect.transforms.{t}" - for t in CONFLUENT_NONTOPICROUTING_TRANSFORMS - ] - ) - - @dataclass - class JdbcParser: - db_connection_url: str - source_platform: str - database_name: str - topic_prefix: str - query: str - transforms: list - - def get_parser( - self, - connector_manifest: ConnectorManifest, - ) -> JdbcParser: - url = remove_prefix( - str(connector_manifest.config.get("connection.url")), "jdbc:" - ) - url_instance = make_url(url) - source_platform = get_platform_from_sqlalchemy_uri(str(url_instance)) - database_name = url_instance.database - assert database_name - db_connection_url = f"{url_instance.drivername}://{url_instance.host}:{url_instance.port}/{database_name}" - - topic_prefix = 
self.connector_manifest.config.get("topic.prefix", None) - - query = self.connector_manifest.config.get("query", None) - - transform_names = ( - self.connector_manifest.config.get("transforms", "").split(",") - if self.connector_manifest.config.get("transforms") - else [] - ) - - transforms = [] - for name in transform_names: - transform = {"name": name} - transforms.append(transform) - for key in self.connector_manifest.config.keys(): - if key.startswith(f"transforms.{name}."): - transform[ - key.replace(f"transforms.{name}.", "") - ] = self.connector_manifest.config[key] - - return self.JdbcParser( - db_connection_url, - source_platform, - database_name, - topic_prefix, - query, - transforms, - ) - - def default_get_lineages( - self, - topic_prefix: str, - database_name: str, - source_platform: str, - topic_names: Optional[Iterable[str]] = None, - include_source_dataset: bool = True, - ) -> List[KafkaConnectLineage]: - lineages: List[KafkaConnectLineage] = [] - if not topic_names: - topic_names = self.connector_manifest.topic_names - table_name_tuples: List[Tuple] = self.get_table_names() - for topic in topic_names: - # All good for NO_TRANSFORM or (SINGLE_TRANSFORM and KNOWN_NONTOPICROUTING_TRANSFORM) or (not SINGLE_TRANSFORM and all(KNOWN_NONTOPICROUTING_TRANSFORM)) - source_table: str = ( - remove_prefix(topic, topic_prefix) if topic_prefix else topic - ) - # include schema name for three-level hierarchies - if has_three_level_hierarchy(source_platform): - table_name_tuple: Tuple = next( - iter([t for t in table_name_tuples if t and t[-1] == source_table]), - (), - ) - if len(table_name_tuple) > 1: - source_table = f"{table_name_tuple[-2]}.{source_table}" - else: - include_source_dataset = False - self.report.warning( - "Could not find schema for table" - f"{self.connector_manifest.name} : {source_table}", - ) - dataset_name: str = get_dataset_name(database_name, source_table) - lineage = KafkaConnectLineage( - source_dataset=dataset_name if include_source_dataset else None, - source_platform=source_platform, - target_dataset=topic, - target_platform=KAFKA, - ) - lineages.append(lineage) - return lineages - - def get_table_names(self) -> List[Tuple]: - sep: str = "." 
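# A minimal sketch (with made-up config values) of how the JDBC source lineage code
# above turns a Kafka topic back into a source dataset name: strip the connector's
# topic.prefix, optionally prepend the schema for three-level platforms, then join
# with the database name. The helpers come from the kafka_connect.common module
# added later in this diff; the topic, prefix and database names are illustrative.
from datahub.ingestion.source.kafka_connect.common import (
    get_dataset_name,
    has_three_level_hierarchy,
    remove_prefix,
)

topic = "finance-customers"        # topic written by the connector
topic_prefix = "finance-"          # connector's topic.prefix
database_name = "core_db"          # parsed from connection.url
source_platform = "postgres"

source_table = remove_prefix(topic, topic_prefix)      # "customers"
if has_three_level_hierarchy(source_platform):
    # for postgres/trino/redshift/snowflake the schema (looked up from the task
    # config in the real code) is prepended, e.g. "public.customers"
    source_table = f"public.{source_table}"
print(get_dataset_name(database_name, source_table))   # core_db.public.customers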
- leading_quote_char: str = '"' - trailing_quote_char: str = leading_quote_char - - table_ids: List[str] = [] - if self.connector_manifest.tasks: - table_ids = ( - ",".join( - [ - task["config"].get("tables") - for task in self.connector_manifest.tasks - ] - ) - ).split(",") - quote_method = self.connector_manifest.config.get( - "quote.sql.identifiers", "always" - ) - if ( - quote_method == "always" - and table_ids - and table_ids[0] - and table_ids[-1] - ): - leading_quote_char = table_ids[0][0] - trailing_quote_char = table_ids[-1][-1] - # This will only work for single character quotes - elif self.connector_manifest.config.get("table.whitelist"): - table_ids = self.connector_manifest.config.get("table.whitelist").split(",") # type: ignore - - # List of Tuple containing (schema, table) - tables: List[Tuple] = [ - ( - ( - unquote( - table_id.split(sep)[-2], leading_quote_char, trailing_quote_char - ) - if len(table_id.split(sep)) > 1 - else "" - ), - unquote( - table_id.split(sep)[-1], leading_quote_char, trailing_quote_char - ), - ) - for table_id in table_ids - ] - return tables - - def _extract_lineages(self): - lineages: List[KafkaConnectLineage] = list() - parser = self.get_parser(self.connector_manifest) - source_platform = parser.source_platform - database_name = parser.database_name - query = parser.query - topic_prefix = parser.topic_prefix - transforms = parser.transforms - self.connector_manifest.flow_property_bag = self.connector_manifest.config - - # Mask/Remove properties that may reveal credentials - self.connector_manifest.flow_property_bag[ - "connection.url" - ] = parser.db_connection_url - if "connection.password" in self.connector_manifest.flow_property_bag: - del self.connector_manifest.flow_property_bag["connection.password"] - if "connection.user" in self.connector_manifest.flow_property_bag: - del self.connector_manifest.flow_property_bag["connection.user"] - - logging.debug( - f"Extracting source platform: {source_platform} and database name: {database_name} from connection url " - ) - - if not self.connector_manifest.topic_names: - self.connector_manifest.lineages = lineages - return - - if query: - # Lineage source_table can be extracted by parsing query - for topic in self.connector_manifest.topic_names: - # default method - as per earlier implementation - dataset_name: str = get_dataset_name(database_name, topic) - - lineage = KafkaConnectLineage( - source_dataset=None, - source_platform=source_platform, - target_dataset=topic, - target_platform=KAFKA, - ) - lineages.append(lineage) - self.report.warning( - "Could not find input dataset, the connector has query configuration set", - self.connector_manifest.name, - ) - self.connector_manifest.lineages = lineages - return - - SINGLE_TRANSFORM = len(transforms) == 1 - NO_TRANSFORM = len(transforms) == 0 - UNKNOWN_TRANSFORM = any( - [ - transform["type"] - not in self.KNOWN_TOPICROUTING_TRANSFORMS - + self.KNOWN_NONTOPICROUTING_TRANSFORMS - for transform in transforms - ] - ) - ALL_TRANSFORMS_NON_TOPICROUTING = all( - [ - transform["type"] in self.KNOWN_NONTOPICROUTING_TRANSFORMS - for transform in transforms - ] - ) - - if NO_TRANSFORM or ALL_TRANSFORMS_NON_TOPICROUTING: - self.connector_manifest.lineages = self.default_get_lineages( - database_name=database_name, - source_platform=source_platform, - topic_prefix=topic_prefix, - ) - return - - if SINGLE_TRANSFORM and transforms[0]["type"] == self.REGEXROUTER: - tables = self.get_table_names() - topic_names = list(self.connector_manifest.topic_names) - - from 
java.util.regex import Pattern - - for table in tables: - source_table: str = table[-1] - topic = topic_prefix + source_table if topic_prefix else source_table - - transform_regex = Pattern.compile(transforms[0]["regex"]) - transform_replacement = transforms[0]["replacement"] - - matcher = transform_regex.matcher(topic) - if matcher.matches(): - topic = str(matcher.replaceFirst(transform_replacement)) - - # Additional check to confirm that the topic present - # in connector topics - - if topic in self.connector_manifest.topic_names: - # include schema name for three-level hierarchies - if has_three_level_hierarchy(source_platform) and len(table) > 1: - source_table = f"{table[-2]}.{table[-1]}" - - dataset_name = get_dataset_name(database_name, source_table) - - lineage = KafkaConnectLineage( - source_dataset=dataset_name, - source_platform=source_platform, - target_dataset=topic, - target_platform=KAFKA, - ) - topic_names.remove(topic) - lineages.append(lineage) - - if topic_names: - lineages.extend( - self.default_get_lineages( - database_name=database_name, - source_platform=source_platform, - topic_prefix=topic_prefix, - topic_names=topic_names, - include_source_dataset=False, - ) - ) - self.report.warning( - "Could not find input dataset for connector topics", - f"{self.connector_manifest.name} : {topic_names}", - ) - self.connector_manifest.lineages = lineages - return - else: - include_source_dataset = True - if SINGLE_TRANSFORM and UNKNOWN_TRANSFORM: - self.report.warning( - "Could not find input dataset, connector has unknown transform", - f"{self.connector_manifest.name} : {transforms[0]['type']}", - ) - include_source_dataset = False - if not SINGLE_TRANSFORM and UNKNOWN_TRANSFORM: - self.report.warning( - "Could not find input dataset, connector has one or more unknown transforms", - self.connector_manifest.name, - ) - include_source_dataset = False - lineages = self.default_get_lineages( - database_name=database_name, - source_platform=source_platform, - topic_prefix=topic_prefix, - include_source_dataset=include_source_dataset, - ) - self.connector_manifest.lineages = lineages - return - - -@dataclass -class MongoSourceConnector: - # https://www.mongodb.com/docs/kafka-connector/current/source-connector/ - - connector_manifest: ConnectorManifest - - def __init__( - self, connector_manifest: ConnectorManifest, config: KafkaConnectSourceConfig - ) -> None: - self.connector_manifest = connector_manifest - self.config = config - self._extract_lineages() - - @dataclass - class MongoSourceParser: - db_connection_url: Optional[str] - source_platform: str - database_name: Optional[str] - topic_prefix: Optional[str] - transforms: List[str] - - def get_parser( - self, - connector_manifest: ConnectorManifest, - ) -> MongoSourceParser: - parser = self.MongoSourceParser( - db_connection_url=connector_manifest.config.get("connection.uri"), - source_platform="mongodb", - database_name=connector_manifest.config.get("database"), - topic_prefix=connector_manifest.config.get("topic_prefix"), - transforms=( - connector_manifest.config["transforms"].split(",") - if "transforms" in connector_manifest.config - else [] - ), - ) - - return parser - - def _extract_lineages(self): - lineages: List[KafkaConnectLineage] = list() - parser = self.get_parser(self.connector_manifest) - source_platform = parser.source_platform - topic_naming_pattern = r"mongodb\.(\w+)\.(\w+)" - - if not self.connector_manifest.topic_names: - return lineages - - for topic in self.connector_manifest.topic_names: - found = 
re.search(re.compile(topic_naming_pattern), topic) - - if found: - table_name = get_dataset_name(found.group(1), found.group(2)) - - lineage = KafkaConnectLineage( - source_dataset=table_name, - source_platform=source_platform, - target_dataset=topic, - target_platform=KAFKA, - ) - lineages.append(lineage) - self.connector_manifest.lineages = lineages - - -@dataclass -class DebeziumSourceConnector: - connector_manifest: ConnectorManifest - report: KafkaConnectSourceReport - - def __init__( - self, - connector_manifest: ConnectorManifest, - config: KafkaConnectSourceConfig, - report: KafkaConnectSourceReport, - ) -> None: - self.connector_manifest = connector_manifest - self.config = config - self.report = report - self._extract_lineages() - - @dataclass - class DebeziumParser: - source_platform: str - server_name: Optional[str] - database_name: Optional[str] - - def get_server_name(self, connector_manifest: ConnectorManifest) -> str: - if "topic.prefix" in connector_manifest.config: - return connector_manifest.config["topic.prefix"] - else: - return connector_manifest.config.get("database.server.name", "") - - def get_parser( - self, - connector_manifest: ConnectorManifest, - ) -> DebeziumParser: - connector_class = connector_manifest.config.get(CONNECTOR_CLASS, "") - - if connector_class == "io.debezium.connector.mysql.MySqlConnector": - parser = self.DebeziumParser( - source_platform="mysql", - server_name=self.get_server_name(connector_manifest), - database_name=None, - ) - elif connector_class == "MySqlConnector": - parser = self.DebeziumParser( - source_platform="mysql", - server_name=self.get_server_name(connector_manifest), - database_name=None, - ) - elif connector_class == "io.debezium.connector.mongodb.MongoDbConnector": - parser = self.DebeziumParser( - source_platform="mongodb", - server_name=self.get_server_name(connector_manifest), - database_name=None, - ) - elif connector_class == "io.debezium.connector.postgresql.PostgresConnector": - parser = self.DebeziumParser( - source_platform="postgres", - server_name=self.get_server_name(connector_manifest), - database_name=connector_manifest.config.get("database.dbname"), - ) - elif connector_class == "io.debezium.connector.oracle.OracleConnector": - parser = self.DebeziumParser( - source_platform="oracle", - server_name=self.get_server_name(connector_manifest), - database_name=connector_manifest.config.get("database.dbname"), - ) - elif connector_class == "io.debezium.connector.sqlserver.SqlServerConnector": - database_name = connector_manifest.config.get( - "database.names" - ) or connector_manifest.config.get("database.dbname") - - if "," in str(database_name): - raise Exception( - f"Only one database is supported for Debezium's SQL Server connector. 
Found: {database_name}" - ) - - parser = self.DebeziumParser( - source_platform="mssql", - server_name=self.get_server_name(connector_manifest), - database_name=database_name, - ) - elif connector_class == "io.debezium.connector.db2.Db2Connector": - parser = self.DebeziumParser( - source_platform="db2", - server_name=self.get_server_name(connector_manifest), - database_name=connector_manifest.config.get("database.dbname"), - ) - elif connector_class == "io.debezium.connector.vitess.VitessConnector": - parser = self.DebeziumParser( - source_platform="vitess", - server_name=self.get_server_name(connector_manifest), - database_name=connector_manifest.config.get("vitess.keyspace"), - ) - else: - raise ValueError(f"Connector class '{connector_class}' is unknown.") - - return parser - - def _extract_lineages(self): - lineages: List[KafkaConnectLineage] = list() - - try: - parser = self.get_parser(self.connector_manifest) - source_platform = parser.source_platform - server_name = parser.server_name - database_name = parser.database_name - topic_naming_pattern = rf"({server_name})\.(\w+\.\w+)" - - if not self.connector_manifest.topic_names: - return lineages - - for topic in self.connector_manifest.topic_names: - found = re.search(re.compile(topic_naming_pattern), topic) - - if found: - table_name = get_dataset_name(database_name, found.group(2)) - - lineage = KafkaConnectLineage( - source_dataset=table_name, - source_platform=source_platform, - target_dataset=topic, - target_platform=KAFKA, - ) - lineages.append(lineage) - self.connector_manifest.lineages = lineages - except Exception as e: - self.report.warning( - "Error resolving lineage for connector", - self.connector_manifest.name, - exc=e, - ) - - return - - -@dataclass -class BigQuerySinkConnector: - connector_manifest: ConnectorManifest - report: KafkaConnectSourceReport - - def __init__( - self, connector_manifest: ConnectorManifest, report: KafkaConnectSourceReport - ) -> None: - self.connector_manifest = connector_manifest - self.report = report - self._extract_lineages() - - @dataclass - class BQParser: - project: str - target_platform: str - sanitizeTopics: str - transforms: list - topicsToTables: Optional[str] = None - datasets: Optional[str] = None - defaultDataset: Optional[str] = None - version: str = "v1" - - def get_parser( - self, - connector_manifest: ConnectorManifest, - ) -> BQParser: - project = connector_manifest.config["project"] - sanitizeTopics = connector_manifest.config.get("sanitizeTopics", "false") - transform_names = ( - self.connector_manifest.config.get("transforms", "").split(",") - if self.connector_manifest.config.get("transforms") - else [] - ) - transforms = [] - for name in transform_names: - transform = {"name": name} - transforms.append(transform) - for key in self.connector_manifest.config.keys(): - if key.startswith(f"transforms.{name}."): - transform[ - key.replace(f"transforms.{name}.", "") - ] = self.connector_manifest.config[key] - - if "defaultDataset" in connector_manifest.config: - defaultDataset = connector_manifest.config["defaultDataset"] - return self.BQParser( - project=project, - defaultDataset=defaultDataset, - target_platform="bigquery", - sanitizeTopics=sanitizeTopics.lower() == "true", - version="v2", - transforms=transforms, - ) - else: - # version 1.6.x and similar configs supported - datasets = connector_manifest.config["datasets"] - topicsToTables = connector_manifest.config.get("topicsToTables") - - return self.BQParser( - project=project, - topicsToTables=topicsToTables, - 
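# A small, self-contained illustration of the Debezium topic-to-table matching used
# above, written with Python's re and invented server/database names: topics look
# like "<server_name>.<schema>.<table>", and group(2) becomes the source table that
# get_dataset_name joins with the configured database name.
import re

server_name = "pg-server-1"        # topic.prefix / database.server.name
database_name = "core_db"          # database.dbname for postgres
topic_naming_pattern = rf"({server_name})\.(\w+\.\w+)"

topic = "pg-server-1.public.customers"
found = re.search(topic_naming_pattern, topic)
if found:
    print(f"{database_name}.{found.group(2)}")   # core_db.public.customers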
datasets=datasets, - target_platform="bigquery", - sanitizeTopics=sanitizeTopics.lower() == "true", - transforms=transforms, - ) - - def get_list(self, property: str) -> Iterable[Tuple[str, str]]: - entries = property.split(",") - for entry in entries: - key, val = entry.rsplit("=") - yield (key.strip(), val.strip()) - - def get_dataset_for_topic_v1(self, topic: str, parser: BQParser) -> Optional[str]: - topicregex_dataset_map: Dict[str, str] = dict(self.get_list(parser.datasets)) # type: ignore - from java.util.regex import Pattern - - for pattern, dataset in topicregex_dataset_map.items(): - patternMatcher = Pattern.compile(pattern).matcher(topic) - if patternMatcher.matches(): - return dataset - return None - - def sanitize_table_name(self, table_name): - table_name = re.sub("[^a-zA-Z0-9_]", "_", table_name) - if re.match("^[^a-zA-Z_].*", table_name): - table_name = "_" + table_name - - return table_name - - def get_dataset_table_for_topic( - self, topic: str, parser: BQParser - ) -> Optional[str]: - if parser.version == "v2": - dataset = parser.defaultDataset - parts = topic.split(":") - if len(parts) == 2: - dataset = parts[0] - table = parts[1] - else: - table = parts[0] - else: - dataset = self.get_dataset_for_topic_v1(topic, parser) - if dataset is None: - return None - - table = topic - if parser.topicsToTables: - topicregex_table_map: Dict[str, str] = dict( - self.get_list(parser.topicsToTables) # type: ignore - ) - from java.util.regex import Pattern - - for pattern, tbl in topicregex_table_map.items(): - patternMatcher = Pattern.compile(pattern).matcher(topic) - if patternMatcher.matches(): - table = tbl - break - - if parser.sanitizeTopics: - table = self.sanitize_table_name(table) - return f"{dataset}.{table}" - - def apply_transformations( - self, topic: str, transforms: List[Dict[str, str]] - ) -> str: - for transform in transforms: - if transform["type"] == "org.apache.kafka.connect.transforms.RegexRouter": - regex = transform["regex"] - replacement = transform["replacement"] - pattern = re.compile(regex) - if pattern.match(topic): - topic = pattern.sub(replacement, topic, count=1) - return topic - - def _extract_lineages(self): - lineages: List[KafkaConnectLineage] = list() - parser = self.get_parser(self.connector_manifest) - if not parser: - return lineages - target_platform = parser.target_platform - project = parser.project - transforms = parser.transforms - self.connector_manifest.flow_property_bag = self.connector_manifest.config - # Mask/Remove properties that may reveal credentials - if "keyfile" in self.connector_manifest.flow_property_bag: - del self.connector_manifest.flow_property_bag["keyfile"] - - for topic in self.connector_manifest.topic_names: - transformed_topic = self.apply_transformations(topic, transforms) - dataset_table = self.get_dataset_table_for_topic(transformed_topic, parser) - if dataset_table is None: - self.report.warning( - "Could not find target dataset for topic, please check your connector configuration" - f"{self.connector_manifest.name} : {transformed_topic} ", - ) - continue - target_dataset = f"{project}.{dataset_table}" - - lineages.append( - KafkaConnectLineage( - source_dataset=transformed_topic, - source_platform=KAFKA, - target_dataset=target_dataset, - target_platform=target_platform, - ) - ) - self.connector_manifest.lineages = lineages - return - - -@dataclass -class SnowflakeSinkConnector: - connector_manifest: ConnectorManifest - report: KafkaConnectSourceReport - - def __init__( - self, connector_manifest: 
ConnectorManifest, report: KafkaConnectSourceReport - ) -> None: - self.connector_manifest = connector_manifest - self.report = report - self._extract_lineages() - - @dataclass - class SnowflakeParser: - database_name: str - schema_name: str - topics_to_tables: Dict[str, str] - - def get_table_name_from_topic_name(self, topic_name: str) -> str: - """ - This function converts the topic name to a valid Snowflake table name using some rules. - Refer below link for more info - https://docs.snowflake.com/en/user-guide/kafka-connector-overview#target-tables-for-kafka-topics - """ - table_name = re.sub("[^a-zA-Z0-9_]", "_", topic_name) - if re.match("^[^a-zA-Z_].*", table_name): - table_name = "_" + table_name - # Connector may append original topic's hash code as suffix for conflict resolution - # if generated table names for 2 topics are similar. This corner case is not handled here. - # Note that Snowflake recommends to choose topic names that follow the rules for - # Snowflake identifier names so this case is not recommended by snowflake. - return table_name - - def get_parser( - self, - connector_manifest: ConnectorManifest, - ) -> SnowflakeParser: - database_name = connector_manifest.config["snowflake.database.name"] - schema_name = connector_manifest.config["snowflake.schema.name"] - - # Fetch user provided topic to table map - provided_topics_to_tables: Dict[str, str] = {} - if connector_manifest.config.get("snowflake.topic2table.map"): - for each in connector_manifest.config["snowflake.topic2table.map"].split( - "," - ): - topic, table = each.split(":") - provided_topics_to_tables[topic.strip()] = table.strip() - - topics_to_tables: Dict[str, str] = {} - # Extract lineage for only those topics whose data ingestion started - for topic in connector_manifest.topic_names: - if topic in provided_topics_to_tables: - # If user provided which table to get mapped with this topic - topics_to_tables[topic] = provided_topics_to_tables[topic] - else: - # Else connector converts topic name to a valid Snowflake table name. 
- topics_to_tables[topic] = self.get_table_name_from_topic_name(topic) - - return self.SnowflakeParser( - database_name=database_name, - schema_name=schema_name, - topics_to_tables=topics_to_tables, - ) - - def _extract_lineages(self): - self.connector_manifest.flow_property_bag = self.connector_manifest.config - - # For all snowflake sink connector properties, refer below link - # https://docs.snowflake.com/en/user-guide/kafka-connector-install#configuring-the-kafka-connector - # remove private keys, secrets from properties - secret_properties = [ - "snowflake.private.key", - "snowflake.private.key.passphrase", - "value.converter.basic.auth.user.info", - ] - for k in secret_properties: - if k in self.connector_manifest.flow_property_bag: - del self.connector_manifest.flow_property_bag[k] - - lineages: List[KafkaConnectLineage] = list() - parser = self.get_parser(self.connector_manifest) - - for topic, table in parser.topics_to_tables.items(): - target_dataset = f"{parser.database_name}.{parser.schema_name}.{table}" - lineages.append( - KafkaConnectLineage( - source_dataset=topic, - source_platform=KAFKA, - target_dataset=target_dataset, - target_platform="snowflake", - ) - ) - - self.connector_manifest.lineages = lineages - return - - -@dataclass -class ConfluentS3SinkConnector: - connector_manifest: ConnectorManifest - - def __init__( - self, connector_manifest: ConnectorManifest, report: KafkaConnectSourceReport - ) -> None: - self.connector_manifest = connector_manifest - self.report = report - self._extract_lineages() - - @dataclass - class S3SinkParser: - target_platform: str - bucket: str - topics_dir: str - topics: Iterable[str] - - def _get_parser(self, connector_manifest: ConnectorManifest) -> S3SinkParser: - # https://docs.confluent.io/kafka-connectors/s3-sink/current/configuration_options.html#s3 - bucket = connector_manifest.config.get("s3.bucket.name") - if not bucket: - raise ValueError( - "Could not find 's3.bucket.name' in connector configuration" - ) - - # https://docs.confluent.io/kafka-connectors/s3-sink/current/configuration_options.html#storage - topics_dir = connector_manifest.config.get("topics.dir", "topics") - - return self.S3SinkParser( - target_platform="s3", - bucket=bucket, - topics_dir=topics_dir, - topics=connector_manifest.topic_names, - ) - - def _extract_lineages(self): - self.connector_manifest.flow_property_bag = self.connector_manifest.config - - # remove keys, secrets from properties - secret_properties = [ - "aws.access.key.id", - "aws.secret.access.key", - "s3.sse.customer.key", - "s3.proxy.password", - ] - for k in secret_properties: - if k in self.connector_manifest.flow_property_bag: - del self.connector_manifest.flow_property_bag[k] - - try: - parser = self._get_parser(self.connector_manifest) - - lineages: List[KafkaConnectLineage] = list() - for topic in parser.topics: - target_dataset = f"{parser.bucket}/{parser.topics_dir}/{topic}" - - lineages.append( - KafkaConnectLineage( - source_dataset=topic, - source_platform="kafka", - target_dataset=target_dataset, - target_platform=parser.target_platform, - ) - ) - self.connector_manifest.lineages = lineages - except Exception as e: - self.report.warning( - "Error resolving lineage for connector", - self.connector_manifest.name, - exc=e, - ) - - return - - -def transform_connector_config( - connector_config: Dict, provided_configs: List[ProvidedConfig] -) -> None: - """This method will update provided configs in connector config values, if any""" - lookupsByProvider = {} - for pconfig in 
provided_configs: - lookupsByProvider[f"${{{pconfig.provider}:{pconfig.path_key}}}"] = pconfig.value - for k, v in connector_config.items(): - for key, value in lookupsByProvider.items(): - if key in v: - connector_config[k] = connector_config[k].replace(key, value) - - -@platform_name("Kafka Connect") -@config_class(KafkaConnectSourceConfig) -@support_status(SupportStatus.CERTIFIED) -@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") -@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") -@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") -class KafkaConnectSource(StatefulIngestionSourceBase): - config: KafkaConnectSourceConfig - report: KafkaConnectSourceReport - platform: str = "kafka-connect" - - def __init__(self, config: KafkaConnectSourceConfig, ctx: PipelineContext): - super().__init__(config, ctx) - self.config = config - self.report = KafkaConnectSourceReport() - self.session = requests.Session() - self.session.headers.update( - { - "Accept": "application/json", - "Content-Type": "application/json", - } - ) - - # Test the connection - if self.config.username is not None and self.config.password is not None: - logger.info( - f"Connecting to {self.config.connect_uri} with Authentication..." - ) - self.session.auth = (self.config.username, self.config.password) - - test_response = self.session.get(f"{self.config.connect_uri}/connectors") - test_response.raise_for_status() - logger.info(f"Connection to {self.config.connect_uri} is ok") - if not jpype.isJVMStarted(): - jpype.startJVM() - - @classmethod - def create(cls, config_dict: dict, ctx: PipelineContext) -> Source: - config = KafkaConnectSourceConfig.parse_obj(config_dict) - return cls(config, ctx) - - def get_connectors_manifest(self) -> List[ConnectorManifest]: - """Get Kafka Connect connectors manifest using REST API. - Enrich with lineages metadata. 
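# The manifest-building logic above (and its replacement later in this diff) drives
# everything off the Kafka Connect REST API. A rough sketch of the calls involved,
# assuming a worker running at the default connect_uri; the endpoint paths mirror
# the ones used in this file.
import requests

connect_uri = "http://localhost:8083"
session = requests.Session()
session.headers.update({"Accept": "application/json", "Content-Type": "application/json"})

names = session.get(f"{connect_uri}/connectors").json()   # e.g. ["my-jdbc-source", ...]
for name in names:
    manifest = session.get(f"{connect_uri}/connectors/{name}").json()  # name/type/config/tasks
    topics = session.get(f"{connect_uri}/connectors/{name}/topics").json()[name]["topics"]
    tasks = session.get(f"{connect_uri}/connectors/{name}/tasks").json()
    print(name, manifest["type"], topics, len(tasks))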
- """ - connectors_manifest = list() - - connector_response = self.session.get( - f"{self.config.connect_uri}/connectors", - ) - - payload = connector_response.json() - - for connector_name in payload: - connector_url = f"{self.config.connect_uri}/connectors/{connector_name}" - connector_manifest = self._get_connector_manifest( - connector_name, connector_url - ) - if ( - connector_manifest is None - or not self.config.connector_patterns.allowed(connector_manifest.name) - ): - self.report.report_dropped(connector_name) - continue - - if self.config.provided_configs: - transform_connector_config( - connector_manifest.config, self.config.provided_configs - ) - # Initialize connector lineages - connector_manifest.lineages = list() - connector_manifest.url = connector_url - - connector_manifest.topic_names = self._get_connector_topics(connector_name) - - # Populate Source Connector metadata - if connector_manifest.type == SOURCE: - connector_manifest.tasks = self._get_connector_tasks(connector_name) - - # JDBC source connector lineages - if connector_manifest.config.get(CONNECTOR_CLASS).__eq__( - "io.confluent.connect.jdbc.JdbcSourceConnector" - ): - connector_manifest = ConfluentJDBCSourceConnector( - connector_manifest=connector_manifest, - config=self.config, - report=self.report, - ).connector_manifest - elif connector_manifest.config.get(CONNECTOR_CLASS, "").startswith( - "io.debezium.connector" - ): - connector_manifest = DebeziumSourceConnector( - connector_manifest=connector_manifest, - config=self.config, - report=self.report, - ).connector_manifest - elif ( - connector_manifest.config.get(CONNECTOR_CLASS, "") - == "com.mongodb.kafka.connect.MongoSourceConnector" - ): - connector_manifest = MongoSourceConnector( - connector_manifest=connector_manifest, config=self.config - ).connector_manifest - else: - # Find the target connector object in the list, or log an error if unknown. - target_connector = None - for connector in self.config.generic_connectors: - if connector.connector_name == connector_manifest.name: - target_connector = connector - break - if not target_connector: - logger.warning( - f"Detected undefined connector {connector_manifest.name}, which is not in the customized connector list. Please refer to Kafka Connect ingestion recipe to define this customized connector." - ) - continue - - for topic in connector_manifest.topic_names: - lineage = KafkaConnectLineage( - source_dataset=target_connector.source_dataset, - source_platform=target_connector.source_platform, - target_dataset=topic, - target_platform=KAFKA, - ) - - connector_manifest.lineages.append(lineage) - - if connector_manifest.type == SINK: - if connector_manifest.config.get(CONNECTOR_CLASS).__eq__( - "com.wepay.kafka.connect.bigquery.BigQuerySinkConnector" - ): - connector_manifest = BigQuerySinkConnector( - connector_manifest=connector_manifest, report=self.report - ).connector_manifest - elif connector_manifest.config.get("connector.class").__eq__( - "io.confluent.connect.s3.S3SinkConnector" - ): - connector_manifest = ConfluentS3SinkConnector( - connector_manifest=connector_manifest, report=self.report - ).connector_manifest - elif connector_manifest.config.get("connector.class").__eq__( - "com.snowflake.kafka.connector.SnowflakeSinkConnector" - ): - connector_manifest = SnowflakeSinkConnector( - connector_manifest=connector_manifest, report=self.report - ).connector_manifest - else: - self.report.report_dropped(connector_manifest.name) - logger.warning( - f"Skipping connector {connector_manifest.name}. 
Lineage for Connector not yet implemented" - ) - pass - - connectors_manifest.append(connector_manifest) - - return connectors_manifest - - def _get_connector_manifest( - self, connector_name: str, connector_url: str - ) -> Optional[ConnectorManifest]: - try: - connector_response = self.session.get(connector_url) - connector_response.raise_for_status() - except Exception as e: - self.report.warning( - "Failed to get connector details", connector_name, exc=e - ) - return None - manifest = connector_response.json() - connector_manifest = ConnectorManifest(**manifest) - return connector_manifest - - def _get_connector_tasks(self, connector_name: str) -> dict: - try: - response = self.session.get( - f"{self.config.connect_uri}/connectors/{connector_name}/tasks", - ) - response.raise_for_status() - except Exception as e: - self.report.warning( - "Error getting connector tasks", context=connector_name, exc=e - ) - return {} - - return response.json() - - def _get_connector_topics(self, connector_name: str) -> List[str]: - try: - response = self.session.get( - f"{self.config.connect_uri}/connectors/{connector_name}/topics", - ) - response.raise_for_status() - except Exception as e: - self.report.warning( - "Error getting connector topics", context=connector_name, exc=e - ) - return [] - - return response.json()[connector_name]["topics"] - - def construct_flow_workunit(self, connector: ConnectorManifest) -> MetadataWorkUnit: - connector_name = connector.name - connector_type = connector.type - connector_class = connector.config.get(CONNECTOR_CLASS) - flow_property_bag = connector.flow_property_bag - # connector_url = connector.url # NOTE: this will expose connector credential when used - flow_urn = builder.make_data_flow_urn( - self.platform, - connector_name, - self.config.env, - self.config.platform_instance, - ) - - return MetadataChangeProposalWrapper( - entityUrn=flow_urn, - aspect=models.DataFlowInfoClass( - name=connector_name, - description=f"{connector_type.capitalize()} connector using `{connector_class}` plugin.", - customProperties=flow_property_bag, - # externalUrl=connector_url, # NOTE: this will expose connector credential when used - ), - ).as_workunit() - - def construct_job_workunits( - self, connector: ConnectorManifest - ) -> Iterable[MetadataWorkUnit]: - connector_name = connector.name - flow_urn = builder.make_data_flow_urn( - self.platform, - connector_name, - self.config.env, - self.config.platform_instance, - ) - - lineages = connector.lineages - if lineages: - for lineage in lineages: - source_dataset = lineage.source_dataset - source_platform = lineage.source_platform - target_dataset = lineage.target_dataset - target_platform = lineage.target_platform - job_property_bag = lineage.job_property_bag - - source_platform_instance = get_platform_instance( - self.config, connector_name, source_platform - ) - target_platform_instance = get_platform_instance( - self.config, connector_name, target_platform - ) - - job_id = self.get_job_id(lineage, connector, self.config) - job_urn = builder.make_data_job_urn_with_flow(flow_urn, job_id) - - inlets = ( - [ - self.make_lineage_dataset_urn( - source_platform, source_dataset, source_platform_instance - ) - ] - if source_dataset - else [] - ) - outlets = [ - self.make_lineage_dataset_urn( - target_platform, target_dataset, target_platform_instance - ) - ] - - yield MetadataChangeProposalWrapper( - entityUrn=job_urn, - aspect=models.DataJobInfoClass( - name=f"{connector_name}:{job_id}", - type="COMMAND", - 
customProperties=job_property_bag, - ), - ).as_workunit() - - yield MetadataChangeProposalWrapper( - entityUrn=job_urn, - aspect=models.DataJobInputOutputClass( - inputDatasets=inlets, - outputDatasets=outlets, - ), - ).as_workunit() - - def get_job_id( - self, - lineage: KafkaConnectLineage, - connector: ConnectorManifest, - config: KafkaConnectSourceConfig, - ) -> str: - connector_class = connector.config.get(CONNECTOR_CLASS) - - # Note - This block is only to maintain backward compatibility of Job URN - if ( - connector_class - and connector.type == SOURCE - and ( - "JdbcSourceConnector" in connector_class - or connector_class.startswith("io.debezium.connector") - ) - and lineage.source_dataset - and config.connect_to_platform_map - and config.connect_to_platform_map.get(connector.name) - and config.connect_to_platform_map[connector.name].get( - lineage.source_platform - ) - ): - return f"{config.connect_to_platform_map[connector.name][lineage.source_platform]}.{lineage.source_dataset}" - - return ( - lineage.source_dataset - if lineage.source_dataset - else f"unknown_source.{lineage.target_dataset}" - ) - - def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: - return [ - *super().get_workunit_processors(), - StaleEntityRemovalHandler.create( - self, self.config, self.ctx - ).workunit_processor, - ] - - def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: - connectors_manifest = self.get_connectors_manifest() - for connector in connectors_manifest: - name = connector.name - - yield self.construct_flow_workunit(connector) - yield from self.construct_job_workunits(connector) - self.report.report_connector_scanned(name) - - def get_report(self) -> KafkaConnectSourceReport: - return self.report - - def make_lineage_dataset_urn( - self, platform: str, name: str, platform_instance: Optional[str] - ) -> str: - if self.config.convert_lineage_urns_to_lowercase: - name = name.lower() - - return builder.make_dataset_urn_with_platform_instance( - platform, name, platform_instance, self.config.env - ) - - -# TODO: Find a more automated way to discover new platforms with 3 level naming hierarchy. 
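# A brief sketch of how the lineage endpoints above become dataset URNs. The builder
# call matches the one in make_lineage_dataset_urn; the platform instance and env
# values are made up for illustration.
import datahub.emitter.mce_builder as builder

platform = "postgres"
name = "core_db.public.customers"
platform_instance = "core_finance_instance"   # resolved via connect_to_platform_map
env = "PROD"

# convert_lineage_urns_to_lowercase would lower-case `name` first if enabled
urn = builder.make_dataset_urn_with_platform_instance(platform, name, platform_instance, env)
print(urn)
# -> urn:li:dataset:(urn:li:dataPlatform:postgres,core_finance_instance.core_db.public.customers,PROD)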
-def has_three_level_hierarchy(platform: str) -> bool: - return platform in ["postgres", "trino", "redshift", "snowflake"] diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/common.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/common.py new file mode 100644 index 00000000000000..36f6a96c0d4080 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/common.py @@ -0,0 +1,202 @@ +import logging +from dataclasses import dataclass, field +from typing import Dict, Iterable, List, Optional + +from pydantic.fields import Field + +from datahub.configuration.common import AllowDenyPattern, ConfigModel +from datahub.configuration.source_common import ( + DatasetLineageProviderConfigBase, + PlatformInstanceConfigMixin, +) +from datahub.ingestion.source.state.stale_entity_removal_handler import ( + StaleEntityRemovalSourceReport, + StatefulStaleMetadataRemovalConfig, +) +from datahub.ingestion.source.state.stateful_ingestion_base import ( + StatefulIngestionConfigBase, +) + +logger = logging.getLogger(__name__) + +KAFKA = "kafka" +SOURCE = "source" +SINK = "sink" +CONNECTOR_CLASS = "connector.class" + + +class ProvidedConfig(ConfigModel): + provider: str + path_key: str + value: str + + +class GenericConnectorConfig(ConfigModel): + connector_name: str + source_dataset: str + source_platform: str + + +class KafkaConnectSourceConfig( + PlatformInstanceConfigMixin, + DatasetLineageProviderConfigBase, + StatefulIngestionConfigBase, +): + # See the Connect REST Interface for details + # https://docs.confluent.io/platform/current/connect/references/restapi.html# + connect_uri: str = Field( + default="http://localhost:8083/", description="URI to connect to." + ) + username: Optional[str] = Field(default=None, description="Kafka Connect username.") + password: Optional[str] = Field(default=None, description="Kafka Connect password.") + cluster_name: Optional[str] = Field( + default="connect-cluster", description="Cluster to ingest from." + ) + # convert lineage dataset's urns to lowercase + convert_lineage_urns_to_lowercase: bool = Field( + default=False, + description="Whether to convert the urns of ingested lineage dataset to lowercase", + ) + connector_patterns: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="regex patterns for connectors to filter for ingestion.", + ) + provided_configs: Optional[List[ProvidedConfig]] = Field( + default=None, description="Provided Configurations" + ) + connect_to_platform_map: Optional[Dict[str, Dict[str, str]]] = Field( + default=None, + description='Platform instance mapping when multiple instances for a platform is available. Entry for a platform should be in either `platform_instance_map` or `connect_to_platform_map`. e.g.`connect_to_platform_map: { "postgres-connector-finance-db": "postgres": "core_finance_instance" }`', + ) + platform_instance_map: Optional[Dict[str, str]] = Field( + default=None, + description='Platform instance mapping to use when constructing URNs. 
e.g.`platform_instance_map: { "hive": "warehouse" }`', + ) + generic_connectors: List[GenericConnectorConfig] = Field( + default=[], + description="Provide lineage graph for sources connectors other than Confluent JDBC Source Connector, Debezium Source Connector, and Mongo Source Connector", + ) + + stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None + + +@dataclass +class KafkaConnectSourceReport(StaleEntityRemovalSourceReport): + connectors_scanned: int = 0 + filtered: List[str] = field(default_factory=list) + + def report_connector_scanned(self, connector: str) -> None: + self.connectors_scanned += 1 + + def report_dropped(self, connector: str) -> None: + self.filtered.append(connector) + + +@dataclass +class KafkaConnectLineage: + """Class to store Kafka Connect lineage mapping, Each instance is potential DataJob""" + + source_platform: str + target_dataset: str + target_platform: str + job_property_bag: Optional[Dict[str, str]] = None + source_dataset: Optional[str] = None + + +@dataclass +class ConnectorManifest: + """Each instance is potential DataFlow""" + + name: str + type: str + config: Dict + tasks: Dict + url: Optional[str] = None + flow_property_bag: Optional[Dict[str, str]] = None + lineages: List[KafkaConnectLineage] = field(default_factory=list) + topic_names: Iterable[str] = field(default_factory=list) + + +def remove_prefix(text: str, prefix: str) -> str: + if text.startswith(prefix): + index = len(prefix) + return text[index:] + return text + + +def unquote( + string: str, leading_quote: str = '"', trailing_quote: Optional[str] = None +) -> str: + """ + If string starts and ends with a quote, unquote it + """ + trailing_quote = trailing_quote if trailing_quote else leading_quote + if string.startswith(leading_quote) and string.endswith(trailing_quote): + string = string[1:-1] + return string + + +def get_dataset_name( + database_name: Optional[str], + source_table: str, +) -> str: + if database_name: + dataset_name = database_name + "." + source_table + else: + dataset_name = source_table + + return dataset_name + + +def get_platform_instance( + config: KafkaConnectSourceConfig, connector_name: str, platform: str +) -> Optional[str]: + instance_name = None + if ( + config.connect_to_platform_map + and config.connect_to_platform_map.get(connector_name) + and config.connect_to_platform_map[connector_name].get(platform) + ): + instance_name = config.connect_to_platform_map[connector_name][platform] + if config.platform_instance_map and config.platform_instance_map.get(platform): + logger.warning( + f"Same source platform {platform} configured in both platform_instance_map and connect_to_platform_map." + "Will prefer connector specific platform instance from connect_to_platform_map." 
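# A minimal sketch of the precedence implemented in get_platform_instance: a
# connector-specific entry in connect_to_platform_map wins over the global
# platform_instance_map. The connector names and instance values are illustrative,
# assuming the default connection settings are sufficient to build the config.
from datahub.ingestion.source.kafka_connect.common import (
    KafkaConnectSourceConfig,
    get_platform_instance,
)

config = KafkaConnectSourceConfig.parse_obj(
    {
        "connect_uri": "http://localhost:8083/",
        "platform_instance_map": {"postgres": "warehouse"},
        "connect_to_platform_map": {
            "postgres-connector-finance-db": {"postgres": "core_finance_instance"}
        },
    }
)

# connector-specific mapping takes precedence -> "core_finance_instance"
print(get_platform_instance(config, "postgres-connector-finance-db", "postgres"))
# no connector-specific entry, falls back to platform_instance_map -> "warehouse"
print(get_platform_instance(config, "some-other-connector", "postgres"))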
+ ) + elif config.platform_instance_map and config.platform_instance_map.get(platform): + instance_name = config.platform_instance_map[platform] + logger.info( + f"Instance name assigned is: {instance_name} for Connector Name {connector_name} and platform {platform}" + ) + return instance_name + + +def transform_connector_config( + connector_config: Dict, provided_configs: List[ProvidedConfig] +) -> None: + """This method will update provided configs in connector config values, if any""" + lookupsByProvider = {} + for pconfig in provided_configs: + lookupsByProvider[f"${{{pconfig.provider}:{pconfig.path_key}}}"] = pconfig.value + for k, v in connector_config.items(): + for key, value in lookupsByProvider.items(): + if key in v: + connector_config[k] = connector_config[k].replace(key, value) + + +# TODO: Find a more automated way to discover new platforms with 3 level naming hierarchy. +def has_three_level_hierarchy(platform: str) -> bool: + return platform in ["postgres", "trino", "redshift", "snowflake"] + + +@dataclass +class BaseConnector: + connector_manifest: ConnectorManifest + config: KafkaConnectSourceConfig + report: KafkaConnectSourceReport + + def extract_lineages(self) -> List[KafkaConnectLineage]: + return [] + + def extract_flow_property_bag(self) -> Optional[Dict[str, str]]: + return None diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py new file mode 100644 index 00000000000000..fa6b614c4b52a6 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py @@ -0,0 +1,367 @@ +import logging +from typing import Iterable, List, Optional, Type + +import jpype +import jpype.imports +import requests + +import datahub.emitter.mce_builder as builder +import datahub.metadata.schema_classes as models +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.api.decorators import ( + SourceCapability, + SupportStatus, + capability, + config_class, + platform_name, + support_status, +) +from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.kafka_connect.common import ( + CONNECTOR_CLASS, + SINK, + SOURCE, + BaseConnector, + ConnectorManifest, + KafkaConnectLineage, + KafkaConnectSourceConfig, + KafkaConnectSourceReport, + get_platform_instance, + transform_connector_config, +) +from datahub.ingestion.source.kafka_connect.sink_connectors import ( + BIGQUERY_SINK_CONNECTOR_CLASS, + S3_SINK_CONNECTOR_CLASS, + SNOWFLAKE_SINK_CONNECTOR_CLASS, + BigQuerySinkConnector, + ConfluentS3SinkConnector, + SnowflakeSinkConnector, +) +from datahub.ingestion.source.kafka_connect.source_connectors import ( + DEBEZIUM_SOURCE_CONNECTOR_PREFIX, + JDBC_SOURCE_CONNECTOR_CLASS, + MONGO_SOURCE_CONNECTOR_CLASS, + ConfigDrivenSourceConnector, + ConfluentJDBCSourceConnector, + DebeziumSourceConnector, + MongoSourceConnector, +) +from datahub.ingestion.source.state.stale_entity_removal_handler import ( + StaleEntityRemovalHandler, +) +from datahub.ingestion.source.state.stateful_ingestion_base import ( + StatefulIngestionSourceBase, +) + +logger = logging.getLogger(__name__) + + +@platform_name("Kafka Connect") +@config_class(KafkaConnectSourceConfig) +@support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") 
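# The refactor routes every connector through the BaseConnector seam defined above:
# the dispatcher instantiates a subclass with (manifest, config, report) and asks it
# for lineages and a sanitized flow property bag. A hypothetical handler might look
# like the sketch below; the "acme" platform and its config keys are invented purely
# for illustration.
from typing import Dict, List, Optional

from datahub.ingestion.source.kafka_connect.common import (
    KAFKA,
    BaseConnector,
    KafkaConnectLineage,
)


class AcmeSinkConnector(BaseConnector):
    def extract_lineages(self) -> List[KafkaConnectLineage]:
        target = self.connector_manifest.config.get("acme.table", "unknown")
        return [
            KafkaConnectLineage(
                source_dataset=topic,
                source_platform=KAFKA,
                target_dataset=target,
                target_platform="acme",
            )
            for topic in self.connector_manifest.topic_names
        ]

    def extract_flow_property_bag(self) -> Optional[Dict[str, str]]:
        # drop anything credential-like before it lands on the DataFlow properties
        return {
            k: v
            for k, v in self.connector_manifest.config.items()
            if k != "acme.api.key"
        }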
+@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") +@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") +class KafkaConnectSource(StatefulIngestionSourceBase): + config: KafkaConnectSourceConfig + report: KafkaConnectSourceReport + platform: str = "kafka-connect" + + def __init__(self, config: KafkaConnectSourceConfig, ctx: PipelineContext): + super().__init__(config, ctx) + self.config = config + self.report = KafkaConnectSourceReport() + self.session = requests.Session() + self.session.headers.update( + { + "Accept": "application/json", + "Content-Type": "application/json", + } + ) + + # Test the connection + if self.config.username is not None and self.config.password is not None: + logger.info( + f"Connecting to {self.config.connect_uri} with Authentication..." + ) + self.session.auth = (self.config.username, self.config.password) + + test_response = self.session.get(f"{self.config.connect_uri}/connectors") + test_response.raise_for_status() + logger.info(f"Connection to {self.config.connect_uri} is ok") + if not jpype.isJVMStarted(): + jpype.startJVM() + + @classmethod + def create(cls, config_dict: dict, ctx: PipelineContext) -> Source: + config = KafkaConnectSourceConfig.parse_obj(config_dict) + return cls(config, ctx) + + def get_connectors_manifest(self) -> Iterable[ConnectorManifest]: + """Get Kafka Connect connectors manifest using REST API. + Enrich with lineages metadata. + """ + + connector_response = self.session.get( + f"{self.config.connect_uri}/connectors", + ) + + payload = connector_response.json() + + for connector_name in payload: + connector_url = f"{self.config.connect_uri}/connectors/{connector_name}" + connector_manifest = self._get_connector_manifest( + connector_name, connector_url + ) + if ( + connector_manifest is None + or not self.config.connector_patterns.allowed(connector_manifest.name) + ): + self.report.report_dropped(connector_name) + continue + + if self.config.provided_configs: + transform_connector_config( + connector_manifest.config, self.config.provided_configs + ) + connector_manifest.url = connector_url + connector_manifest.topic_names = self._get_connector_topics(connector_name) + connector_class_value = connector_manifest.config.get(CONNECTOR_CLASS) or "" + + class_type: Type[BaseConnector] = BaseConnector + + # Populate Source Connector metadata + if connector_manifest.type == SOURCE: + connector_manifest.tasks = self._get_connector_tasks(connector_name) + + # JDBC source connector lineages + if connector_class_value == JDBC_SOURCE_CONNECTOR_CLASS: + class_type = ConfluentJDBCSourceConnector + elif connector_class_value.startswith(DEBEZIUM_SOURCE_CONNECTOR_PREFIX): + class_type = DebeziumSourceConnector + elif connector_class_value == MONGO_SOURCE_CONNECTOR_CLASS: + class_type = MongoSourceConnector + elif any( + [ + connector.connector_name == connector_manifest.name + for connector in self.config.generic_connectors + ] + ): + class_type = ConfigDrivenSourceConnector + else: + self.report.report_dropped(connector_manifest.name) + self.report.warning( + "Lineage for Source Connector not supported. 
" + "Please refer to Kafka Connect docs to use `generic_connectors` config.", + context=f"{connector_manifest.name} of type {connector_class_value}", + ) + continue + elif connector_manifest.type == SINK: + if connector_class_value == BIGQUERY_SINK_CONNECTOR_CLASS: + class_type = BigQuerySinkConnector + elif connector_class_value == S3_SINK_CONNECTOR_CLASS: + class_type = ConfluentS3SinkConnector + elif connector_class_value == SNOWFLAKE_SINK_CONNECTOR_CLASS: + class_type = SnowflakeSinkConnector + else: + self.report.report_dropped(connector_manifest.name) + self.report.warning( + "Lineage for Sink Connector not supported.", + context=f"{connector_manifest.name} of type {connector_class_value}", + ) + + connector_class = class_type(connector_manifest, self.config, self.report) + connector_manifest.lineages = connector_class.extract_lineages() + connector_manifest.flow_property_bag = ( + connector_class.extract_flow_property_bag() + ) + + yield connector_manifest + + def _get_connector_manifest( + self, connector_name: str, connector_url: str + ) -> Optional[ConnectorManifest]: + try: + connector_response = self.session.get(connector_url) + connector_response.raise_for_status() + except Exception as e: + self.report.warning( + "Failed to get connector details", connector_name, exc=e + ) + return None + manifest = connector_response.json() + connector_manifest = ConnectorManifest(**manifest) + return connector_manifest + + def _get_connector_tasks(self, connector_name: str) -> dict: + try: + response = self.session.get( + f"{self.config.connect_uri}/connectors/{connector_name}/tasks", + ) + response.raise_for_status() + except Exception as e: + self.report.warning( + "Error getting connector tasks", context=connector_name, exc=e + ) + return {} + + return response.json() + + def _get_connector_topics(self, connector_name: str) -> List[str]: + try: + response = self.session.get( + f"{self.config.connect_uri}/connectors/{connector_name}/topics", + ) + response.raise_for_status() + except Exception as e: + self.report.warning( + "Error getting connector topics", context=connector_name, exc=e + ) + return [] + + return response.json()[connector_name]["topics"] + + def construct_flow_workunit(self, connector: ConnectorManifest) -> MetadataWorkUnit: + connector_name = connector.name + connector_type = connector.type + connector_class = connector.config.get(CONNECTOR_CLASS) + flow_property_bag = connector.flow_property_bag + # connector_url = connector.url # NOTE: this will expose connector credential when used + flow_urn = builder.make_data_flow_urn( + self.platform, + connector_name, + self.config.env, + self.config.platform_instance, + ) + + return MetadataChangeProposalWrapper( + entityUrn=flow_urn, + aspect=models.DataFlowInfoClass( + name=connector_name, + description=f"{connector_type.capitalize()} connector using `{connector_class}` plugin.", + customProperties=flow_property_bag, + # externalUrl=connector_url, # NOTE: this will expose connector credential when used + ), + ).as_workunit() + + def construct_job_workunits( + self, connector: ConnectorManifest + ) -> Iterable[MetadataWorkUnit]: + connector_name = connector.name + flow_urn = builder.make_data_flow_urn( + self.platform, + connector_name, + self.config.env, + self.config.platform_instance, + ) + + lineages = connector.lineages + if lineages: + for lineage in lineages: + source_dataset = lineage.source_dataset + source_platform = lineage.source_platform + target_dataset = lineage.target_dataset + target_platform = 
lineage.target_platform + job_property_bag = lineage.job_property_bag + + source_platform_instance = get_platform_instance( + self.config, connector_name, source_platform + ) + target_platform_instance = get_platform_instance( + self.config, connector_name, target_platform + ) + + job_id = self.get_job_id(lineage, connector, self.config) + job_urn = builder.make_data_job_urn_with_flow(flow_urn, job_id) + + inlets = ( + [ + self.make_lineage_dataset_urn( + source_platform, source_dataset, source_platform_instance + ) + ] + if source_dataset + else [] + ) + outlets = [ + self.make_lineage_dataset_urn( + target_platform, target_dataset, target_platform_instance + ) + ] + + yield MetadataChangeProposalWrapper( + entityUrn=job_urn, + aspect=models.DataJobInfoClass( + name=f"{connector_name}:{job_id}", + type="COMMAND", + customProperties=job_property_bag, + ), + ).as_workunit() + + yield MetadataChangeProposalWrapper( + entityUrn=job_urn, + aspect=models.DataJobInputOutputClass( + inputDatasets=inlets, + outputDatasets=outlets, + ), + ).as_workunit() + + def get_job_id( + self, + lineage: KafkaConnectLineage, + connector: ConnectorManifest, + config: KafkaConnectSourceConfig, + ) -> str: + connector_class = connector.config.get(CONNECTOR_CLASS) + + # Note - This block is only to maintain backward compatibility of Job URN + if ( + connector_class + and connector.type == SOURCE + and ( + "JdbcSourceConnector" in connector_class + or connector_class.startswith("io.debezium.connector") + ) + and lineage.source_dataset + and config.connect_to_platform_map + and config.connect_to_platform_map.get(connector.name) + and config.connect_to_platform_map[connector.name].get( + lineage.source_platform + ) + ): + return f"{config.connect_to_platform_map[connector.name][lineage.source_platform]}.{lineage.source_dataset}" + + return ( + lineage.source_dataset + if lineage.source_dataset + else f"unknown_source.{lineage.target_dataset}" + ) + + def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: + return [ + *super().get_workunit_processors(), + StaleEntityRemovalHandler.create( + self, self.config, self.ctx + ).workunit_processor, + ] + + def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: + for connector in self.get_connectors_manifest(): + yield self.construct_flow_workunit(connector) + yield from self.construct_job_workunits(connector) + self.report.report_connector_scanned(connector.name) + + def get_report(self) -> KafkaConnectSourceReport: + return self.report + + def make_lineage_dataset_urn( + self, platform: str, name: str, platform_instance: Optional[str] + ) -> str: + if self.config.convert_lineage_urns_to_lowercase: + name = name.lower() + + return builder.make_dataset_urn_with_platform_instance( + platform, name, platform_instance, self.config.env + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/sink_connectors.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/sink_connectors.py new file mode 100644 index 00000000000000..2790460c8e6019 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/sink_connectors.py @@ -0,0 +1,341 @@ +import re +from dataclasses import dataclass +from typing import Dict, Iterable, List, Optional, Tuple + +from datahub.ingestion.source.kafka_connect.common import ( + KAFKA, + BaseConnector, + ConnectorManifest, + KafkaConnectLineage, +) + + +@dataclass +class ConfluentS3SinkConnector(BaseConnector): + @dataclass + class S3SinkParser: + target_platform: str + 
bucket: str + topics_dir: str + topics: Iterable[str] + + def _get_parser(self, connector_manifest: ConnectorManifest) -> S3SinkParser: + # https://docs.confluent.io/kafka-connectors/s3-sink/current/configuration_options.html#s3 + bucket = connector_manifest.config.get("s3.bucket.name") + if not bucket: + raise ValueError( + "Could not find 's3.bucket.name' in connector configuration" + ) + + # https://docs.confluent.io/kafka-connectors/s3-sink/current/configuration_options.html#storage + topics_dir = connector_manifest.config.get("topics.dir", "topics") + + return self.S3SinkParser( + target_platform="s3", + bucket=bucket, + topics_dir=topics_dir, + topics=connector_manifest.topic_names, + ) + + def extract_flow_property_bag(self) -> Dict[str, str]: + # Mask/Remove properties that may reveal credentials + flow_property_bag = { + k: v + for k, v in self.connector_manifest.config.items() + if k + not in [ + "aws.access.key.id", + "aws.secret.access.key", + "s3.sse.customer.key", + "s3.proxy.password", + ] + } + return flow_property_bag + + def extract_lineages(self) -> List[KafkaConnectLineage]: + try: + parser = self._get_parser(self.connector_manifest) + + lineages: List[KafkaConnectLineage] = list() + for topic in parser.topics: + target_dataset = f"{parser.bucket}/{parser.topics_dir}/{topic}" + + lineages.append( + KafkaConnectLineage( + source_dataset=topic, + source_platform="kafka", + target_dataset=target_dataset, + target_platform=parser.target_platform, + ) + ) + return lineages + except Exception as e: + self.report.warning( + "Error resolving lineage for connector", + self.connector_manifest.name, + exc=e, + ) + + return [] + + +@dataclass +class SnowflakeSinkConnector(BaseConnector): + @dataclass + class SnowflakeParser: + database_name: str + schema_name: str + topics_to_tables: Dict[str, str] + + def get_table_name_from_topic_name(self, topic_name: str) -> str: + """ + This function converts the topic name to a valid Snowflake table name using some rules. + Refer below link for more info + https://docs.snowflake.com/en/user-guide/kafka-connector-overview#target-tables-for-kafka-topics + """ + table_name = re.sub("[^a-zA-Z0-9_]", "_", topic_name) + if re.match("^[^a-zA-Z_].*", table_name): + table_name = "_" + table_name + # Connector may append original topic's hash code as suffix for conflict resolution + # if generated table names for 2 topics are similar. This corner case is not handled here. + # Note that Snowflake recommends to choose topic names that follow the rules for + # Snowflake identifier names so this case is not recommended by snowflake. 
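+        # Illustrative examples of the conversion above:
+        #   "orders-topic.v1" -> "orders_topic_v1"
+        #   "123_metrics"     -> "_123_metrics"  (leading non-letter gets an underscore prefix)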
+ return table_name + + def get_parser( + self, + connector_manifest: ConnectorManifest, + ) -> SnowflakeParser: + database_name = connector_manifest.config["snowflake.database.name"] + schema_name = connector_manifest.config["snowflake.schema.name"] + + # Fetch user provided topic to table map + provided_topics_to_tables: Dict[str, str] = {} + if connector_manifest.config.get("snowflake.topic2table.map"): + for each in connector_manifest.config["snowflake.topic2table.map"].split( + "," + ): + topic, table = each.split(":") + provided_topics_to_tables[topic.strip()] = table.strip() + + topics_to_tables: Dict[str, str] = {} + # Extract lineage for only those topics whose data ingestion started + for topic in connector_manifest.topic_names: + if topic in provided_topics_to_tables: + # If user provided which table to get mapped with this topic + topics_to_tables[topic] = provided_topics_to_tables[topic] + else: + # Else connector converts topic name to a valid Snowflake table name. + topics_to_tables[topic] = self.get_table_name_from_topic_name(topic) + + return self.SnowflakeParser( + database_name=database_name, + schema_name=schema_name, + topics_to_tables=topics_to_tables, + ) + + def extract_flow_property_bag(self) -> Dict[str, str]: + # For all snowflake sink connector properties, refer below link + # https://docs.snowflake.com/en/user-guide/kafka-connector-install#configuring-the-kafka-connector + # remove private keys, secrets from properties + flow_property_bag = { + k: v + for k, v in self.connector_manifest.config.items() + if k + not in [ + "snowflake.private.key", + "snowflake.private.key.passphrase", + "value.converter.basic.auth.user.info", + ] + } + + return flow_property_bag + + def extract_lineages(self) -> List[KafkaConnectLineage]: + lineages: List[KafkaConnectLineage] = list() + parser = self.get_parser(self.connector_manifest) + + for topic, table in parser.topics_to_tables.items(): + target_dataset = f"{parser.database_name}.{parser.schema_name}.{table}" + lineages.append( + KafkaConnectLineage( + source_dataset=topic, + source_platform=KAFKA, + target_dataset=target_dataset, + target_platform="snowflake", + ) + ) + + return lineages + + +@dataclass +class BigQuerySinkConnector(BaseConnector): + @dataclass + class BQParser: + project: str + target_platform: str + sanitizeTopics: str + transforms: list + topicsToTables: Optional[str] = None + datasets: Optional[str] = None + defaultDataset: Optional[str] = None + version: str = "v1" + + def get_parser( + self, + connector_manifest: ConnectorManifest, + ) -> BQParser: + project = connector_manifest.config["project"] + sanitizeTopics = connector_manifest.config.get("sanitizeTopics", "false") + transform_names = ( + self.connector_manifest.config.get("transforms", "").split(",") + if self.connector_manifest.config.get("transforms") + else [] + ) + transforms = [] + for name in transform_names: + transform = {"name": name} + transforms.append(transform) + for key in self.connector_manifest.config.keys(): + if key.startswith(f"transforms.{name}."): + transform[ + key.replace(f"transforms.{name}.", "") + ] = self.connector_manifest.config[key] + + if "defaultDataset" in connector_manifest.config: + defaultDataset = connector_manifest.config["defaultDataset"] + return self.BQParser( + project=project, + defaultDataset=defaultDataset, + target_platform="bigquery", + sanitizeTopics=sanitizeTopics.lower() == "true", + version="v2", + transforms=transforms, + ) + else: + # version 1.6.x and similar configs supported + datasets = 
connector_manifest.config["datasets"] + topicsToTables = connector_manifest.config.get("topicsToTables") + + return self.BQParser( + project=project, + topicsToTables=topicsToTables, + datasets=datasets, + target_platform="bigquery", + sanitizeTopics=sanitizeTopics.lower() == "true", + transforms=transforms, + ) + + def get_list(self, property: str) -> Iterable[Tuple[str, str]]: + entries = property.split(",") + for entry in entries: + key, val = entry.rsplit("=") + yield (key.strip(), val.strip()) + + def get_dataset_for_topic_v1(self, topic: str, parser: BQParser) -> Optional[str]: + topicregex_dataset_map: Dict[str, str] = dict(self.get_list(parser.datasets)) # type: ignore + from java.util.regex import Pattern + + for pattern, dataset in topicregex_dataset_map.items(): + patternMatcher = Pattern.compile(pattern).matcher(topic) + if patternMatcher.matches(): + return dataset + return None + + def sanitize_table_name(self, table_name): + table_name = re.sub("[^a-zA-Z0-9_]", "_", table_name) + if re.match("^[^a-zA-Z_].*", table_name): + table_name = "_" + table_name + + return table_name + + def get_dataset_table_for_topic( + self, topic: str, parser: BQParser + ) -> Optional[str]: + if parser.version == "v2": + dataset = parser.defaultDataset + parts = topic.split(":") + if len(parts) == 2: + dataset = parts[0] + table = parts[1] + else: + table = parts[0] + else: + dataset = self.get_dataset_for_topic_v1(topic, parser) + if dataset is None: + return None + + table = topic + if parser.topicsToTables: + topicregex_table_map: Dict[str, str] = dict( + self.get_list(parser.topicsToTables) # type: ignore + ) + from java.util.regex import Pattern + + for pattern, tbl in topicregex_table_map.items(): + patternMatcher = Pattern.compile(pattern).matcher(topic) + if patternMatcher.matches(): + table = tbl + break + + if parser.sanitizeTopics: + table = self.sanitize_table_name(table) + return f"{dataset}.{table}" + + def apply_transformations( + self, topic: str, transforms: List[Dict[str, str]] + ) -> str: + for transform in transforms: + if transform["type"] == "org.apache.kafka.connect.transforms.RegexRouter": + regex = transform["regex"] + replacement = transform["replacement"] + pattern = re.compile(regex) + if pattern.match(topic): + topic = pattern.sub(replacement, topic, count=1) + return topic + + def extract_flow_property_bag(self) -> Dict[str, str]: + # Mask/Remove properties that may reveal credentials + flow_property_bag = { + k: v + for k, v in self.connector_manifest.config.items() + if k not in ["keyfile"] + } + + return flow_property_bag + + def extract_lineages(self) -> List[KafkaConnectLineage]: + lineages: List[KafkaConnectLineage] = list() + parser = self.get_parser(self.connector_manifest) + if not parser: + return lineages + target_platform = parser.target_platform + project = parser.project + transforms = parser.transforms + + for topic in self.connector_manifest.topic_names: + transformed_topic = self.apply_transformations(topic, transforms) + dataset_table = self.get_dataset_table_for_topic(transformed_topic, parser) + if dataset_table is None: + self.report.warning( + "Could not find target dataset for topic, please check your connector configuration" + f"{self.connector_manifest.name} : {transformed_topic} ", + ) + continue + target_dataset = f"{project}.{dataset_table}" + + lineages.append( + KafkaConnectLineage( + source_dataset=transformed_topic, + source_platform=KAFKA, + target_dataset=target_dataset, + target_platform=target_platform, + ) + ) + return lineages + + 
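+# Fully qualified connector class names, matched against a connector's reported
+# "connector.class" config value to pick one of the sink handlers defined above.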
+BIGQUERY_SINK_CONNECTOR_CLASS = "com.wepay.kafka.connect.bigquery.BigQuerySinkConnector" +S3_SINK_CONNECTOR_CLASS = "io.confluent.connect.s3.S3SinkConnector" +SNOWFLAKE_SINK_CONNECTOR_CLASS = "com.snowflake.kafka.connector.SnowflakeSinkConnector" diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py new file mode 100644 index 00000000000000..7b3b6e551a0a1f --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py @@ -0,0 +1,570 @@ +import logging +import re +from dataclasses import dataclass +from typing import Dict, Iterable, List, Optional, Tuple + +from sqlalchemy.engine.url import make_url + +from datahub.ingestion.source.kafka_connect.common import ( + CONNECTOR_CLASS, + KAFKA, + BaseConnector, + ConnectorManifest, + KafkaConnectLineage, + get_dataset_name, + has_three_level_hierarchy, + remove_prefix, + unquote, +) +from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( + get_platform_from_sqlalchemy_uri, +) + + +@dataclass +class ConfluentJDBCSourceConnector(BaseConnector): + REGEXROUTER = "org.apache.kafka.connect.transforms.RegexRouter" + KNOWN_TOPICROUTING_TRANSFORMS = [REGEXROUTER] + # https://kafka.apache.org/documentation/#connect_included_transformation + KAFKA_NONTOPICROUTING_TRANSFORMS = [ + "InsertField", + "InsertField$Key", + "InsertField$Value", + "ReplaceField", + "ReplaceField$Key", + "ReplaceField$Value", + "MaskField", + "MaskField$Key", + "MaskField$Value", + "ValueToKey", + "ValueToKey$Key", + "ValueToKey$Value", + "HoistField", + "HoistField$Key", + "HoistField$Value", + "ExtractField", + "ExtractField$Key", + "ExtractField$Value", + "SetSchemaMetadata", + "SetSchemaMetadata$Key", + "SetSchemaMetadata$Value", + "Flatten", + "Flatten$Key", + "Flatten$Value", + "Cast", + "Cast$Key", + "Cast$Value", + "HeadersFrom", + "HeadersFrom$Key", + "HeadersFrom$Value", + "TimestampConverter", + "Filter", + "InsertHeader", + "DropHeaders", + ] + # https://docs.confluent.io/platform/current/connect/transforms/overview.html + CONFLUENT_NONTOPICROUTING_TRANSFORMS = [ + "Drop", + "Drop$Key", + "Drop$Value", + "Filter", + "Filter$Key", + "Filter$Value", + "TombstoneHandler", + ] + KNOWN_NONTOPICROUTING_TRANSFORMS = ( + KAFKA_NONTOPICROUTING_TRANSFORMS + + [ + f"org.apache.kafka.connect.transforms.{t}" + for t in KAFKA_NONTOPICROUTING_TRANSFORMS + ] + + CONFLUENT_NONTOPICROUTING_TRANSFORMS + + [ + f"io.confluent.connect.transforms.{t}" + for t in CONFLUENT_NONTOPICROUTING_TRANSFORMS + ] + ) + + @dataclass + class JdbcParser: + db_connection_url: str + source_platform: str + database_name: str + topic_prefix: str + query: str + transforms: list + + def get_parser( + self, + connector_manifest: ConnectorManifest, + ) -> JdbcParser: + url = remove_prefix( + str(connector_manifest.config.get("connection.url")), "jdbc:" + ) + url_instance = make_url(url) + source_platform = get_platform_from_sqlalchemy_uri(str(url_instance)) + database_name = url_instance.database + assert database_name + db_connection_url = f"{url_instance.drivername}://{url_instance.host}:{url_instance.port}/{database_name}" + + topic_prefix = self.connector_manifest.config.get("topic.prefix", None) + + query = self.connector_manifest.config.get("query", None) + + transform_names = ( + self.connector_manifest.config.get("transforms", "").split(",") + if self.connector_manifest.config.get("transforms") + else [] + ) + + transforms = [] + 
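+        # Kafka Connect flattens Single Message Transform (SMT) settings into config keys
+        # of the form "transforms.<name>.<property>"; the loop below regroups them per
+        # transform, e.g. transforms=Router plus transforms.Router.type/.regex/.replacement
+        # becomes [{"name": "Router", "type": ..., "regex": ..., "replacement": ...}].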
for name in transform_names: + transform = {"name": name} + transforms.append(transform) + for key in self.connector_manifest.config.keys(): + if key.startswith(f"transforms.{name}."): + transform[ + key.replace(f"transforms.{name}.", "") + ] = self.connector_manifest.config[key] + + return self.JdbcParser( + db_connection_url, + source_platform, + database_name, + topic_prefix, + query, + transforms, + ) + + def default_get_lineages( + self, + topic_prefix: str, + database_name: str, + source_platform: str, + topic_names: Optional[Iterable[str]] = None, + include_source_dataset: bool = True, + ) -> List[KafkaConnectLineage]: + lineages: List[KafkaConnectLineage] = [] + if not topic_names: + topic_names = self.connector_manifest.topic_names + table_name_tuples: List[Tuple] = self.get_table_names() + for topic in topic_names: + # All good for NO_TRANSFORM or (SINGLE_TRANSFORM and KNOWN_NONTOPICROUTING_TRANSFORM) or (not SINGLE_TRANSFORM and all(KNOWN_NONTOPICROUTING_TRANSFORM)) + source_table: str = ( + remove_prefix(topic, topic_prefix) if topic_prefix else topic + ) + # include schema name for three-level hierarchies + if has_three_level_hierarchy(source_platform): + table_name_tuple: Tuple = next( + iter([t for t in table_name_tuples if t and t[-1] == source_table]), + (), + ) + if len(table_name_tuple) > 1: + source_table = f"{table_name_tuple[-2]}.{source_table}" + else: + include_source_dataset = False + self.report.warning( + "Could not find schema for table" + f"{self.connector_manifest.name} : {source_table}", + ) + dataset_name: str = get_dataset_name(database_name, source_table) + lineage = KafkaConnectLineage( + source_dataset=dataset_name if include_source_dataset else None, + source_platform=source_platform, + target_dataset=topic, + target_platform=KAFKA, + ) + lineages.append(lineage) + return lineages + + def get_table_names(self) -> List[Tuple]: + sep: str = "." 
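+        # Table identifiers are taken from the running tasks' "tables" config (or from
+        # "table.whitelist") and may be quoted and schema-qualified; the parsing below
+        # turns e.g. '"PUBLIC"."ORDERS"' into the (schema, table) tuple ("PUBLIC", "ORDERS").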
+ leading_quote_char: str = '"' + trailing_quote_char: str = leading_quote_char + + table_ids: List[str] = [] + if self.connector_manifest.tasks: + table_ids = ( + ",".join( + [ + task["config"].get("tables") + for task in self.connector_manifest.tasks + ] + ) + ).split(",") + quote_method = self.connector_manifest.config.get( + "quote.sql.identifiers", "always" + ) + if ( + quote_method == "always" + and table_ids + and table_ids[0] + and table_ids[-1] + ): + leading_quote_char = table_ids[0][0] + trailing_quote_char = table_ids[-1][-1] + # This will only work for single character quotes + elif self.connector_manifest.config.get("table.whitelist"): + table_ids = self.connector_manifest.config.get("table.whitelist").split(",") # type: ignore + + # List of Tuple containing (schema, table) + tables: List[Tuple] = [ + ( + ( + unquote( + table_id.split(sep)[-2], leading_quote_char, trailing_quote_char + ) + if len(table_id.split(sep)) > 1 + else "" + ), + unquote( + table_id.split(sep)[-1], leading_quote_char, trailing_quote_char + ), + ) + for table_id in table_ids + ] + return tables + + def extract_flow_property_bag(self) -> Dict[str, str]: + flow_property_bag = { + k: v + for k, v in self.connector_manifest.config.items() + if k not in ["connection.password", "connection.user"] + } + + # Mask/Remove properties that may reveal credentials + flow_property_bag["connection.url"] = self.get_parser( + self.connector_manifest + ).db_connection_url + + return flow_property_bag + + def extract_lineages(self) -> List[KafkaConnectLineage]: + lineages: List[KafkaConnectLineage] = list() + parser = self.get_parser(self.connector_manifest) + source_platform = parser.source_platform + database_name = parser.database_name + query = parser.query + topic_prefix = parser.topic_prefix + transforms = parser.transforms + + logging.debug( + f"Extracting source platform: {source_platform} and database name: {database_name} from connection url " + ) + + if not self.connector_manifest.topic_names: + return lineages + + if query: + # Lineage source_table can be extracted by parsing query + for topic in self.connector_manifest.topic_names: + # default method - as per earlier implementation + dataset_name: str = get_dataset_name(database_name, topic) + + lineage = KafkaConnectLineage( + source_dataset=None, + source_platform=source_platform, + target_dataset=topic, + target_platform=KAFKA, + ) + lineages.append(lineage) + self.report.warning( + "Could not find input dataset, the connector has query configuration set", + self.connector_manifest.name, + ) + return lineages + + SINGLE_TRANSFORM = len(transforms) == 1 + NO_TRANSFORM = len(transforms) == 0 + UNKNOWN_TRANSFORM = any( + [ + transform["type"] + not in self.KNOWN_TOPICROUTING_TRANSFORMS + + self.KNOWN_NONTOPICROUTING_TRANSFORMS + for transform in transforms + ] + ) + ALL_TRANSFORMS_NON_TOPICROUTING = all( + [ + transform["type"] in self.KNOWN_NONTOPICROUTING_TRANSFORMS + for transform in transforms + ] + ) + + if NO_TRANSFORM or ALL_TRANSFORMS_NON_TOPICROUTING: + return self.default_get_lineages( + database_name=database_name, + source_platform=source_platform, + topic_prefix=topic_prefix, + ) + + if SINGLE_TRANSFORM and transforms[0]["type"] == self.REGEXROUTER: + tables = self.get_table_names() + topic_names = list(self.connector_manifest.topic_names) + + from java.util.regex import Pattern + + for table in tables: + source_table: str = table[-1] + topic = topic_prefix + source_table if topic_prefix else source_table + + transform_regex = 
Pattern.compile(transforms[0]["regex"]) + transform_replacement = transforms[0]["replacement"] + + matcher = transform_regex.matcher(topic) + if matcher.matches(): + topic = str(matcher.replaceFirst(transform_replacement)) + + # Additional check to confirm that the topic present + # in connector topics + + if topic in self.connector_manifest.topic_names: + # include schema name for three-level hierarchies + if has_three_level_hierarchy(source_platform) and len(table) > 1: + source_table = f"{table[-2]}.{table[-1]}" + + dataset_name = get_dataset_name(database_name, source_table) + + lineage = KafkaConnectLineage( + source_dataset=dataset_name, + source_platform=source_platform, + target_dataset=topic, + target_platform=KAFKA, + ) + topic_names.remove(topic) + lineages.append(lineage) + + if topic_names: + lineages.extend( + self.default_get_lineages( + database_name=database_name, + source_platform=source_platform, + topic_prefix=topic_prefix, + topic_names=topic_names, + include_source_dataset=False, + ) + ) + self.report.warning( + "Could not find input dataset for connector topics", + f"{self.connector_manifest.name} : {topic_names}", + ) + return lineages + else: + include_source_dataset = True + if SINGLE_TRANSFORM and UNKNOWN_TRANSFORM: + self.report.warning( + "Could not find input dataset, connector has unknown transform", + f"{self.connector_manifest.name} : {transforms[0]['type']}", + ) + include_source_dataset = False + if not SINGLE_TRANSFORM and UNKNOWN_TRANSFORM: + self.report.warning( + "Could not find input dataset, connector has one or more unknown transforms", + self.connector_manifest.name, + ) + include_source_dataset = False + lineages = self.default_get_lineages( + database_name=database_name, + source_platform=source_platform, + topic_prefix=topic_prefix, + include_source_dataset=include_source_dataset, + ) + return lineages + + +@dataclass +class MongoSourceConnector(BaseConnector): + # https://www.mongodb.com/docs/kafka-connector/current/source-connector/ + + @dataclass + class MongoSourceParser: + db_connection_url: Optional[str] + source_platform: str + database_name: Optional[str] + topic_prefix: Optional[str] + transforms: List[str] + + def get_parser( + self, + connector_manifest: ConnectorManifest, + ) -> MongoSourceParser: + parser = self.MongoSourceParser( + db_connection_url=connector_manifest.config.get("connection.uri"), + source_platform="mongodb", + database_name=connector_manifest.config.get("database"), + topic_prefix=connector_manifest.config.get("topic_prefix"), + transforms=( + connector_manifest.config["transforms"].split(",") + if "transforms" in connector_manifest.config + else [] + ), + ) + + return parser + + def extract_lineages(self) -> List[KafkaConnectLineage]: + lineages: List[KafkaConnectLineage] = list() + parser = self.get_parser(self.connector_manifest) + source_platform = parser.source_platform + topic_naming_pattern = r"mongodb\.(\w+)\.(\w+)" + + if not self.connector_manifest.topic_names: + return lineages + + for topic in self.connector_manifest.topic_names: + found = re.search(re.compile(topic_naming_pattern), topic) + + if found: + table_name = get_dataset_name(found.group(1), found.group(2)) + + lineage = KafkaConnectLineage( + source_dataset=table_name, + source_platform=source_platform, + target_dataset=topic, + target_platform=KAFKA, + ) + lineages.append(lineage) + return lineages + + +@dataclass +class DebeziumSourceConnector(BaseConnector): + @dataclass + class DebeziumParser: + source_platform: str + server_name: 
Optional[str] + database_name: Optional[str] + + def get_server_name(self, connector_manifest: ConnectorManifest) -> str: + if "topic.prefix" in connector_manifest.config: + return connector_manifest.config["topic.prefix"] + else: + return connector_manifest.config.get("database.server.name", "") + + def get_parser( + self, + connector_manifest: ConnectorManifest, + ) -> DebeziumParser: + connector_class = connector_manifest.config.get(CONNECTOR_CLASS, "") + + if connector_class == "io.debezium.connector.mysql.MySqlConnector": + parser = self.DebeziumParser( + source_platform="mysql", + server_name=self.get_server_name(connector_manifest), + database_name=None, + ) + elif connector_class == "MySqlConnector": + parser = self.DebeziumParser( + source_platform="mysql", + server_name=self.get_server_name(connector_manifest), + database_name=None, + ) + elif connector_class == "io.debezium.connector.mongodb.MongoDbConnector": + parser = self.DebeziumParser( + source_platform="mongodb", + server_name=self.get_server_name(connector_manifest), + database_name=None, + ) + elif connector_class == "io.debezium.connector.postgresql.PostgresConnector": + parser = self.DebeziumParser( + source_platform="postgres", + server_name=self.get_server_name(connector_manifest), + database_name=connector_manifest.config.get("database.dbname"), + ) + elif connector_class == "io.debezium.connector.oracle.OracleConnector": + parser = self.DebeziumParser( + source_platform="oracle", + server_name=self.get_server_name(connector_manifest), + database_name=connector_manifest.config.get("database.dbname"), + ) + elif connector_class == "io.debezium.connector.sqlserver.SqlServerConnector": + database_name = connector_manifest.config.get( + "database.names" + ) or connector_manifest.config.get("database.dbname") + + if "," in str(database_name): + raise Exception( + f"Only one database is supported for Debezium's SQL Server connector. 
Found: {database_name}" + ) + + parser = self.DebeziumParser( + source_platform="mssql", + server_name=self.get_server_name(connector_manifest), + database_name=database_name, + ) + elif connector_class == "io.debezium.connector.db2.Db2Connector": + parser = self.DebeziumParser( + source_platform="db2", + server_name=self.get_server_name(connector_manifest), + database_name=connector_manifest.config.get("database.dbname"), + ) + elif connector_class == "io.debezium.connector.vitess.VitessConnector": + parser = self.DebeziumParser( + source_platform="vitess", + server_name=self.get_server_name(connector_manifest), + database_name=connector_manifest.config.get("vitess.keyspace"), + ) + else: + raise ValueError(f"Connector class '{connector_class}' is unknown.") + + return parser + + def extract_lineages(self) -> List[KafkaConnectLineage]: + lineages: List[KafkaConnectLineage] = list() + + try: + parser = self.get_parser(self.connector_manifest) + source_platform = parser.source_platform + server_name = parser.server_name + database_name = parser.database_name + topic_naming_pattern = rf"({server_name})\.(\w+\.\w+)" + + if not self.connector_manifest.topic_names: + return lineages + + for topic in self.connector_manifest.topic_names: + found = re.search(re.compile(topic_naming_pattern), topic) + + if found: + table_name = get_dataset_name(database_name, found.group(2)) + + lineage = KafkaConnectLineage( + source_dataset=table_name, + source_platform=source_platform, + target_dataset=topic, + target_platform=KAFKA, + ) + lineages.append(lineage) + return lineages + except Exception as e: + self.report.warning( + "Error resolving lineage for connector", + self.connector_manifest.name, + exc=e, + ) + + return [] + + +@dataclass +class ConfigDrivenSourceConnector(BaseConnector): + def extract_lineages(self) -> List[KafkaConnectLineage]: + lineages = [] + for connector in self.config.generic_connectors: + if connector.connector_name == self.connector_manifest.name: + target_connector = connector + break + for topic in self.connector_manifest.topic_names: + lineage = KafkaConnectLineage( + source_dataset=target_connector.source_dataset, + source_platform=target_connector.source_platform, + target_dataset=topic, + target_platform=KAFKA, + ) + lineages.append(lineage) + return lineages + + +JDBC_SOURCE_CONNECTOR_CLASS = "io.confluent.connect.jdbc.JdbcSourceConnector" +DEBEZIUM_SOURCE_CONNECTOR_PREFIX = "io.debezium.connector" +MONGO_SOURCE_CONNECTOR_CLASS = "com.mongodb.kafka.connect.MongoSourceConnector" diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index 57a251ef2ed14f..a66962f962255f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -31,6 +31,10 @@ from pydantic.class_validators import validator import datahub.emitter.mce_builder as builder +from datahub.api.entities.platformresource.platform_resource import ( + PlatformResource, + PlatformResourceKey, +) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ContainerKey, create_embed_mcp from datahub.ingestion.api.report import Report @@ -106,7 +110,7 @@ from datahub.utilities.url_util import remove_port_from_url CORPUSER_DATAHUB = "urn:li:corpuser:datahub" - +LOOKER = "looker" logger = logging.getLogger(__name__) @@ -1411,6 +1415,7 @@ class 
LookerDashboardSourceReport(StaleEntityRemovalSourceReport): resolved_user_ids: int = 0 email_ids_missing: int = 0 # resolved users with missing email addresses + looker_user_count: int = 0 _looker_api: Optional[LookerAPI] = None query_latency: Dict[str, datetime.timedelta] = dataclasses_field( @@ -1614,9 +1619,21 @@ def get_urn_dashboard_id(self): class LookerUserRegistry: looker_api_wrapper: LookerAPI fields: str = ",".join(["id", "email", "display_name", "first_name", "last_name"]) + _user_cache: Dict[str, LookerUser] = {} - def __init__(self, looker_api: LookerAPI): + def __init__(self, looker_api: LookerAPI, report: LookerDashboardSourceReport): self.looker_api_wrapper = looker_api + self.report = report + self._initialize_user_cache() + + def _initialize_user_cache(self) -> None: + raw_users: Sequence[User] = self.looker_api_wrapper.all_users( + user_fields=self.fields + ) + + for raw_user in raw_users: + looker_user = LookerUser.create_looker_user(raw_user) + self._user_cache[str(looker_user.id)] = looker_user def get_by_id(self, id_: str) -> Optional[LookerUser]: if not id_: @@ -1624,6 +1641,9 @@ def get_by_id(self, id_: str) -> Optional[LookerUser]: logger.debug(f"Will get user {id_}") + if str(id_) in self._user_cache: + return self._user_cache.get(str(id_)) + raw_user: Optional[User] = self.looker_api_wrapper.get_user( str(id_), user_fields=self.fields ) @@ -1632,3 +1652,35 @@ def get_by_id(self, id_: str) -> Optional[LookerUser]: looker_user = LookerUser.create_looker_user(raw_user) return looker_user + + def to_platform_resource( + self, platform_instance: Optional[str] + ) -> Iterable[MetadataChangeProposalWrapper]: + try: + platform_resource_key = PlatformResourceKey( + platform=LOOKER, + resource_type="USER_ID_MAPPING", + platform_instance=platform_instance, + primary_key="", + ) + + # Extract user email mappings + user_email_cache = { + user_id: user.email + for user_id, user in self._user_cache.items() + if user.email + } + + platform_resource = PlatformResource.create( + key=platform_resource_key, + value=user_email_cache, + ) + + self.report.looker_user_count = len(user_email_cache) + yield from platform_resource.to_mcps() + + except Exception as exc: + self.report.warning( + message="Failed to generate platform resource for looker id mappings", + exc=exc, + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py index ab55d4e15e5de4..c3f2a110136c45 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py @@ -68,6 +68,7 @@ class LookerAPIStats(BaseModel): get_look_calls: int = 0 search_looks_calls: int = 0 search_dashboards_calls: int = 0 + all_user_calls: int = 0 class LookerAPI: @@ -135,7 +136,7 @@ def get_available_permissions(self) -> Set[str]: return permissions - @lru_cache(maxsize=1000) + @lru_cache(maxsize=5000) def get_user(self, id_: str, user_fields: str) -> Optional[User]: self.client_stats.user_calls += 1 try: @@ -154,6 +155,17 @@ def get_user(self, id_: str, user_fields: str) -> Optional[User]: # User not found return None + def all_users(self, user_fields: str) -> Sequence[User]: + self.client_stats.all_user_calls += 1 + try: + return self.client.all_users( + fields=cast(str, user_fields), + transport_options=self.transport_options, + ) + except SDKError as e: + logger.warning(f"Failure was {e}") + return [] + def 
execute_query(self, write_query: WriteQuery) -> List[Dict]: logger.debug(f"Executing query {write_query}") self.client_stats.query_calls += 1 diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index cd8ccb8217257c..815c5dfb1c0147 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -145,7 +145,9 @@ def __init__(self, config: LookerDashboardSourceConfig, ctx: PipelineContext): self.source_config: LookerDashboardSourceConfig = config self.reporter: LookerDashboardSourceReport = LookerDashboardSourceReport() self.looker_api: LookerAPI = LookerAPI(self.source_config) - self.user_registry: LookerUserRegistry = LookerUserRegistry(self.looker_api) + self.user_registry: LookerUserRegistry = LookerUserRegistry( + self.looker_api, self.reporter + ) self.explore_registry: LookerExploreRegistry = LookerExploreRegistry( self.looker_api, self.reporter, self.source_config ) @@ -1673,5 +1675,14 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: yield usage_mcp.as_workunit() self.reporter.report_stage_end("usage_extraction") + # Dump looker user resource mappings. + logger.info("Ingesting looker user resource mapping workunits") + self.reporter.report_stage_start("user_resource_extraction") + yield from auto_workunit( + self.user_registry.to_platform_resource( + self.source_config.platform_instance + ) + ) + def get_report(self) -> SourceReport: return self.reporter diff --git a/metadata-ingestion/src/datahub/ingestion/source/mlflow.py b/metadata-ingestion/src/datahub/ingestion/source/mlflow.py index cef6d2b1bb5774..26d160acf330cf 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mlflow.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mlflow.py @@ -38,16 +38,30 @@ class MLflowConfig(EnvConfigMixin): tracking_uri: Optional[str] = Field( default=None, - description="Tracking server URI. If not set, an MLflow default tracking_uri is used (local `mlruns/` directory or `MLFLOW_TRACKING_URI` environment variable)", + description=( + "Tracking server URI. If not set, an MLflow default tracking_uri is used" + " (local `mlruns/` directory or `MLFLOW_TRACKING_URI` environment variable)" + ), ) registry_uri: Optional[str] = Field( default=None, - description="Registry server URI. If not set, an MLflow default registry_uri is used (value of tracking_uri or `MLFLOW_REGISTRY_URI` environment variable)", + description=( + "Registry server URI. If not set, an MLflow default registry_uri is used" + " (value of tracking_uri or `MLFLOW_REGISTRY_URI` environment variable)" + ), ) model_name_separator: str = Field( default="_", description="A string which separates model name from its version (e.g. model_1 or model-1)", ) + base_external_url: Optional[str] = Field( + default=None, + description=( + "Base URL to use when constructing external URLs to MLflow." + " If not set, tracking_uri is used if it's an HTTP URL." + " If neither is set, external URLs are not generated." 
+ ), + ) @dataclass @@ -279,12 +293,23 @@ def _make_ml_model_urn(self, model_version: ModelVersion) -> str: ) return urn - def _make_external_url(self, model_version: ModelVersion) -> Union[None, str]: + def _get_base_external_url_from_tracking_uri(self) -> Optional[str]: + if isinstance( + self.client.tracking_uri, str + ) and self.client.tracking_uri.startswith("http"): + return self.client.tracking_uri + else: + return None + + def _make_external_url(self, model_version: ModelVersion) -> Optional[str]: """ Generate URL for a Model Version to MLflow UI. """ - base_uri = self.client.tracking_uri - if base_uri.startswith("http"): + base_uri = ( + self.config.base_external_url + or self._get_base_external_url_from_tracking_uri() + ) + if base_uri: return f"{base_uri.rstrip('/')}/#/models/{model_version.name}/versions/{model_version.version}" else: return None diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index f7458c4eb4d5b5..b49d40a0c7eb6a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -9,7 +9,7 @@ import datahub.emitter.mce_builder as builder from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.source_common import DatasetSourceConfigMixin, PlatformDetail from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.ingestion.source.common.subtypes import BIAssetSubTypes from datahub.ingestion.source.state.stale_entity_removal_handler import ( @@ -232,19 +232,6 @@ def default_for_dataset_type_mapping() -> Dict[str, str]: return dict_ -class PlatformDetail(ConfigModel): - platform_instance: Optional[str] = pydantic.Field( - default=None, - description="DataHub platform instance name. 
To generate correct urn for upstream dataset, this should match " - "with platform instance name used in ingestion " - "recipe of other datahub sources.", - ) - env: str = pydantic.Field( - default=builder.DEFAULT_ENV, - description="The environment that all assets produced by DataHub platform ingestion source belong to", - ) - - class DataBricksPlatformDetail(PlatformDetail): """ metastore is an additional field used in Databricks connector to generate the dataset urn diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py index baaa8d5b85ae10..6d51e853a2fb06 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py @@ -2,8 +2,8 @@ from abc import ABC, abstractmethod from typing import Union +from datahub.configuration.source_common import PlatformDetail from datahub.ingestion.source.powerbi.config import ( - PlatformDetail, PowerBiDashboardSourceConfig, PowerBIPlatformDetail, ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py index ffaed79f4e42a6..63520bd731de86 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py @@ -5,13 +5,13 @@ from lark import Tree +from datahub.configuration.source_common import PlatformDetail from datahub.emitter import mce_builder as builder from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.powerbi.config import ( Constant, DataBricksPlatformDetail, DataPlatformPair, - PlatformDetail, PowerBiDashboardSourceConfig, PowerBiDashboardSourceReport, PowerBIPlatformDetail, diff --git a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py index 15ee995b2d5fdc..f71949b9eb27f7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py +++ b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py @@ -89,7 +89,16 @@ def __init__(self, schema): logger.error(f"Invalid JSON schema: {schema_data}. Error: {str(e)}") avro_schema = {} - self.schema_name = avro_schema.get("namespace") + "." + avro_schema.get("name") + self.schema_name = "null" + if avro_schema.get("namespace") and avro_schema.get("name"): + self.schema_name = ( + avro_schema.get("namespace") + "." 
+ avro_schema.get("name") + ) + elif avro_schema.get("namespace"): + self.schema_name = avro_schema.get("namespace") + elif avro_schema.get("name"): + self.schema_name = avro_schema.get("name") + self.schema_description = avro_schema.get("doc") self.schema_type = schema.get("type") self.schema_str = schema.get("data") diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index 1863663f98bb24..3ddf47b70cdf80 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -9,6 +9,7 @@ from itertools import groupby from pathlib import PurePath from typing import Any, Dict, Iterable, List, Optional, Tuple +from urllib.parse import urlparse import smart_open.compression as so_compression from more_itertools import peekable @@ -993,9 +994,7 @@ def s3_browser(self, path_spec: PathSpec, sample_size: int) -> Iterable[BrowsePa folders = [] for dir in dirs_to_process: logger.info(f"Getting files from folder: {dir}") - prefix_to_process = dir.rstrip("\\").lstrip( - self.create_s3_path(bucket_name, "/") - ) + prefix_to_process = urlparse(dir).path.lstrip("/") folders.extend( self.get_folder_info( diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py index 93d84d8b246e51..c769c6705ac3f6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py @@ -414,9 +414,13 @@ def _process_upstream_lineage_row( except Exception as e: self.report.num_upstream_lineage_edge_parsing_failed += 1 upstream_tables = db_row.get("UPSTREAM_TABLES") + downstream_table = db_row.get("DOWNSTREAM_TABLE_NAME") self.structured_reporter.warning( "Failed to parse lineage edge", - context=f"Upstreams: {upstream_tables} Downstreams: {db_row.get('DOWNSTREAM_TABLE_NAME')}", + # Tricky: sometimes the full row data is too large, and so the context + # message gets truncated. By pulling out the upstreams and downstream + # list, we can at least get the important fields if truncation does occur. 
+ context=f"Upstreams: {upstream_tables} Downstream: {downstream_table} Full row: {db_row}", exc=e, ) return None diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py index 99790de529ac3a..97c398c1962d6b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py @@ -237,6 +237,19 @@ def show_views_for_database( LIMIT {limit} {from_clause}; """ + @staticmethod + def get_secure_view_definitions() -> str: + # https://docs.snowflake.com/en/sql-reference/account-usage/views + return """ + SELECT + TABLE_CATALOG as "TABLE_CATALOG", + TABLE_SCHEMA as "TABLE_SCHEMA", + TABLE_NAME as "TABLE_NAME", + VIEW_DEFINITION as "VIEW_DEFINITION" + FROM SNOWFLAKE.ACCOUNT_USAGE.VIEWS + WHERE IS_SECURE = 'YES' AND VIEW_DEFINITION !='' AND DELETED IS NULL + """ + @staticmethod def columns_for_schema( schema_name: str, diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py index 5a69b4bb779d72..780effc82b0163 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py @@ -266,6 +266,22 @@ def get_schemas_for_database(self, db_name: str) -> List[SnowflakeSchema]: snowflake_schemas.append(snowflake_schema) return snowflake_schemas + @serialized_lru_cache(maxsize=1) + def get_secure_view_definitions(self) -> Dict[str, Dict[str, Dict[str, str]]]: + secure_view_definitions: Dict[str, Dict[str, Dict[str, str]]] = defaultdict( + lambda: defaultdict(lambda: defaultdict()) + ) + cur = self.connection.query(SnowflakeQuery.get_secure_view_definitions()) + for view in cur: + db_name = view["TABLE_CATALOG"] + schema_name = view["TABLE_SCHEMA"] + view_name = view["TABLE_NAME"] + secure_view_definitions[db_name][schema_name][view_name] = view[ + "VIEW_DEFINITION" + ] + + return secure_view_definitions + @serialized_lru_cache(maxsize=1) def get_tables_for_database( self, db_name: str diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py index 4ceeb8560c1758..bc64693b6a1084 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py @@ -424,6 +424,10 @@ def _process_schema( view_identifier = self.identifiers.get_dataset_identifier( view.name, schema_name, db_name ) + if view.is_secure and not view.view_definition: + view.view_definition = self.fetch_secure_view_definition( + view.name, schema_name, db_name + ) if view.view_definition: self.aggregator.add_view_definition( view_urn=self.identifiers.gen_dataset_urn(view_identifier), @@ -449,6 +453,25 @@ def _process_schema( context=f"{db_name}.{schema_name}", ) + def fetch_secure_view_definition( + self, table_name: str, schema_name: str, db_name: str + ) -> Optional[str]: + try: + view_definitions = self.data_dictionary.get_secure_view_definitions() + return view_definitions[db_name][schema_name][table_name] + except Exception as e: + if isinstance(e, SnowflakePermissionError): + error_msg = ( + "Failed to get secure views definitions. Please check permissions." 
+ ) + else: + error_msg = "Failed to get secure views definitions" + self.structured_reporter.warning( + error_msg, + exc=e, + ) + return None + def fetch_views_for_schema( self, snowflake_schema: SnowflakeSchema, db_name: str, schema_name: str ) -> List[SnowflakeView]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index c3a7912c40e8ee..e5883dd0349a3a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -540,6 +540,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: identifiers=self.identifiers, schema_resolver=schema_resolver, discovered_tables=discovered_datasets, + graph=self.ctx.graph, ) # TODO: This is slightly suboptimal because we create two SqlParsingAggregator instances with different configs diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 6844b8a425a7b6..6cc2220d90fd93 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -111,6 +111,8 @@ tableau_field_to_schema_field, workbook_graphql_query, ) +from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo +from datahub.ingestion.source.tableau.tableau_validation import check_user_role from datahub.metadata.com.linkedin.pegasus2avro.common import ( AuditStamp, ChangeAuditStamps, @@ -167,7 +169,7 @@ try: # On earlier versions of the tableauserverclient, the NonXMLResponseError - # was thrown when reauthentication was needed. We'll keep both exceptions + # was thrown when reauthentication was necessary. We'll keep both exceptions # around for now, but can remove this in the future. from tableauserverclient.server.endpoint.exceptions import ( # type: ignore NotSignedInError, @@ -632,6 +634,33 @@ class TableauSourceReport(StaleEntityRemovalSourceReport): num_upstream_table_lineage_failed_parse_sql: int = 0 num_upstream_fine_grained_lineage_failed_parse_sql: int = 0 num_hidden_assets_skipped: int = 0 + logged_in_user: List[UserInfo] = [] + + +def report_user_role(report: TableauSourceReport, server: Server) -> None: + title: str = "Insufficient Permissions" + message: str = "The user must have the `Site Administrator Explorer` role to perform metadata ingestion." + try: + # TableauSiteSource instance is per site, so each time we need to find-out user detail + # the site-role might be different on another site + logged_in_user: UserInfo = UserInfo.from_server(server=server) + + if not logged_in_user.is_site_administrator_explorer(): + report.warning( + title=title, + message=message, + context=f"user-name={logged_in_user.user_name}, role={logged_in_user.site_role}, site_id={logged_in_user.site_id}", + ) + + report.logged_in_user.append(logged_in_user) + + except Exception as e: + report.warning( + title=title, + message="Failed to verify the user's role. 
The user must have `Site Administrator Explorer` role.", + context=f"{e}", + exc=e, + ) @platform_name("Tableau") @@ -676,6 +705,7 @@ def _authenticate(self, site_content_url: str) -> None: try: logger.info(f"Authenticated to Tableau site: '{site_content_url}'") self.server = self.config.make_tableau_client(site_content_url) + report_user_role(report=self.report, server=self.server) # Note that we're not catching ConfigurationError, since we want that to throw. except ValueError as e: self.report.failure( @@ -689,9 +719,17 @@ def test_connection(config_dict: dict) -> TestConnectionReport: test_report = TestConnectionReport() try: source_config = TableauConfig.parse_obj_allow_extras(config_dict) - source_config.make_tableau_client(source_config.site) + + server = source_config.make_tableau_client(source_config.site) + test_report.basic_connectivity = CapabilityReport(capable=True) + + test_report.capability_report = check_user_role( + logged_in_user=UserInfo.from_server(server=server) + ) + except Exception as e: + logger.warning(f"{e}", exc_info=e) test_report.basic_connectivity = CapabilityReport( capable=False, failure_reason=str(e) ) @@ -831,6 +869,8 @@ def __init__( # when emitting custom SQL data sources. self.custom_sql_ids_being_used: List[str] = [] + report_user_role(report=report, server=server) + @property def no_env_browse_prefix(self) -> str: # Prefix to use with browse path (v1) @@ -1290,7 +1330,6 @@ def get_connection_objects( page_size = page_size_override or self.config.page_size filter_pages = get_filter_pages(query_filter, page_size) - for filter_page in filter_pages: has_next_page = 1 current_cursor: Optional[str] = None diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py index c5d14e0afe15a5..61b56c4bee5bda 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py @@ -975,15 +975,22 @@ def get_filter_pages(query_filter: dict, page_size: int) -> List[dict]: # a few ten thousand, then tableau server responds with empty response # causing below error: # tableauserverclient.server.endpoint.exceptions.NonXMLResponseError: b'' + + # in practice, we only do pagination if len(query_filter.keys()) == 1 + if len(query_filter.keys()) != 1: + return filter_pages + + current_key = (list(query_filter.keys()))[0] + if ( - len(query_filter.keys()) == 1 - and query_filter.get(c.ID_WITH_IN) - and isinstance(query_filter[c.ID_WITH_IN], list) + current_key in [c.ID_WITH_IN, c.PROJECT_NAME_WITH_IN] + and query_filter.get(current_key) + and isinstance(query_filter[current_key], list) ): - ids = query_filter[c.ID_WITH_IN] + ids = query_filter[current_key] filter_pages = [ { - c.ID_WITH_IN: ids[ + current_key: ids[ start : ( start + page_size if start + page_size < len(ids) else len(ids) ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py index d1dd0d92819991..ea0878143ef354 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py @@ -81,3 +81,5 @@ PROJECT = "Project" SITE = "Site" IS_UNSUPPORTED_CUSTOM_SQL = "isUnsupportedCustomSql" +SITE_PERMISSION = "sitePermission" +SITE_ROLE = "SiteAdministratorExplorer" diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_server_wrapper.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_server_wrapper.py new file mode 100644 index 00000000000000..f309622d12b91b --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_server_wrapper.py @@ -0,0 +1,33 @@ +from dataclasses import dataclass + +from tableauserverclient import Server, UserItem + +from datahub.ingestion.source.tableau import tableau_constant as c + + +@dataclass +class UserInfo: + user_name: str + site_role: str + site_id: str + + def is_site_administrator_explorer(self): + return self.site_role == c.SITE_ROLE + + @staticmethod + def from_server(server: Server) -> "UserInfo": + assert server.user_id, "make the connection with tableau" + + user: UserItem = server.users.get_by_id(server.user_id) + + assert user.site_role, "site_role is not available" # to silent the lint + + assert user.name, "user name is not available" # to silent the lint + + assert server.site_id, "site identifier is not available" # to silent the lint + + return UserInfo( + user_name=user.name, + site_role=user.site_role, + site_id=server.site_id, + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_validation.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_validation.py new file mode 100644 index 00000000000000..4a703faf6091b3 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_validation.py @@ -0,0 +1,48 @@ +import logging +from typing import Dict, Union + +from datahub.ingestion.api.source import CapabilityReport, SourceCapability +from datahub.ingestion.source.tableau import tableau_constant as c +from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo + +logger = logging.getLogger(__name__) + + +def check_user_role( + logged_in_user: UserInfo, +) -> Dict[Union[SourceCapability, str], CapabilityReport]: + capability_dict: Dict[Union[SourceCapability, str], CapabilityReport] = { + c.SITE_PERMISSION: CapabilityReport( + capable=True, + ) + } + + failure_reason: str = ( + "The user does not have the `Site Administrator Explorer` role." + ) + + mitigation_message_prefix: str = ( + "Assign `Site Administrator Explorer` role to the user" + ) + mitigation_message_suffix: str = "Refer to the setup guide: https://datahubproject.io/docs/quick-ingestion-guides/tableau/setup" + + try: + # TODO: Add check for `Enable Derived Permissions` + if not logged_in_user.is_site_administrator_explorer(): + capability_dict[c.SITE_PERMISSION] = CapabilityReport( + capable=False, + failure_reason=f"{failure_reason} Their current role is {logged_in_user.site_role}.", + mitigation_message=f"{mitigation_message_prefix} `{logged_in_user.user_name}`. {mitigation_message_suffix}", + ) + + return capability_dict + + except Exception as e: + logger.warning(msg=e, exc_info=e) + capability_dict[c.SITE_PERMISSION] = CapabilityReport( + capable=False, + failure_reason="Failed to verify user role.", + mitigation_message=f"{mitigation_message_prefix}. 
{mitigation_message_suffix}", # user is unknown + ) + + return capability_dict diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index 79ea98d1c7f54e..f81eb291e89e1d 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -490,7 +490,7 @@ def __init__( self._exit_stack.push(self._query_usage_counts) # Tool Extractor - self._tool_meta_extractor = ToolMetaExtractor() + self._tool_meta_extractor = ToolMetaExtractor.create(graph) self.report.tool_meta_report = self._tool_meta_extractor.report def close(self) -> None: diff --git a/metadata-ingestion/src/datahub/sql_parsing/tool_meta_extractor.py b/metadata-ingestion/src/datahub/sql_parsing/tool_meta_extractor.py index 0d85002776e5e2..5af9d9d4f0fffc 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/tool_meta_extractor.py +++ b/metadata-ingestion/src/datahub/sql_parsing/tool_meta_extractor.py @@ -1,3 +1,4 @@ +import contextlib import json import logging from dataclasses import dataclass, field @@ -5,8 +6,15 @@ from typing_extensions import Protocol +from datahub.api.entities.platformresource.platform_resource import ( + ElasticPlatformResourceQuery, + PlatformResource, + PlatformResourceSearchFields, +) from datahub.ingestion.api.report import Report +from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.urns import CorpGroupUrn, CorpUserUrn +from datahub.utilities.search_utils import LogicalOperator from datahub.utilities.stats_collections import int_top_k_dict UrnStr = str @@ -31,6 +39,7 @@ def _get_last_line(query: str) -> str: @dataclass class ToolMetaExtractorReport(Report): num_queries_meta_extracted: Dict[str, int] = field(default_factory=int_top_k_dict) + failures: List[str] = field(default_factory=list) class ToolMetaExtractor: @@ -42,14 +51,81 @@ class ToolMetaExtractor: by warehouse query logs. 
""" - def __init__(self) -> None: - self.report = ToolMetaExtractorReport() + def __init__( + self, + report: ToolMetaExtractorReport, + looker_user_mapping: Optional[Dict[str, str]] = None, + ) -> None: + self.report = report self.known_tool_extractors: List[Tuple[str, Callable[[QueryLog], bool]]] = [ ( "mode", self._extract_mode_query, - ) + ), + ( + "looker", + self._extract_looker_query, + ), ] + # maps user id (as string) to email address + self.looker_user_mapping = looker_user_mapping + + @classmethod + def create( + cls, + graph: Optional[DataHubGraph] = None, + ) -> "ToolMetaExtractor": + report = ToolMetaExtractorReport() + looker_user_mapping = None + if graph: + try: + looker_user_mapping = cls.extract_looker_user_mapping_from_graph( + graph, report + ) + except Exception as e: + report.failures.append( + f"Unexpected error during Looker user metadata extraction: {str(e)}" + ) + + return cls(report, looker_user_mapping) + + @classmethod + def extract_looker_user_mapping_from_graph( + cls, graph: DataHubGraph, report: ToolMetaExtractorReport + ) -> Optional[Dict[str, str]]: + looker_user_mapping = None + query = ( + ElasticPlatformResourceQuery.create_from() + .group(LogicalOperator.AND) + .add_field_match(PlatformResourceSearchFields.PLATFORM, "looker") + .add_field_match( + PlatformResourceSearchFields.RESOURCE_TYPE, + "USER_ID_MAPPING", + ) + .end() + ) + platform_resources = list( + PlatformResource.search_by_filters(query=query, graph_client=graph) + ) + + if len(platform_resources) > 1: + report.failures.append( + "Looker user metadata extraction failed. Found more than one looker user id mappings." + ) + else: + platform_resource = platform_resources[0] + + if ( + platform_resource + and platform_resource.resource_info + and platform_resource.resource_info.value + ): + with contextlib.suppress(ValueError, AssertionError): + value = platform_resource.resource_info.value.as_raw_json() + if value: + looker_user_mapping = value + + return looker_user_mapping def _extract_mode_query(self, entry: QueryLog) -> bool: """ @@ -78,14 +154,49 @@ def _extract_mode_query(self, entry: QueryLog) -> bool: return True + def _extract_looker_query(self, entry: QueryLog) -> bool: + """ + Returns: + bool: whether QueryLog entry is that of looker and looker user info + is extracted into entry. 
+ """ + if not self.looker_user_mapping: + return False + + last_line = _get_last_line(entry.query_text) + + if not (last_line.startswith("--") and "Looker Query Context" in last_line): + return False + + start_quote_idx = last_line.index("'") + end_quote_idx = last_line.rindex("'") + if start_quote_idx == -1 or end_quote_idx == -1: + return False + + looker_json_raw = last_line[start_quote_idx + 1 : end_quote_idx] + looker_json = json.loads(looker_json_raw) + + user_id = str(looker_json["user_id"]) + email = self.looker_user_mapping.get(user_id) + if not email: + return False + + original_user = entry.user + + entry.user = email_to_user_urn(email) + entry.extra_info = entry.extra_info or {} + entry.extra_info["user_via"] = original_user + + return True + def extract_bi_metadata(self, entry: QueryLog) -> bool: for tool, meta_extractor in self.known_tool_extractors: try: if meta_extractor(entry): self.report.num_queries_meta_extracted[tool] += 1 return True - except Exception: - logger.debug("Tool metadata extraction failed with error : {e}") + except Exception as e: + logger.debug(f"Tool metadata extraction failed with error : {e}") return False diff --git a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py index bedc5bc8fcd5e5..9dbadd4804997d 100644 --- a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py +++ b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py @@ -117,7 +117,7 @@ def diff_metadata_json( ignore_paths: Sequence[str] = (), ignore_order: bool = True, ) -> Union[DeepDiff, MCPDiff]: - ignore_paths = (*ignore_paths, *default_exclude_paths, r"root\[\d+].delta_info") + ignore_paths = [*ignore_paths, *default_exclude_paths, r"root\[\d+].delta_info"] try: if ignore_order: golden_map = get_aspects_by_urn(golden) diff --git a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py index b0f5022446de15..b8c27666d7f538 100644 --- a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py +++ b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py @@ -1,6 +1,7 @@ import collections import gzip import logging +import os import pathlib import pickle import shutil @@ -33,6 +34,14 @@ logger: logging.Logger = logging.getLogger(__name__) +OVERRIDE_SQLITE_VERSION_REQUIREMENT_STR = ( + os.environ.get("OVERRIDE_SQLITE_VERSION_REQ") or "" +) +OVERRIDE_SQLITE_VERSION_REQUIREMENT = ( + OVERRIDE_SQLITE_VERSION_REQUIREMENT_STR + and OVERRIDE_SQLITE_VERSION_REQUIREMENT_STR.lower() != "false" +) + _DEFAULT_FILE_NAME = "sqlite.db" _DEFAULT_TABLE_NAME = "data" @@ -212,6 +221,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]): _active_object_cache: OrderedDict[str, Tuple[_VT, bool]] = field( init=False, repr=False ) + _use_sqlite_on_conflict: bool = field(repr=False, default=True) def __post_init__(self) -> None: assert ( @@ -232,7 +242,10 @@ def __post_init__(self) -> None: # We use the ON CONFLICT clause to implement UPSERTs with sqlite. # This was added in 3.24.0 from 2018-06-04. # See https://www.sqlite.org/lang_conflict.html - raise RuntimeError("SQLite version 3.24.0 or later is required") + if OVERRIDE_SQLITE_VERSION_REQUIREMENT: + self.use_sqlite_on_conflict = False + else: + raise RuntimeError("SQLite version 3.24.0 or later is required") # We keep a small cache in memory to avoid having to serialize/deserialize # data from the database too often. 
We use an OrderedDict to build @@ -295,7 +308,7 @@ def _prune_cache(self, num_items_to_prune: int) -> None: values.append(column_serializer(value)) items_to_write.append(tuple(values)) - if items_to_write: + if items_to_write and self._use_sqlite_on_conflict: # Tricky: By using a INSERT INTO ... ON CONFLICT (key) structure, we can # ensure that the rowid remains the same if a value is updated but is # autoincremented when rows are inserted. @@ -312,6 +325,26 @@ def _prune_cache(self, num_items_to_prune: int) -> None: """, items_to_write, ) + else: + for item in items_to_write: + try: + self._conn.execute( + f"""INSERT INTO {self.tablename} ( + key, + value + {''.join(f', {column_name}' for column_name in self.extra_columns.keys())} + ) + VALUES ({', '.join(['?'] *(2 + len(self.extra_columns)))})""", + item, + ) + except sqlite3.IntegrityError: + self._conn.execute( + f"""UPDATE {self.tablename} SET + value = ? + {''.join(f', {column_name} = ?' for column_name in self.extra_columns.keys())} + WHERE key = ?""", + (*item[1:], item[0]), + ) def flush(self) -> None: self._prune_cache(len(self._active_object_cache)) diff --git a/metadata-ingestion/tests/integration/git/test_git_clone.py b/metadata-ingestion/tests/integration/git/test_git_clone.py index 60cf20fefcbdd1..01e075930998a4 100644 --- a/metadata-ingestion/tests/integration/git/test_git_clone.py +++ b/metadata-ingestion/tests/integration/git/test_git_clone.py @@ -1,4 +1,5 @@ import os +import pathlib import pytest from pydantic import SecretStr @@ -12,7 +13,7 @@ LOOKML_TEST_SSH_KEY = os.environ.get("DATAHUB_LOOKML_GIT_TEST_SSH_KEY") -def test_base_url_guessing(): +def test_base_url_guessing() -> None: # Basic GitHub repo. config = GitInfo(repo="https://github.com/datahub-project/datahub", branch="master") assert config.repo_ssh_locator == "git@github.com:datahub-project/datahub.git" @@ -70,7 +71,7 @@ def test_base_url_guessing(): ) -def test_github_branch(): +def test_github_branch() -> None: config = GitInfo( repo="owner/repo", ) @@ -83,11 +84,37 @@ def test_github_branch(): assert config.branch_for_clone == "main" +def test_url_subdir() -> None: + git_ref = GitReference(repo="https://github.com/org/repo", url_subdir="dbt") + assert ( + git_ref.get_url_for_file_path("model.sql") + == "https://github.com/org/repo/blob/main/dbt/model.sql" + ) + + git_ref = GitReference(repo="https://gitlab.com/org/repo", url_subdir="dbt") + assert ( + git_ref.get_url_for_file_path("model.sql") + == "https://gitlab.com/org/repo/-/blob/main/dbt/model.sql" + ) + + git_ref = GitReference(repo="https://github.com/org/repo", url_subdir="") + assert ( + git_ref.get_url_for_file_path("model.sql") + == "https://github.com/org/repo/blob/main/model.sql" + ) + + git_ref = GitReference(repo="https://github.com/org/repo", url_subdir="dbt/models") + assert ( + git_ref.get_url_for_file_path("model.sql") + == "https://github.com/org/repo/blob/main/dbt/models/model.sql" + ) + + def test_sanitize_repo_url() -> None: assert_doctest(datahub.ingestion.source.git.git_import) -def test_git_clone_public(tmp_path): +def test_git_clone_public(tmp_path: pathlib.Path) -> None: git_clone = GitClone(str(tmp_path)) checkout_dir = git_clone.clone( ssh_key=None, @@ -107,7 +134,7 @@ def test_git_clone_public(tmp_path): LOOKML_TEST_SSH_KEY is None, reason="DATAHUB_LOOKML_GIT_TEST_SSH_KEY env variable is not configured", ) -def test_git_clone_private(tmp_path): +def test_git_clone_private(tmp_path: pathlib.Path) -> None: git_clone = GitClone(str(tmp_path)) secret_key = 
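The `FileBackedDict` change keeps the single-statement `INSERT ... ON CONFLICT` upsert as the default write path and only falls back to a plain INSERT followed by an UPDATE on `IntegrityError` when the `OVERRIDE_SQLITE_VERSION_REQ` escape hatch is set for SQLite builds older than 3.24.0. A small sketch of the two equivalent write paths against a throwaway table (table and column names are illustrative; the upsert form needs SQLite >= 3.24.0):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE data (key TEXT PRIMARY KEY, value BLOB)")

    def write_upsert(key: str, value: bytes) -> None:
        # Single-statement upsert; keeps the existing rowid when the key is already present.
        conn.execute(
            "INSERT INTO data (key, value) VALUES (?, ?) "
            "ON CONFLICT (key) DO UPDATE SET value = excluded.value",
            (key, value),
        )

    def write_fallback(key: str, value: bytes) -> None:
        # Portable fallback: try the insert, and update in place if the key already exists.
        try:
            conn.execute("INSERT INTO data (key, value) VALUES (?, ?)", (key, value))
        except sqlite3.IntegrityError:
            conn.execute("UPDATE data SET value = ? WHERE key = ?", (value, key))

    write_upsert("a", b"1")
    write_fallback("a", b"2")
    assert conn.execute("SELECT value FROM data WHERE key = 'a'").fetchone()[0] == b"2"
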
SecretStr(LOOKML_TEST_SSH_KEY) if LOOKML_TEST_SSH_KEY else None diff --git a/metadata-ingestion/tests/integration/kafka/test_kafka.py b/metadata-ingestion/tests/integration/kafka/test_kafka.py index 0d9a714625e96b..648c4b26b20a76 100644 --- a/metadata-ingestion/tests/integration/kafka/test_kafka.py +++ b/metadata-ingestion/tests/integration/kafka/test_kafka.py @@ -102,7 +102,7 @@ def test_kafka_test_connection(mock_kafka_service, config_dict, is_success): test_connection_helpers.assert_capability_report( capability_report=report.capability_report, failure_capabilities={ - SourceCapability.SCHEMA_METADATA: "Failed to establish a new connection" + SourceCapability.SCHEMA_METADATA: "[Errno 111] Connection refused" }, ) diff --git a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json index a9c445b5986efe..6ae772c134cb32 100644 --- a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json +++ b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json @@ -842,6 +842,62 @@ "pipelineName": "stateful-looker-pipeline" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,dashboard_elements.10)", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json index af9c62a2a41803..d7620980a9cedb 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json @@ -497,6 +497,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + 
"entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json index b89bc356b48fdc..13963af55bfe56 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json @@ -735,6 +735,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json index 810fefd8f6cb85..f11d060102851c 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json @@ -735,6 +735,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": 
"urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_folder_path_pattern_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_folder_path_pattern_ingest.json index 3d78397f54a235..f6e39dd5286cd0 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_folder_path_pattern_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_folder_path_pattern_ingest.json @@ -828,6 +828,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json index 5a540e61e768d7..203bed843155c8 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json @@ -464,6 +464,21 @@ "/Folders/Shared" ] } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:test-1@looker.com", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } } ] } @@ -708,6 +723,21 @@ "/Folders/Personal" ] } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:test-2@looker.com", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": 
"urn:li:corpuser:unknown" + } + } } ] } @@ -1108,12 +1138,12 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/Explore/sales_model" + "/Explore/data" ] } }, @@ -1126,12 +1156,12 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "project": "lkml_samples", - "model": "sales_model", + "model": "data", "looker.explore.label": "My Explore View", - "looker.explore.name": "sales_explore", + "looker.explore.name": "my_view", "looker.explore.file": "test_source_file.lkml" }, - "externalUrl": "https://looker.company.com/explore/sales_model/sales_explore", + "externalUrl": "https://looker.company.com/explore/data/my_view", "name": "My Explore View", "description": "lorem ipsum", "tags": [] @@ -1153,7 +1183,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "sales_explore", + "schemaName": "my_view", "platform": "urn:li:dataPlatform:looker", "version": 0, "created": { @@ -1208,7 +1238,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1227,12 +1257,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "embed", "aspect": { "json": { - "renderUrl": "https://looker.company.com/embed/explore/sales_model/sales_explore" + "renderUrl": "https://looker.company.com/embed/explore/data/my_view" } }, "systemMetadata": { @@ -1244,12 +1274,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5" + "container": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" } }, "systemMetadata": { @@ -1261,7 +1291,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1271,8 +1301,8 @@ "id": "Explore" }, { - "id": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5", - "urn": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5" + "id": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "urn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" } ] } @@ -1287,12 +1317,12 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/Explore/data" + "/Explore/order_model" ] } }, @@ -1305,12 +1335,12 @@ 
"com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "project": "lkml_samples", - "model": "data", + "model": "order_model", "looker.explore.label": "My Explore View", - "looker.explore.name": "my_view", + "looker.explore.name": "order_explore", "looker.explore.file": "test_source_file.lkml" }, - "externalUrl": "https://looker.company.com/explore/data/my_view", + "externalUrl": "https://looker.company.com/explore/order_model/order_explore", "name": "My Explore View", "description": "lorem ipsum", "tags": [] @@ -1332,7 +1362,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "my_view", + "schemaName": "order_explore", "platform": "urn:li:dataPlatform:looker", "version": 0, "created": { @@ -1387,7 +1417,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1406,12 +1436,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", "changeType": "UPSERT", "aspectName": "embed", "aspect": { "json": { - "renderUrl": "https://looker.company.com/embed/explore/data/my_view" + "renderUrl": "https://looker.company.com/embed/explore/order_model/order_explore" } }, "systemMetadata": { @@ -1423,12 +1453,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" + "container": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60" } }, "systemMetadata": { @@ -1440,7 +1470,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1450,8 +1480,8 @@ "id": "Explore" }, { - "id": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", - "urn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" + "id": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60", + "urn": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60" } ] } @@ -1466,12 +1496,12 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/Explore/order_model" + "/Explore/sales_model" ] } }, @@ -1484,12 +1514,12 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "project": "lkml_samples", - "model": "order_model", + "model": "sales_model", "looker.explore.label": "My Explore View", - "looker.explore.name": "order_explore", + "looker.explore.name": "sales_explore", "looker.explore.file": "test_source_file.lkml" }, - "externalUrl": "https://looker.company.com/explore/order_model/order_explore", + "externalUrl": 
"https://looker.company.com/explore/sales_model/sales_explore", "name": "My Explore View", "description": "lorem ipsum", "tags": [] @@ -1511,7 +1541,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "order_explore", + "schemaName": "sales_explore", "platform": "urn:li:dataPlatform:looker", "version": 0, "created": { @@ -1566,7 +1596,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1585,12 +1615,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", "changeType": "UPSERT", "aspectName": "embed", "aspect": { "json": { - "renderUrl": "https://looker.company.com/embed/explore/order_model/order_explore" + "renderUrl": "https://looker.company.com/embed/explore/sales_model/sales_explore" } }, "systemMetadata": { @@ -1602,12 +1632,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60" + "container": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5" } }, "systemMetadata": { @@ -1619,7 +1649,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1629,8 +1659,8 @@ "id": "Explore" }, { - "id": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60", - "urn": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60" + "id": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5", + "urn": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5" } ] } @@ -1705,6 +1735,62 @@ "pipelineName": "execution-1" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + 
"systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json index 9ac95b8482a475..87af50f95ed6bb 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json @@ -793,6 +793,60 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:8436a2a37c4a7e81fb08c9c8415d2e4b", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:8436a2a37c4a7e81fb08c9c8415d2e4b", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:8436a2a37c4a7e81fb08c9c8415d2e4b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json index 3a2c6359ea63c2..b990ce7c67dab6 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json @@ -759,6 +759,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, 
{ "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json index 007eee348aeaf8..391192b3d16f36 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json @@ -513,6 +513,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_non_personal_independent_look.json b/metadata-ingestion/tests/integration/looker/golden_test_non_personal_independent_look.json index 859b9163d7aad6..4909a6af73a225 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_non_personal_independent_look.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_non_personal_independent_look.json @@ -464,6 +464,21 @@ "/Folders/Shared" ] } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:test-1@looker.com", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } } ] } @@ -1185,6 +1200,62 @@ "pipelineName": "execution-1" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + 
"pipelineName": "execution-1" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json index 8256c984afb274..ddeb5428b1d726 100644 --- a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json +++ b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json @@ -762,6 +762,62 @@ "pipelineName": "stateful-looker-pipeline" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", @@ -814,8 +870,8 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(looker,dashboards.11)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,bogus data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -831,8 +887,8 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(looker,dashboard_elements.10)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,dashboards.11)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -865,8 +921,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,bogus data.explore.my_view,PROD)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.10)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json index 0b3530f9c24629..594983c8fb0f2a 100644 --- 
a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json +++ b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json @@ -678,6 +678,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py index 8bbf14709ff9fb..a39de8384efb23 100644 --- a/metadata-ingestion/tests/integration/looker/test_looker.py +++ b/metadata-ingestion/tests/integration/looker/test_looker.py @@ -83,6 +83,7 @@ def test_looker_ingest(pytestconfig, tmp_path, mock_time): with mock.patch("looker_sdk.init40") as mock_sdk: mock_sdk.return_value = mocked_client setup_mock_dashboard(mocked_client) + mocked_client.run_inline_query.side_effect = side_effect_query_inline setup_mock_explore(mocked_client) test_resources_dir = pytestconfig.rootpath / "tests/integration/looker" @@ -319,6 +320,7 @@ def setup_mock_look(mocked_client): mocked_client.all_looks.return_value = [ Look( id="1", + user_id="1", title="Outer Look", description="I am not part of any Dashboard", query_id="1", @@ -327,6 +329,7 @@ def setup_mock_look(mocked_client): Look( id="2", title="Personal Look", + user_id="2", description="I am not part of any Dashboard and in personal folder", query_id="2", folder=FolderBase( @@ -561,6 +564,20 @@ def get_user( mocked_client.user.side_effect = get_user +def setup_mock_all_user(mocked_client): + def all_users( + fields: Optional[str] = None, + transport_options: Optional[transport.TransportOptions] = None, + ) -> List[User]: + return [ + User(id="1", email="test-1@looker.com"), + User(id="2", email="test-2@looker.com"), + User(id="3", email="test-3@looker.com"), + ] + + mocked_client.all_users.side_effect = all_users + + def side_effect_query_inline( result_format: str, body: WriteQuery, transport_options: Optional[TransportOptions] ) -> str: @@ -714,6 +731,7 @@ def test_looker_ingest_usage_history(pytestconfig, tmp_path, mock_time): mocked_client.run_inline_query.side_effect = side_effect_query_inline setup_mock_explore(mocked_client) setup_mock_user(mocked_client) + setup_mock_all_user(mocked_client) test_resources_dir = 
pytestconfig.rootpath / "tests/integration/looker" @@ -946,6 +964,8 @@ def ingest_independent_looks( mock_sdk.return_value = mocked_client setup_mock_dashboard(mocked_client) setup_mock_explore(mocked_client) + setup_mock_user(mocked_client) + setup_mock_all_user(mocked_client) setup_mock_look(mocked_client) test_resources_dir = pytestconfig.rootpath / "tests/integration/looker" diff --git a/metadata-ingestion/tests/integration/snowflake/common.py b/metadata-ingestion/tests/integration/snowflake/common.py index 9e4bb2f0eb634f..862d27186703a8 100644 --- a/metadata-ingestion/tests/integration/snowflake/common.py +++ b/metadata-ingestion/tests/integration/snowflake/common.py @@ -14,6 +14,11 @@ NUM_OPS = 10 NUM_USAGE = 0 + +def is_secure(view_idx): + return view_idx == 1 + + FROZEN_TIME = "2022-06-07 17:00:00" large_sql_query = """WITH object_access_history AS ( @@ -247,9 +252,25 @@ def default_query_results( # noqa: C901 "name": f"VIEW_{view_idx}", "created_on": datetime(2021, 6, 8, 0, 0, 0, 0), "comment": "Comment for View", - "text": f"create view view_{view_idx} as select * from table_{view_idx}", + "is_secure": "true" if is_secure(view_idx) else "false", + "text": ( + f"create view view_{view_idx} as select * from table_{view_idx}" + if not is_secure(view_idx) + else None + ), + } + for view_idx in range(1, num_views + 1) + ] + elif query == SnowflakeQuery.get_secure_view_definitions(): + return [ + { + "TABLE_CATALOG": "TEST_DB", + "TABLE_SCHEMA": "TEST_SCHEMA", + "TABLE_NAME": f"VIEW_{view_idx}", + "VIEW_DEFINITION": f"create view view_{view_idx} as select * from table_{view_idx}", } for view_idx in range(1, num_views + 1) + if is_secure(view_idx) ] elif query == SnowflakeQuery.columns_for_schema("TEST_SCHEMA", "TEST_DB"): return [ diff --git a/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json b/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json index 4415b1ad3e5159..48ec46af069cef 100644 --- a/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json +++ b/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json @@ -490,7 +490,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/", "name": "TABLE_1", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_1", @@ -789,7 +791,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/", "name": "TABLE_2", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_2", @@ -1088,7 +1092,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/", "name": "TABLE_3", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_3", @@ -1387,7 +1393,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": 
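The new Snowflake test fixtures encode the secure-view behavior: `SHOW VIEWS` now reports `is_secure` and returns no `text` for secure views, so their DDL has to be backfilled from the separate `get_secure_view_definitions()` query. A rough sketch of that merge, following the row shapes used in the fixtures (the function name is illustrative):

    from typing import Dict, Iterable, Optional

    def merge_view_definitions(
        show_views_rows: Iterable[dict],
        secure_definition_rows: Iterable[dict],
        db: str,
        schema: str,
    ) -> Dict[str, Optional[str]]:
        # DDL for secure views, keyed by (catalog, schema, view name).
        secure_ddl = {
            (r["TABLE_CATALOG"], r["TABLE_SCHEMA"], r["TABLE_NAME"]): r["VIEW_DEFINITION"]
            for r in secure_definition_rows
        }
        definitions: Dict[str, Optional[str]] = {}
        for row in show_views_rows:
            # Fall back to the secure-view definition when SHOW VIEWS returned no text.
            definitions[row["name"]] = row["text"] or secure_ddl.get((db, schema, row["name"]))
        return definitions
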
"https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/", "name": "TABLE_4", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_4", @@ -1686,7 +1694,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_5/", "name": "TABLE_5", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_5", @@ -1985,7 +1995,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/", "name": "TABLE_6", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_6", @@ -2284,7 +2296,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/", "name": "TABLE_7", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_7", @@ -2583,7 +2597,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/", "name": "TABLE_8", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_8", @@ -2882,7 +2898,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/", "name": "TABLE_9", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_9", @@ -3181,7 +3199,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/", "name": "TABLE_10", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_10", @@ -3471,23 +3491,25 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {}, + "customProperties": { + "IS_SECURE": "true" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_1/", "name": "VIEW_1", "qualifiedName": "TEST_DB.TEST_SCHEMA.VIEW_1", "description": "Comment for View", "created": { - "time": 1623103200000 + "time": 1623090600000 }, "lastModified": { - "time": 1623103200000 + "time": 1623090600000 }, "tags": [] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_12_18-10_16_09", + "runId": "snowflake-2024_12_16-15_30_20-649nax", "lastRunId": "no-run-id-provided" } }, diff --git a/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json b/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json index 3040c6c4e9196f..f22cbd122361dc 100644 --- 
a/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json +++ b/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json @@ -621,12 +621,17 @@ "op": "add", "path": "/qualifiedName", "value": "TEST_DB.TEST_SCHEMA.VIEW_1" + }, + { + "op": "add", + "path": "/customProperties/IS_SECURE", + "value": "true" } ] }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00-ad3hnf", + "runId": "snowflake-2022_06_07-17_00_00-ivthci", "lastRunId": "no-run-id-provided" } }, diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 5b557efdab0bb0..4b2ac96931b950 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -7,6 +7,7 @@ import pytest from freezegun import freeze_time +from pydantic import ValidationError from requests.adapters import ConnectionError from tableauserverclient import PermissionsRule, Server from tableauserverclient.models import ( @@ -21,7 +22,9 @@ from datahub.emitter.mce_builder import DEFAULT_ENV, make_schema_field_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.run.pipeline import Pipeline, PipelineContext, PipelineInitError +from datahub.ingestion.api.source import TestConnectionReport +from datahub.ingestion.run.pipeline import Pipeline, PipelineContext +from datahub.ingestion.source.tableau import tableau_constant as c from datahub.ingestion.source.tableau.tableau import ( TableauConfig, TableauSiteSource, @@ -61,6 +64,7 @@ "projects": ["default", "Project 2", "Samples"], "extract_project_hierarchy": False, "page_size": 1000, + "workbook_page_size": 1000, "ingest_tags": True, "ingest_owner": True, "ingest_tables_external": True, @@ -571,52 +575,28 @@ def test_extract_all_project(pytestconfig, tmp_path, mock_datahub_graph): def test_value_error_projects_and_project_pattern( pytestconfig, tmp_path, mock_datahub_graph ): - # Ingestion should raise ValueError - output_file_name: str = "tableau_project_pattern_precedence_mces.json" - golden_file_name: str = "tableau_project_pattern_precedence_mces_golden.json" - new_config = config_source_default.copy() new_config["projects"] = ["default"] new_config["project_pattern"] = {"allow": ["^Samples$"]} with pytest.raises( - PipelineInitError, + ValidationError, match=r".*projects is deprecated. Please use project_path_pattern only.*", ): - tableau_ingest_common( - pytestconfig, - tmp_path, - mock_data(), - golden_file_name, - output_file_name, - mock_datahub_graph, - pipeline_config=new_config, - ) + TableauConfig.parse_obj(new_config) def test_project_pattern_deprecation(pytestconfig, tmp_path, mock_datahub_graph): - # Ingestion should raise ValueError - output_file_name: str = "tableau_project_pattern_deprecation_mces.json" - golden_file_name: str = "tableau_project_pattern_deprecation_mces_golden.json" - new_config = config_source_default.copy() del new_config["projects"] new_config["project_pattern"] = {"allow": ["^Samples$"]} new_config["project_path_pattern"] = {"allow": ["^Samples$"]} with pytest.raises( - PipelineInitError, + ValidationError, match=r".*project_pattern is deprecated. 
Please use project_path_pattern only*", ): - tableau_ingest_common( - pytestconfig, - tmp_path, - mock_data(), - golden_file_name, - output_file_name, - mock_datahub_graph, - pipeline_config=new_config, - ) + TableauConfig.parse_obj(new_config) def test_project_path_pattern_allow(pytestconfig, tmp_path, mock_datahub_graph): @@ -674,6 +654,7 @@ def test_tableau_ingest_with_platform_instance( "platform_instance": "acryl_site1", "projects": ["default", "Project 2"], "page_size": 1000, + "workbook_page_size": 1000, "ingest_tags": True, "ingest_owner": True, "ingest_tables_external": True, @@ -1296,31 +1277,21 @@ def test_hidden_asset_tags(pytestconfig, tmp_path, mock_datahub_graph): @pytest.mark.integration def test_hidden_assets_without_ingest_tags(pytestconfig, tmp_path, mock_datahub_graph): enable_logging() - output_file_name: str = "tableau_hidden_asset_tags_error_mces.json" - golden_file_name: str = "tableau_hidden_asset_tags_error_mces_golden.json" new_config = config_source_default.copy() new_config["tags_for_hidden_assets"] = ["hidden", "private"] new_config["ingest_tags"] = False with pytest.raises( - PipelineInitError, + ValidationError, match=r".*tags_for_hidden_assets is only allowed with ingest_tags enabled.*", ): - tableau_ingest_common( - pytestconfig, - tmp_path, - mock_data(), - golden_file_name, - output_file_name, - mock_datahub_graph, - pipeline_config=new_config, - ) + TableauConfig.parse_obj(new_config) @freeze_time(FROZEN_TIME) @pytest.mark.integration -def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_graph): +def test_permission_warning(pytestconfig, tmp_path, mock_datahub_graph): with mock.patch( "datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph", mock_datahub_graph, @@ -1357,11 +1328,99 @@ def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_gra warnings = list(reporter.warnings) - assert len(warnings) == 1 + assert len(warnings) == 2 + + assert warnings[0].title == "Insufficient Permissions" - assert warnings[0].title == "Derived Permission Error" + assert warnings[1].title == "Derived Permission Error" - assert warnings[0].message == ( + assert warnings[1].message == ( "Turn on your derived permissions. 
See for details " "https://community.tableau.com/s/question/0D54T00000QnjHbSAJ/how-to-fix-the-permissionsmodeswitched-error" ) + + +@freeze_time(FROZEN_TIME) +@pytest.mark.integration +def test_connection_report_test(requests_mock): + server_info_response = """ + + + foo + 2.4 + + + + """ + + requests_mock.register_uri( + "GET", + "https://do-not-connect/api/2.4/serverInfo", + text=server_info_response, + status_code=200, + headers={"Content-Type": "application/xml"}, + ) + + signin_response = """ + + + + + + + """ + + requests_mock.register_uri( + "POST", + "https://do-not-connect/api/2.4/auth/signin", + text=signin_response, + status_code=200, + headers={"Content-Type": "application/xml"}, + ) + + user_by_id_response = """ + + + + """ + + requests_mock.register_uri( + "GET", + "https://do-not-connect/api/2.4/sites/fake_site_luid/users/fake_user_id", + text=user_by_id_response, + status_code=200, + headers={"Content-Type": "application/xml"}, + ) + + report: TestConnectionReport = TableauSource.test_connection(config_source_default) + + assert report + assert report.capability_report + assert report.capability_report.get(c.SITE_PERMISSION) + assert report.capability_report[c.SITE_PERMISSION].capable + + # Role other than SiteAdministratorExplorer + user_by_id_response = """ + + + + """ + + requests_mock.register_uri( + "GET", + "https://do-not-connect/api/2.4/sites/fake_site_luid/users/fake_user_id", + text=user_by_id_response, + status_code=200, + headers={"Content-Type": "application/xml"}, + ) + + report = TableauSource.test_connection(config_source_default) + + assert report + assert report.capability_report + assert report.capability_report.get(c.SITE_PERMISSION) + assert report.capability_report[c.SITE_PERMISSION].capable is False + assert ( + report.capability_report[c.SITE_PERMISSION].failure_reason + == "The user does not have the `Site Administrator Explorer` role. Their current role is Explorer." 
+ ) diff --git a/metadata-ingestion/tests/unit/api/entities/structuredproperties/__init__.py b/metadata-ingestion/tests/unit/api/entities/structuredproperties/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/tests/unit/api/entities/structuredproperties/example_structured_properties_golden.json b/metadata-ingestion/tests/unit/api/entities/structuredproperties/example_structured_properties_golden.json new file mode 100644 index 00000000000000..29386ece7b0ca1 --- /dev/null +++ b/metadata-ingestion/tests/unit/api/entities/structuredproperties/example_structured_properties_golden.json @@ -0,0 +1,194 @@ +[ +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "io.acryl.privacy.retentionTime", + "displayName": "Retention Time", + "valueType": "urn:li:dataType:datahub.number", + "allowedValues": [ + { + "value": { + "string": "30" + }, + "description": "30 days, usually reserved for datasets that are ephemeral and contain pii" + }, + { + "value": { + "string": "90" + }, + "description": "Use this for datasets that drive monthly reporting but contain pii" + }, + { + "value": { + "string": "365" + }, + "description": "Use this for non-sensitive data that can be retained for longer" + } + ], + "cardinality": "MULTIPLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "immutable": false + } + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:io.acryl.dataManagement.replicationSLA", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "io.acryl.dataManagement.replicationSLA", + "displayName": "Replication SLA", + "valueType": "urn:li:dataType:datahub.number", + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset" + ], + "description": "SLA for how long data can be delayed before replicating to the destination cluster", + "immutable": false + } + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:io.acryl.dataManagement.deprecationDate", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "io.acryl.dataManagement.deprecationDate", + "displayName": "Deprecation Date", + "valueType": "urn:li:dataType:datahub.date", + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow", + "urn:li:entityType:datahub.dataJob" + ], + "immutable": false + } + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:io.acryl.dataManagement.steward", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "io.acryl.dataManagement.steward", + "displayName": "Steward", + "valueType": "urn:li:dataType:datahub.urn", + "typeQualifier": { + "allowedTypes": [ + "urn:li:entityType:datahub.corpuser", + "urn:li:entityType:datahub.corpGroup" + ] + }, + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow", + "urn:li:entityType:datahub.dataJob" + ], + "immutable": false + } + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": 
"urn:li:structuredProperty:io.acryl.dataManagement.certifier", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "io.acryl.dataManagement.certifier", + "displayName": "Person Certifying the asset", + "valueType": "urn:li:dataType:datahub.urn", + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.schemaField" + ], + "immutable": false + } + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:io.acryl.dataManagement.team", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "io.acryl.dataManagement.team", + "displayName": "Management team", + "valueType": "urn:li:dataType:datahub.string", + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset" + ], + "immutable": false + } + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:projectNames", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "projectNames", + "displayName": "Project names", + "valueType": "urn:li:dataType:datahub.string", + "allowedValues": [ + { + "value": { + "string": "Tracking" + }, + "description": "test value 1 for project" + }, + { + "value": { + "string": "DataHub" + }, + "description": "test value 2 for project" + } + ], + "cardinality": "MULTIPLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset" + ], + "immutable": false + } + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:namespace", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "namespace", + "displayName": "Namespace", + "valueType": "urn:li:dataType:datahub.string", + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset" + ], + "immutable": false + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/api/entities/structuredproperties/test_structuredproperties.py b/metadata-ingestion/tests/unit/api/entities/structuredproperties/test_structuredproperties.py new file mode 100644 index 00000000000000..e96b7c1f98437e --- /dev/null +++ b/metadata-ingestion/tests/unit/api/entities/structuredproperties/test_structuredproperties.py @@ -0,0 +1,38 @@ +import pathlib + +import pydantic +import pytest + +from datahub.api.entities.structuredproperties.structuredproperties import ( + StructuredProperties, + TypeQualifierAllowedTypes, +) +from tests.test_helpers.mce_helpers import check_goldens_stream + +RESOURCE_DIR = pathlib.Path(__file__).parent + + +def test_type_validation() -> None: + with pytest.raises(pydantic.ValidationError): + TypeQualifierAllowedTypes(allowed_types=["thisdoesnotexist"]) + + types = TypeQualifierAllowedTypes(allowed_types=["dataset"]) + assert types.allowed_types == ["urn:li:entityType:datahub.dataset"] + + +def test_structuredproperties_load(pytestconfig: pytest.Config) -> None: + example_properties_file = ( + pytestconfig.rootpath + / "examples/structured_properties/structured_properties.yaml" + ) + + properties = StructuredProperties.from_yaml(str(example_properties_file)) + mcps = [] + for property in properties: + mcps.extend(property.generate_mcps()) + + check_goldens_stream( + pytestconfig, + mcps, + golden_path=RESOURCE_DIR / "example_structured_properties_golden.json", + ) diff --git a/metadata-ingestion/tests/unit/serde/test_codegen.py 
b/metadata-ingestion/tests/unit/serde/test_codegen.py index 37ac35586950e1..98d62d5643ff2d 100644 --- a/metadata-ingestion/tests/unit/serde/test_codegen.py +++ b/metadata-ingestion/tests/unit/serde/test_codegen.py @@ -18,6 +18,7 @@ UpstreamClass, _Aspect, ) +from datahub.utilities.urns._urn_base import URN_TYPES _UPDATE_ENTITY_REGISTRY = os.getenv("UPDATE_ENTITY_REGISTRY", "false").lower() == "true" ENTITY_REGISTRY_PATH = pathlib.Path( @@ -165,3 +166,9 @@ def test_enum_options(): # This is mainly a sanity check to ensure that it doesn't do anything too crazy. env_options = get_enum_options(FabricTypeClass) assert "PROD" in env_options + + +def test_urn_types() -> None: + assert len(URN_TYPES) > 10 + for checked_type in ["dataset", "dashboard", "dataFlow", "schemaField"]: + assert checked_type in URN_TYPES diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json index 2d32e1328fbb4f..fd8475090f009e 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json @@ -185,7 +185,7 @@ "aspect": { "json": { "statement": { - "value": "ALTER TABLE dev.public.foo_staging RENAME TO foo", + "value": "ALTER TABLE dev.public.foo_staging RENAME TO foo /* Datahub generated query text-- */", "language": "SQL" }, "source": "SYSTEM", diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json index af0fca485777ff..d9d46a4b14a146 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json @@ -185,7 +185,7 @@ "aspect": { "json": { "statement": { - "value": "ALTER TABLE dev.public.person_info_swap SWAP WITH dev.public.person_info", + "value": "ALTER TABLE dev.public.person_info_swap SWAP WITH dev.public.person_info /* Datahub generated query text-- */", "language": "SQL" }, "source": "SYSTEM", @@ -438,7 +438,7 @@ "aspect": { "json": { "statement": { - "value": "ALTER TABLE dev.public.person_info SWAP WITH dev.public.person_info_swap", + "value": "ALTER TABLE dev.public.person_info SWAP WITH dev.public.person_info_swap /* Datahub generated query text-- */", "language": "SQL" }, "source": "SYSTEM", diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json index ceaaf8f6887c7c..b4eaf76a149337 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json @@ -175,7 +175,7 @@ "aspect": { "json": { "statement": { - "value": "CREATE TABLE person_info_swap CLONE person_info;\n\nCREATE TABLE person_info_incremental AS\nSELECT\n *\nFROM person_info_dep;\n\nINSERT INTO person_info_swap\nSELECT\n *\nFROM person_info_incremental;\n\nALTER TABLE dev.public.person_info_swap SWAP WITH dev.public.person_info", + "value": "CREATE TABLE person_info_swap CLONE person_info;\n\nCREATE TABLE person_info_incremental AS\nSELECT\n *\nFROM person_info_dep;\n\nINSERT INTO person_info_swap\nSELECT\n *\nFROM person_info_incremental;\n\nALTER TABLE dev.public.person_info_swap SWAP 
WITH dev.public.person_info /* Datahub generated query text-- */", "language": "SQL" }, "source": "SYSTEM", diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json index f5f573f3d51136..9621b7d1c265b4 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json @@ -1,7 +1,7 @@ { "query_type": "SELECT", "query_type_props": {}, - "query_fingerprint": "c721ce16410601b36e5f32bd9c5c28488500a93e617363739faebfe71496f163", + "query_fingerprint": "a204522c98a01568d8575a98a715de98985aeef0e822feb8450153f71891d6c6", "in_tables": [ "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-staging-2.smoke_test_db_4.INFORMATION_SCHEMA.COLUMNS,PROD)", "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-staging-2.smoke_test_db_4.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS,PROD)" @@ -178,6 +178,6 @@ ], "debug_info": { "confidence": 0.2, - "generalized_statement": "SELECT c.table_catalog AS table_catalog, c.table_schema AS table_schema, c.table_name AS table_name, c.column_name AS column_name, c.ordinal_position AS ordinal_position, cfp.field_path AS field_path, c.is_nullable AS is_nullable, CASE WHEN CONTAINS_SUBSTR(cfp.field_path, ?) THEN NULL ELSE c.data_type END AS data_type, description AS comment, c.is_hidden AS is_hidden, c.is_partitioning_column AS is_partitioning_column, c.clustering_ordinal_position AS clustering_ordinal_position FROM `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMNS AS c JOIN `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS AS cfp ON cfp.table_name = c.table_name AND cfp.column_name = c.column_name ORDER BY table_catalog, table_schema, table_name, ordinal_position ASC, data_type DESC" + "generalized_statement": "SELECT c.table_catalog AS table_catalog, c.table_schema AS table_schema, c.table_name AS table_name, c.column_name AS column_name, c.ordinal_position AS ordinal_position, cfp.field_path AS field_path, c.is_nullable AS is_nullable, CASE WHEN CONTAINS_SUBSTR(cfp.field_path, ?) 
THEN NULL ELSE c.data_type END AS data_type, description AS comment, c.is_hidden AS is_hidden, c.is_partitioning_column AS is_partitioning_column, c.clustering_ordinal_position AS clustering_ordinal_position FROM `acryl-staging-2`.`smoke_test_db_4`.`INFORMATION_SCHEMA.COLUMNS` AS c JOIN `acryl-staging-2`.`smoke_test_db_4`.`INFORMATION_SCHEMA.COLUMN_FIELD_PATHS` AS cfp ON cfp.table_name = c.table_name AND cfp.column_name = c.column_name ORDER BY table_catalog, table_schema, table_name, ordinal_position ASC, data_type DESC" } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_tool_meta_extractor.py b/metadata-ingestion/tests/unit/sql_parsing/test_tool_meta_extractor.py index 6f590b53071467..f6566f007f5e6b 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_tool_meta_extractor.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_tool_meta_extractor.py @@ -1,11 +1,14 @@ from datahub.configuration.datetimes import parse_absolute_time from datahub.metadata.urns import CorpUserUrn from datahub.sql_parsing.sql_parsing_aggregator import PreparsedQuery -from datahub.sql_parsing.tool_meta_extractor import ToolMetaExtractor +from datahub.sql_parsing.tool_meta_extractor import ( + ToolMetaExtractor, + ToolMetaExtractorReport, +) def test_extract_mode_metadata() -> None: - extractor = ToolMetaExtractor() + extractor = ToolMetaExtractor(report=ToolMetaExtractorReport()) query = """\ select * from LONG_TAIL_COMPANIONS.ADOPTION.PET_PROFILES LIMIT 100 @@ -30,8 +33,42 @@ def test_extract_mode_metadata() -> None: assert extractor.report.num_queries_meta_extracted["mode"] == 1 +def test_extract_looker_metadata() -> None: + extractor = ToolMetaExtractor( + report=ToolMetaExtractorReport(), looker_user_mapping={"7": "john.doe@xyz.com"} + ) + looker_query = """\ +SELECT + all_entities_extended_sibling."ENTITY" AS "all_entities_extended_sibling.entity_type", + COUNT(DISTINCT ( all_entities_extended_sibling."URN" )) AS "all_entities_extended_sibling.distinct_count" +FROM "PUBLIC"."ALL_ENTITIES" + AS all_entities_extended_sibling +GROUP BY + 1 +ORDER BY + 1 +FETCH NEXT 50 ROWS ONLY +-- Looker Query Context '{"user_id":7,"history_slug":"264797031bc403cf382cbefbe3700849","instance_slug":"32654f2ffadf10b1949d4009e52fc6a4"}' +""" + + entry = PreparsedQuery( + query_id=None, + query_text=looker_query, + upstreams=[], + downstream=None, + column_lineage=None, + column_usage=None, + inferred_schema=None, + user=CorpUserUrn("mode"), + timestamp=parse_absolute_time("2021-08-01T01:02:03Z"), + ) + assert extractor.extract_bi_metadata(entry) + assert entry.user == CorpUserUrn("john.doe") + assert extractor.report.num_queries_meta_extracted["looker"] == 1 + + def test_extract_no_metadata() -> None: - extractor = ToolMetaExtractor() + extractor = ToolMetaExtractor(report=ToolMetaExtractorReport()) query = """\ select * from LONG_TAIL_COMPANIONS.ADOPTION.PET_PROFILES LIMIT 100 @@ -53,3 +90,4 @@ def test_extract_no_metadata() -> None: assert not extractor.extract_bi_metadata(entry) assert extractor.report.num_queries_meta_extracted["mode"] == 0 + assert extractor.report.num_queries_meta_extracted["looker"] == 0 diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_redundant_run_skip_handler.py b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_redundant_run_skip_handler.py index 85c86f8d205d9a..5631ad2c69f949 100644 --- a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_redundant_run_skip_handler.py +++ 
b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_redundant_run_skip_handler.py @@ -37,7 +37,11 @@ def stateful_source(mock_datahub_graph: DataHubGraph) -> Iterable[SnowflakeV2Sou ), ) - with mock.patch("snowflake.connector.connect"): + with mock.patch( + "datahub.sql_parsing.sql_parsing_aggregator.ToolMetaExtractor.create", + ) as mock_checkpoint, mock.patch("snowflake.connector.connect"): + mock_checkpoint.return_value = mock.MagicMock() + yield SnowflakeV2Source(ctx=ctx, config=config) diff --git a/metadata-ingestion/tests/unit/test_mlflow_source.py b/metadata-ingestion/tests/unit/test_mlflow_source.py index d213dd92352e62..e882296b6f331d 100644 --- a/metadata-ingestion/tests/unit/test_mlflow_source.py +++ b/metadata-ingestion/tests/unit/test_mlflow_source.py @@ -136,3 +136,16 @@ def test_make_external_link_remote(source, model_version): url = source._make_external_url(model_version) assert url == expected_url + + +def test_make_external_link_remote_via_config(source, model_version): + custom_base_url = "https://custom-server.org" + source.config.base_external_url = custom_base_url + source.client = MlflowClient( + tracking_uri="https://dummy-mlflow-tracking-server.org" + ) + expected_url = f"{custom_base_url}/#/models/{model_version.name}/versions/{model_version.version}" + + url = source._make_external_url(model_version) + + assert url == expected_url diff --git a/metadata-ingestion/tests/unit/test_tableau_source.py b/metadata-ingestion/tests/unit/test_tableau_source.py index c81aa0bd8a1b1a..44e59decaecbd7 100644 --- a/metadata-ingestion/tests/unit/test_tableau_source.py +++ b/metadata-ingestion/tests/unit/test_tableau_source.py @@ -182,8 +182,14 @@ def test_get_filter_pages_simple(): assert get_filter_pages(filter_dict, 10) == [filter_dict] -def test_get_filter_pages_non_id_large_filter_passthrough(): - projects = [f"project{i}" for i in range(20000)] +def test_get_filter_pages_non_id_large_filter(): + projects = [f"project{i}" for i in range(10)] + filter_dict = {c.PROJECT_NAME_WITH_IN: projects} + assert get_filter_pages(filter_dict, 10) == [filter_dict] + + +def test_get_filter_pages_for_single_key(): + projects = ["project1"] filter_dict = {c.PROJECT_NAME_WITH_IN: projects} assert get_filter_pages(filter_dict, 10) == [filter_dict] diff --git a/metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py b/metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py index f4062f9a911453..6230c2e37edc6a 100644 --- a/metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py +++ b/metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py @@ -15,11 +15,13 @@ ) -def test_file_dict() -> None: +@pytest.mark.parametrize("use_sqlite_on_conflict", [True, False]) +def test_file_dict(use_sqlite_on_conflict: bool) -> None: cache = FileBackedDict[int]( tablename="cache", cache_max_size=10, cache_eviction_batch_size=10, + _use_sqlite_on_conflict=use_sqlite_on_conflict, ) for i in range(100): @@ -92,7 +94,8 @@ def test_file_dict() -> None: cache["a"] = 1 -def test_custom_serde() -> None: +@pytest.mark.parametrize("use_sqlite_on_conflict", [True, False]) +def test_custom_serde(use_sqlite_on_conflict: bool) -> None: @dataclass(frozen=True) class Label: a: str @@ -139,6 +142,7 @@ def deserialize(s: str) -> Main: deserializer=deserialize, # Disable the in-memory cache to force all reads/writes to the DB. 
cache_max_size=0, + _use_sqlite_on_conflict=use_sqlite_on_conflict, ) first = Main(3, {Label("one", 1): 0.1, Label("two", 2): 0.2}) second = Main(-100, {Label("z", 26): 0.26}) @@ -186,7 +190,8 @@ def test_file_dict_stores_counter() -> None: assert in_memory_counters[i].most_common(2) == cache[str(i)].most_common(2) -def test_file_dict_ordering() -> None: +@pytest.mark.parametrize("use_sqlite_on_conflict", [True, False]) +def test_file_dict_ordering(use_sqlite_on_conflict: bool) -> None: """ We require that FileBackedDict maintains insertion order, similar to Python's built-in dict. This test makes one of each and validates that they behave the same. @@ -196,6 +201,7 @@ def test_file_dict_ordering() -> None: serializer=str, deserializer=int, cache_max_size=1, + _use_sqlite_on_conflict=use_sqlite_on_conflict, ) data = {} @@ -229,12 +235,14 @@ class Pair: @pytest.mark.parametrize("cache_max_size", [0, 1, 10]) -def test_custom_column(cache_max_size: int) -> None: +@pytest.mark.parametrize("use_sqlite_on_conflict", [True, False]) +def test_custom_column(cache_max_size: int, use_sqlite_on_conflict: bool) -> None: cache = FileBackedDict[Pair]( extra_columns={ "x": lambda m: m.x, }, cache_max_size=cache_max_size, + _use_sqlite_on_conflict=use_sqlite_on_conflict, ) cache["first"] = Pair(3, "a") @@ -275,7 +283,8 @@ def test_custom_column(cache_max_size: int) -> None: ] -def test_shared_connection() -> None: +@pytest.mark.parametrize("use_sqlite_on_conflict", [True, False]) +def test_shared_connection(use_sqlite_on_conflict: bool) -> None: with ConnectionWrapper() as connection: cache1 = FileBackedDict[int]( shared_connection=connection, @@ -283,6 +292,7 @@ extra_columns={ "v": lambda v: v, }, + _use_sqlite_on_conflict=use_sqlite_on_conflict, ) cache2 = FileBackedDict[Pair]( shared_connection=connection, @@ -291,6 +301,7 @@ "x": lambda m: m.x, "y": lambda m: m.y, }, + _use_sqlite_on_conflict=use_sqlite_on_conflict, ) cache1["a"] = 3 diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle index cec3164f10d6cc..42861cf235b56f 100644 --- a/metadata-integration/java/datahub-client/build.gradle +++ b/metadata-integration/java/datahub-client/build.gradle @@ -95,6 +95,11 @@ test { finalizedBy jacocoTestReport } +// no submodule depends on datahub-schematron:cli, +// and the tests there are the ones that check python-java compatibility +test.dependsOn tasks.getByPath(":metadata-integration:java:datahub-schematron:cli:test") +test.dependsOn tasks.getByPath(":metadata-integration:java:datahub-schematron:lib:test") + task checkShadowJar(type: Exec) { commandLine 'sh', '-c', 'scripts/check_jar.sh' } diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/java/io/datahubproject/schematron/converters/avro/AvroSchemaConverterTest.java b/metadata-integration/java/datahub-schematron/lib/src/test/java/io/datahubproject/schematron/converters/avro/AvroSchemaConverterTest.java new file mode 100644 index 00000000000000..d6522c2d84670f --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/java/io/datahubproject/schematron/converters/avro/AvroSchemaConverterTest.java @@ -0,0 +1,942 @@ +package io.datahubproject.schematron.converters.avro; + +import static org.testng.Assert.*; + +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.data.template.StringArray; +import com.linkedin.schema.*; +import java.io.File; +import 
java.io.IOException; +import java.net.URISyntaxException; +import java.util.Collections; +import org.apache.avro.Schema; +import org.testng.annotations.*; + +@Test(groups = "unit") +class AvroSchemaConverterTest { + + private AvroSchemaConverter avroSchemaConverter = AvroSchemaConverter.builder().build(); + private DataPlatformUrn dataPlatformUrn = + DataPlatformUrn.createFromString("urn:li:dataPlatform:foo"); + + AvroSchemaConverterTest() throws URISyntaxException {} + + @Test(groups = "basic") + void testPrimitiveTypes() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("primitive_types.avsc"), false, false, dataPlatformUrn, null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 14); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=PrimitiveType].[type=int].intField", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=PrimitiveType].[type=union].intFieldV2", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType() + .setNestedTypes(new StringArray(Collections.singletonList("union")))))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=PrimitiveType].[type=union].[type=int].intFieldV2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=PrimitiveType].[type=null].nullField", + "null", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NullType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=PrimitiveType].[type=union].nullFieldV2", + "union", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType() + .setNestedTypes(new StringArray(Collections.singletonList("union")))))); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=PrimitiveType].[type=long].longField", + "long", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=PrimitiveType].[type=float].floatField", + "float", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=PrimitiveType].[type=double].doubleField", + "double", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=PrimitiveType].[type=string].stringField", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=PrimitiveType].[type=boolean].booleanField", + "boolean", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BooleanType()))); + assertSchemaField( + schema.getFields().get(10), + "[version=2.0].[type=PrimitiveType].[type=int].nullableIntField", + "int", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + 
schema.getFields().get(11), + "[version=2.0].[type=PrimitiveType].[type=long].nullableLongField", + "long", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=PrimitiveType].[type=string].nullableStringField", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(13), + "[version=2.0].[type=PrimitiveType].[type=enum].status", + "Enum", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new EnumType()))); + } + + @Test(groups = "basic") + void testComplexMaps() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("complex_maps.avsc"), false, false, dataPlatformUrn, null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 15); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=MapType].[type=map].mapOfString", + "map", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("string")))); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=MapType].[type=map].[type=ComplexType].mapOfComplexType", + "ComplexType", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("ComplexType")))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=MapType].[type=map].[type=ComplexType].mapOfComplexType.[type=string].field1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=MapType].[type=map].[type=ComplexType].mapOfComplexType.[type=int].field2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=MapType].[type=map].[type=union].mapOfNullableString", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("union")))); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=string].mapOfNullableString", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=MapType].[type=map].[type=union].mapOfNullableComplexType", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("union")))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=ComplexTypeNullable].mapOfNullableComplexType", + "ComplexTypeNullable", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=ComplexTypeNullable].mapOfNullableComplexType.[type=string].field1", + "string", + false, + false, + new 
SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=ComplexTypeNullable].mapOfNullableComplexType.[type=int].field2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(10), + "[version=2.0].[type=MapType].[type=map].[type=array].mapOfArray", + "array(string)", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("string"))))); + assertSchemaField( + schema.getFields().get(11), + "[version=2.0].[type=MapType].[type=map].[type=map].mapOfMap", + "map", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("int")))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=MapType].[type=map].[type=union].mapOfUnion", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("union")))); + assertSchemaField( + schema.getFields().get(13), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=string].mapOfUnion", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(14), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=int].mapOfUnion", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + } + + @Test(groups = "basic") + void testComplexArrays() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("complex_arrays.avsc"), false, false, dataPlatformUrn, null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 16); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=ArrayType].[type=array].arrayOfString", + "array(string)", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("string"))))); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=ArrayType].[type=array].[type=map].arrayOfMap", + "map", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("string")))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=ArrayType].[type=array].[type=ComplexType].arrayOfRecord", + "ComplexType", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("ComplexType"))))); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=ArrayType].[type=array].[type=ComplexType].arrayOfRecord.[type=string].field1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=ArrayType].[type=array].[type=ComplexType].arrayOfRecord.[type=int].field2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(5), + 
"[version=2.0].[type=ArrayType].[type=array].[type=array].arrayOfArray", + "array(string)", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("string"))))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=ArrayType].[type=array].[type=union].arrayOfUnion", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=string].arrayOfUnion", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=int].arrayOfUnion", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=boolean].arrayOfUnion", + "boolean", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BooleanType()))); + assertSchemaField( + schema.getFields().get(10), + "[version=2.0].[type=ArrayType].[type=array].[type=union].arrayOfNullableString", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(11), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=string].arrayOfNullableString", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=ArrayType].[type=array].[type=union].arrayOfNullableRecord", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(13), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=ComplexTypeNullable].arrayOfNullableRecord", + "ComplexTypeNullable", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(14), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=ComplexTypeNullable].arrayOfNullableRecord.[type=string].field1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(15), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=ComplexTypeNullable].arrayOfNullableRecord.[type=int].field2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + } + + @Test(groups = "basic") + void testComplexStructs() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("complex_structs.avsc"), false, false, dataPlatformUrn, null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 13); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField", + "ComplexStruct", + false, + 
false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=string].fieldString", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=int].fieldInt", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=boolean].fieldBoolean", + "boolean", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BooleanType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=map].fieldMap", + "map", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("string")))); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=NestedRecord].fieldRecord", + "NestedRecord", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=NestedRecord].fieldRecord.[type=string].nestedField1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=NestedRecord].fieldRecord.[type=int].nestedField2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=array].fieldArray", + "array(string)", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("string"))))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=union].fieldUnion", + "union", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType().setNestedTypes(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(10), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=union].[type=string].fieldUnion", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(11), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=union].[type=int].fieldUnion", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=map].fieldNullableMap", + "map", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("string")))); + 
} + + @Test(groups = "basic") + void testComplexUnions() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("complex_unions.avsc"), false, false, dataPlatformUrn, null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 14); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=UnionType].[type=union].fieldUnionNullablePrimitives", + "union", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType().setNestedTypes(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=UnionType].[type=union].[type=string].fieldUnionNullablePrimitives", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=UnionType].[type=union].[type=int].fieldUnionNullablePrimitives", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=UnionType].[type=union].[type=boolean].fieldUnionNullablePrimitives", + "boolean", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BooleanType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=UnionType].[type=union].fieldUnionComplexTypes", + "union", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType().setNestedTypes(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=UnionType].[type=union].[type=NestedRecord].fieldUnionComplexTypes", + "NestedRecord", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=UnionType].[type=union].[type=NestedRecord].fieldUnionComplexTypes.[type=string].nestedField1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=UnionType].[type=union].[type=NestedRecord].fieldUnionComplexTypes.[type=int].nestedField2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=UnionType].[type=union].[type=map].fieldUnionComplexTypes", + "map", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("string")))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=UnionType].[type=union].fieldUnionPrimitiveAndComplex", + "union", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType().setNestedTypes(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(10), + "[version=2.0].[type=UnionType].[type=union].[type=string].fieldUnionPrimitiveAndComplex", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(11), + "[version=2.0].[type=UnionType].[type=union].[type=ComplexTypeRecord].fieldUnionPrimitiveAndComplex", + 
"ComplexTypeRecord", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=UnionType].[type=union].[type=ComplexTypeRecord].fieldUnionPrimitiveAndComplex.[type=string].complexField1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(13), + "[version=2.0].[type=UnionType].[type=union].[type=ComplexTypeRecord].fieldUnionPrimitiveAndComplex.[type=int].complexField2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + } + + @Test(groups = "basic") + void testLogicalTypes() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("logical_types.avsc"), false, false, dataPlatformUrn, null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 9); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=LogicalTypes].[type=bytes].decimalField", + "bytes(decimal)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType())), + "{\"scale\":2,\"logicalType\":\"decimal\",\"precision\":9}"); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=LogicalTypes].[type=bytes].decimalFieldWithoutScale", + "bytes(decimal)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType())), + "{\"logicalType\":\"decimal\",\"precision\":9}"); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=LogicalTypes].[type=bytes].decimalFieldWithoutPrecisionAndScale", + "bytes", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BytesType())), + "{\"logicalType\":\"decimal\"}"); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=LogicalTypes].[type=long].timestampMillisField", + "long(timestamp-millis)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"timestamp-millis\"}"); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=LogicalTypes].[type=long].timestampMicrosField", + "long(timestamp-micros)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"timestamp-micros\"}"); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=LogicalTypes].[type=int].dateField", + "int(date)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new DateType())), + "{\"logicalType\":\"date\"}"); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=LogicalTypes].[type=int].timeMillisField", + "int(time-millis)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"time-millis\"}"); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=LogicalTypes].[type=long].timeMicrosField", + "long(time-micros)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"time-micros\"}"); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=LogicalTypes].[type=string].uuidField", + "string(uuid)", + false, + false, + new 
SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType())), + "{\"logicalType\":\"uuid\"}"); + } + + @Test(groups = "basic") + void testUsersRecord() throws IOException { + // this is a test case got during the Hudi integration + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("users_record.avsc"), false, false, dataPlatformUrn, null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 20); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=users_record].[type=string]._hoodie_commit_time", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=users_record].[type=string]._hoodie_commit_seqno", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=users_record].[type=string]._hoodie_record_key", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=users_record].[type=string]._hoodie_partition_path", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=users_record].[type=string]._hoodie_file_name", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=users_record].[type=string].user_id", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=users_record].[type=string].name", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=users_record].[type=address].address", + "address", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=users_record].[type=address].address.[type=string].street", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=users_record].[type=address].address.[type=string].city", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(10), + "[version=2.0].[type=users_record].[type=address].address.[type=string].country", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(11), + "[version=2.0].[type=users_record].[type=address].address.[type=string].postal_code", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=users_record].[type=address].address.[type=long].created_at", + 
"long(timestamp-micros)", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"timestamp-micros\"}"); + assertSchemaField( + schema.getFields().get(13), + "[version=2.0].[type=users_record].[type=contact].contact", + "contact", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(14), + "[version=2.0].[type=users_record].[type=contact].contact.[type=string].email", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(15), + "[version=2.0].[type=users_record].[type=contact].contact.[type=string].phone", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(16), + "[version=2.0].[type=users_record].[type=long].created_at", + "long(timestamp-micros)", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"timestamp-micros\"}"); + assertSchemaField( + schema.getFields().get(17), + "[version=2.0].[type=users_record].[type=long].updated_at", + "long(timestamp-micros)", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"timestamp-micros\"}"); + assertSchemaField( + schema.getFields().get(18), + "[version=2.0].[type=users_record].[type=map].[type=int].props", + "int", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(19), + "[version=2.0].[type=users_record].[type=string].country", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + } + + private void assertSchemaField( + SchemaField field, + String expectedPath, + String expectedNativeType, + boolean expectedNullable, + boolean expectedIsPartOfKey, + SchemaFieldDataType expectedType) { + assertSchemaField( + field, + expectedPath, + expectedNativeType, + expectedNullable, + expectedIsPartOfKey, + expectedType, + null); + } + + private void assertSchemaField( + SchemaField field, + String expectedPath, + String expectedNativeType, + boolean expectedNullable, + boolean expectedIsPartOfKey, + SchemaFieldDataType expectedType, + String expectedJsonProps) { + assertEquals(field.getFieldPath(), expectedPath); + assertEquals(field.getNativeDataType(), expectedNativeType); + assertEquals(field.isNullable(), expectedNullable); + assertEquals(field.isIsPartOfKey(), expectedIsPartOfKey); + assertEquals(field.getType(), expectedType); + if (expectedJsonProps != null) { + assertEquals(field.getJsonProps(), expectedJsonProps); + } + } + + private Schema readAvroSchema(String schemaFileName) throws IOException { + String schemaPath = getClass().getClassLoader().getResource(schemaFileName).getPath(); + File schemaFile = new File(schemaPath); + return new Schema.Parser().parse(schemaFile); + } +} diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/CustomerProfile.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/CustomerProfile.avsc deleted file mode 100644 index 81f8b0e54b11e0..00000000000000 --- a/metadata-integration/java/datahub-schematron/lib/src/test/resources/CustomerProfile.avsc +++ /dev/null @@ -1,456 +0,0 @@ 
-{ - "type": "record", - "name": "CustomerProfile", - "namespace": "com.example.customer", - "doc": "A complex customer profile schema demonstrating various union types and optional fields", - "fields": [ - { - "name": "customerId", - "type": { - "type": "string", - "logicalType": "uuid" - }, - "doc": "Unique identifier for the customer" - }, - { - "name": "identificationDocument", - "type": [ - "null", - { - "type": "record", - "name": "Passport", - "fields": [ - { - "name": "passportNumber", - "type": "string" - }, - { - "name": "expiryDate", - "type": { - "type": "long", - "logicalType": "date" - } - } - ] - }, - { - "type": "record", - "name": "DriversLicense", - "fields": [ - { - "name": "licenseNumber", - "type": "string" - }, - { - "name": "state", - "type": "string" - }, - { - "name": "validUntil", - "type": { - "type": "long", - "logicalType": "date" - } - } - ] - }, - { - "type": "record", - "name": "NationalID", - "fields": [ - { - "name": "idNumber", - "type": "string" - }, - { - "name": "country", - "type": "string" - } - ] - } - ], - "default": null, - "doc": "Customer's identification document - can be passport, driver's license, or national ID" - }, - { - "name": "contactInfo", - "type": { - "type": "record", - "name": "ContactInformation", - "fields": [ - { - "name": "primaryContact", - "type": [ - { - "type": "record", - "name": "EmailContact", - "fields": [ - { - "name": "emailAddress", - "type": "string" - }, - { - "name": "isVerified", - "type": "boolean", - "default": false - } - ] - }, - { - "type": "record", - "name": "PhoneContact", - "fields": [ - { - "name": "countryCode", - "type": "string" - }, - { - "name": "number", - "type": "string" - }, - { - "name": "type", - "type": { - "type": "enum", - "name": "PhoneType", - "symbols": [ - "MOBILE", - "LANDLINE" - ] - } - } - ] - } - ], - "doc": "Primary contact method - either email or phone" - }, - { - "name": "alternativeContacts", - "type": { - "type": "array", - "items": [ - "null", - "EmailContact", - "PhoneContact" - ] - }, - "default": [], - "doc": "List of alternative contact methods" - } - ] - } - }, - { - "name": "addresses", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "Address", - "fields": [ - { - "name": "type", - "type": { - "type": "enum", - "name": "AddressType", - "symbols": [ - "RESIDENTIAL", - "BUSINESS", - "SHIPPING" - ] - }, - "default": "RESIDENTIAL" - }, - { - "name": "street", - "type": "string" - }, - { - "name": "city", - "type": "string" - }, - { - "name": "state", - "type": [ - "null", - "string" - ], - "default": null - }, - { - "name": "country", - "type": "string" - }, - { - "name": "postalCode", - "type": [ - "null", - "string" - ], - "default": null - }, - { - "name": "validationStatus", - "type": [ - "null", - { - "type": "record", - "name": "AddressValidation", - "fields": [ - { - "name": "isValid", - "type": "boolean" - }, - { - "name": "verificationDate", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - } - }, - { - "name": "verificationMethod", - "type": { - "type": "enum", - "name": "VerificationMethod", - "symbols": [ - "MANUAL", - "AUTOMATED" - ] - } - } - ] - } - ], - "default": null - } - ] - } - }, - "doc": "Customer's addresses with validation information" - }, - { - "name": "preferences", - "type": { - "type": "map", - "values": [ - "null", - "string", - "boolean", - { - "type": "record", - "name": "FrequencyPreference", - "fields": [ - { - "name": "frequency", - "type": { - "type": "enum", - "name": "Frequency", - 
"symbols": [ - "DAILY", - "WEEKLY", - "MONTHLY" - ] - } - }, - { - "name": "enabled", - "type": "boolean", - "default": true - }, - { - "name": "lastUpdated", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - } - } - ] - } - ] - }, - "doc": "Customer preferences with various possible value types" - }, - { - "name": "subscriptionHistory", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "Subscription", - "fields": [ - { - "name": "planName", - "type": "string" - }, - { - "name": "startDate", - "type": { - "type": "long", - "logicalType": "date" - } - }, - { - "name": "endDate", - "type": [ - "null", - { - "type": "long", - "logicalType": "date" - } - ], - "default": null - }, - { - "name": "status", - "type": { - "type": "enum", - "name": "SubscriptionStatus", - "symbols": [ - "ACTIVE", - "CANCELLED", - "EXPIRED", - "SUSPENDED" - ] - } - }, - { - "name": "paymentMethod", - "type": [ - "null", - { - "type": "record", - "name": "PaymentMethod", - "fields": [ - { - "name": "type", - "type": { - "type": "enum", - "name": "PaymentType", - "symbols": [ - "CREDIT_CARD", - "DEBIT_CARD", - "BANK_TRANSFER", - "DIGITAL_WALLET" - ] - } - }, - { - "name": "lastFourDigits", - "type": [ - "null", - "string" - ], - "default": null - }, - { - "name": "expiryDate", - "type": [ - "null", - { - "type": "long", - "logicalType": "date" - } - ], - "default": null - } - ] - } - ], - "default": null - } - ] - } - } - ], - "default": null, - "doc": "Historical record of customer subscriptions" - }, - { - "name": "metadata", - "type": { - "type": "map", - "values": [ - "null", - "string", - "long", - "boolean", - { - "type": "record", - "name": "MetadataValue", - "fields": [ - { - "name": "value", - "type": [ - "null", - "string", - "long", - "boolean" - ], - "default": null - }, - { - "name": "timestamp", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - } - }, - { - "name": "source", - "type": "string" - } - ] - } - ] - }, - "doc": "Flexible metadata storage with various possible value types" - }, - { - "name": "tags", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "Tag", - "fields": [ - { - "name": "name", - "type": "string" - }, - { - "name": "value", - "type": [ - "null", - "string" - ], - "default": null - }, - { - "name": "score", - "type": [ - "null", - "double" - ], - "default": null - }, - { - "name": "addedAt", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - } - } - ] - } - } - ], - "default": null, - "doc": "Optional tags associated with the customer profile" - } - ] -} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/CustomerProfile2.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/CustomerProfile2.avsc deleted file mode 100644 index b8c7654ea072a2..00000000000000 --- a/metadata-integration/java/datahub-schematron/lib/src/test/resources/CustomerProfile2.avsc +++ /dev/null @@ -1,244 +0,0 @@ -{ - "type": "record", - "name": "CustomerProfile2", - "namespace": "com.example.customer", - "doc": "A complex customer profile schema demonstrating various union types and optional fields", - "fields": [ - { - "name": "customerId", - "type": { - "type": "string", - "logicalType": "uuid" - }, - "doc": "Unique identifier for the customer" - }, - { - "name": "identificationDocument", - "type": [ - "null", - { - "type": "record", - "name": "Passport", - "fields": [ - { - "name": "passportNumber", - 
"type": "string" - }, - { - "name": "expiryDate", - "type": { - "type": "long", - "logicalType": "date" - } - } - ] - }, - { - "type": "record", - "name": "DriversLicense", - "fields": [ - { - "name": "licenseNumber", - "type": "string" - }, - { - "name": "state", - "type": "string" - }, - { - "name": "validUntil", - "type": { - "type": "long", - "logicalType": "date" - } - } - ] - }, - { - "type": "record", - "name": "NationalID", - "fields": [ - { - "name": "idNumber", - "type": "string" - }, - { - "name": "country", - "type": "string" - } - ] - } - ], - "default": null, - "doc": "Customer's identification document" - }, - { - "name": "contactInfo", - "type": { - "type": "record", - "name": "ContactInformation", - "fields": [ - { - "name": "primaryEmailContact", - "type": [ - "null", - { - "type": "record", - "name": "PrimaryEmailContact", - "fields": [ - { - "name": "emailAddress", - "type": "string" - }, - { - "name": "isVerified", - "type": "boolean", - "default": false - } - ] - } - ], - "default": null - }, - { - "name": "primaryPhoneContact", - "type": [ - "null", - { - "type": "record", - "name": "PrimaryPhoneContact", - "fields": [ - { - "name": "countryCode", - "type": "string" - }, - { - "name": "number", - "type": "string" - }, - { - "name": "type", - "type": { - "type": "enum", - "name": "PhoneType", - "symbols": [ - "MOBILE", - "LANDLINE" - ] - } - } - ] - } - ], - "default": null - }, - { - "name": "alternativeEmailContacts", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "AlternativeEmailContact", - "fields": [ - { - "name": "emailAddress", - "type": "string" - }, - { - "name": "isVerified", - "type": "boolean", - "default": false - } - ] - } - }, - "default": [] - }, - { - "name": "alternativePhoneContacts", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "AlternativePhoneContact", - "fields": [ - { - "name": "countryCode", - "type": "string" - }, - { - "name": "number", - "type": "string" - }, - { - "name": "type", - "type": "PhoneType" - } - ] - } - }, - "default": [] - } - ] - } - }, - { - "name": "preferences", - "type": { - "type": "record", - "name": "Preferences", - "fields": [ - { - "name": "simplePreferences", - "type": { - "type": "map", - "values": [ - "null", - "string", - "boolean" - ] - }, - "default": {} - }, - { - "name": "frequencyPreferences", - "type": { - "type": "map", - "values": { - "type": "record", - "name": "FrequencyPreference", - "fields": [ - { - "name": "frequency", - "type": { - "type": "enum", - "name": "Frequency", - "symbols": [ - "DAILY", - "WEEKLY", - "MONTHLY" - ] - } - }, - { - "name": "enabled", - "type": "boolean", - "default": true - }, - { - "name": "lastUpdated", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - } - } - ] - } - }, - "default": {} - } - ] - } - } - ] -} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/FlatUser.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/FlatUser.avsc deleted file mode 100644 index c796878c32ae41..00000000000000 --- a/metadata-integration/java/datahub-schematron/lib/src/test/resources/FlatUser.avsc +++ /dev/null @@ -1,45 +0,0 @@ -{ - "type": "record", - "name": "FlatUser", - "namespace": "com.example", - "fields": [ - { - "name": "id", - "type": "int", - "doc": "The unique identifier for a user", - "default": -1, - "metadata": { - "key1": "value1", - "key2": "value2" - } - }, - { - "name": "username", - "type": "string", - "doc": "The 
username of the user" - }, - { - "name": "email", - "type": "string", - "doc": "The email of the user" - }, - { - "name": "age", - "type": "int", - "doc": "The age of the user" - }, - { - "name": "isActive", - "type": "boolean", - "doc": "Whether the user is active or not" - }, - { - "name": "registrationDate", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - }, - "doc": "The registration date of the user" - } - ] -} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_arrays.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_arrays.avsc new file mode 100644 index 00000000000000..8e8bcdaa0a7dce --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_arrays.avsc @@ -0,0 +1,87 @@ +{ + "type": "record", + "name": "ArrayType", + "fields": [ + { + "name": "arrayOfString", + "type": { + "type": "array", + "items": "string" + } + }, + { + "name": "arrayOfMap", + "type": { + "type": "array", + "items": { + "type": "map", + "values": "string" + } + } + }, + { + "name": "arrayOfRecord", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "ComplexType", + "fields": [ + { + "name": "field1", + "type": "string" + }, + { + "name": "field2", + "type": "int" + } + ] + } + } + }, + { + "name": "arrayOfArray", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "string" + } + } + }, + { + "name": "arrayOfUnion", + "type": { + "type": "array", + "items": ["string", "int", "boolean"] + } + }, + { + "name": "arrayOfNullableString", + "type": { + "type": "array", + "items": ["null", "string"] + } + }, + { + "name": "arrayOfNullableRecord", + "type": { + "type": "array", + "items": ["null", { + "type": "record", + "name": "ComplexTypeNullable", + "fields": [ + { + "name": "field1", + "type": "string" + }, + { + "name": "field2", + "type": "int" + } + ] + }] + } + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_maps.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_maps.avsc new file mode 100644 index 00000000000000..baedae1b9dcc15 --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_maps.avsc @@ -0,0 +1,87 @@ +{ + "type": "record", + "name": "MapType", + "fields": [ + { + "name": "mapOfString", + "type": { + "type": "map", + "values": "string" + } + }, + { + "name": "mapOfComplexType", + "type": { + "type": "map", + "values": { + "type": "record", + "name": "ComplexType", + "fields": [ + { + "name": "field1", + "type": "string" + }, + { + "name": "field2", + "type": "int" + } + ] + } + } + }, + { + "name": "mapOfNullableString", + "type": { + "type": "map", + "values": ["null", "string"] + } + }, + { + "name": "mapOfNullableComplexType", + "type": { + "type": "map", + "values": ["null", { + "type": "record", + "name": "ComplexTypeNullable", + "fields": [ + { + "name": "field1", + "type": "string" + }, + { + "name": "field2", + "type": "int" + } + ] + }] + } + }, + { + "name": "mapOfArray", + "type": { + "type": "map", + "values": { + "type": "array", + "items": "string" + } + } + }, + { + "name": "mapOfMap", + "type": { + "type": "map", + "values": { + "type": "map", + "values": "int" + } + } + }, + { + "name": "mapOfUnion", + "type": { + "type": "map", + "values": ["null", "string", "int"] + } + } + ] +} \ No newline at end of file diff --git 
a/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_structs.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_structs.avsc new file mode 100644 index 00000000000000..7f5824192d3062 --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_structs.avsc @@ -0,0 +1,76 @@ +{ + "type": "record", + "name": "StructType", + "fields": [ + { + "name": "structField", + "type": { + "type": "record", + "name": "ComplexStruct", + "fields": [ + { + "name": "fieldString", + "type": "string" + }, + { + "name": "fieldInt", + "type": "int" + }, + { + "name": "fieldBoolean", + "type": "boolean" + }, + { + "name": "fieldMap", + "type": { + "type": "map", + "values": "string" + } + }, + { + "name": "fieldRecord", + "type": { + "type": "record", + "name": "NestedRecord", + "fields": [ + { + "name": "nestedField1", + "type": "string" + }, + { + "name": "nestedField2", + "type": "int" + } + ] + } + }, + { + "name": "fieldArray", + "type": { + "type": "array", + "items": "string" + } + }, + { + "name": "fieldUnion", + "type": [ + "null", + "string", + "int" + ] + }, + { + "name": "fieldNullableMap", + "type": [ + "null", + { + "type": "map", + "values": "string" + } + ] + } + ] + } + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_unions.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_unions.avsc new file mode 100644 index 00000000000000..1a35f1cfa0e6d6 --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_unions.avsc @@ -0,0 +1,60 @@ +{ + "type": "record", + "name": "UnionType", + "fields": [ + { + "name": "fieldUnionNullablePrimitives", + "type": [ + "null", + "string", + "int", + "boolean" + ] + }, + { + "name": "fieldUnionComplexTypes", + "type": [ + "null", + { + "type": "record", + "name": "NestedRecord", + "fields": [ + { + "name": "nestedField1", + "type": "string" + }, + { + "name": "nestedField2", + "type": "int" + } + ] + }, + { + "type": "map", + "values": "string" + } + ] + }, + { + "name": "fieldUnionPrimitiveAndComplex", + "type": [ + "null", + "string", + { + "type": "record", + "name": "ComplexTypeRecord", + "fields": [ + { + "name": "complexField1", + "type": "string" + }, + { + "name": "complexField2", + "type": "int" + } + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/logical_types.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/logical_types.avsc new file mode 100644 index 00000000000000..24919d82149653 --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/logical_types.avsc @@ -0,0 +1,72 @@ +{ + "type": "record", + "name": "LogicalTypes", + "fields": [ + { + "name": "decimalField", + "type": { + "type": "bytes", + "logicalType": "decimal", + "precision": 9, + "scale": 2 + } + }, + { + "name": "decimalFieldWithoutScale", + "type": { + "type": "bytes", + "logicalType": "decimal", + "precision": 9 + } + }, + { + "name": "decimalFieldWithoutPrecisionAndScale", + "type": { + "type": "bytes", + "logicalType": "decimal" + } + }, + { + "name": "timestampMillisField", + "type": { + "type": "long", + "logicalType": "timestamp-millis" + } + }, + { + "name": "timestampMicrosField", + "type": { + "type": "long", + "logicalType": "timestamp-micros" + } + }, + { + "name": "dateField", + "type": { + "type": "int", + 
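The logical_types.avsc fixture added here exercises Avro's logical type annotations, including decimal declarations that omit scale or precision. A hedged aside on how those annotations surface through the stock Avro API (the class below is illustrative and not part of the patch): Schema#getLogicalType returns the annotation, and for decimals the scale defaults to 0 when the .avsc leaves it out.

    import org.apache.avro.LogicalType;
    import org.apache.avro.LogicalTypes;
    import org.apache.avro.Schema;

    public final class DecimalIntrospection {
      // Returns a printable description of a field schema, unpacking decimal precision/scale.
      static String describe(Schema fieldSchema) {
        LogicalType logicalType = fieldSchema.getLogicalType();
        if (logicalType instanceof LogicalTypes.Decimal) {
          LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType;
          // For declarations like decimalFieldWithoutScale, getScale() reports the default of 0.
          return "decimal(" + decimal.getPrecision() + ", " + decimal.getScale() + ")";
        }
        return logicalType == null
            ? fieldSchema.getType().toString()
            : fieldSchema.getType() + " (" + logicalType.getName() + ")";
      }
    }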
"logicalType": "date" + } + }, + { + "name": "timeMillisField", + "type": { + "type": "int", + "logicalType": "time-millis" + } + }, + { + "name": "timeMicrosField", + "type": { + "type": "long", + "logicalType": "time-micros" + } + }, + { + "name": "uuidField", + "type": { + "type": "string", + "logicalType": "uuid" + } + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/primitive_types.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/primitive_types.avsc new file mode 100644 index 00000000000000..c618299748fab1 --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/primitive_types.avsc @@ -0,0 +1,62 @@ +{ + "type": "record", + "name": "PrimitiveType", + "fields": [ + { + "name": "intField", + "type": "int" + }, + { + "name": "intFieldV2", + "type": ["int"] + }, + { + "name": "nullField", + "type": "null" + }, + { + "name": "nullFieldV2", + "type": ["null"] + }, + { + "name": "longField", + "type": "long" + }, + { + "name": "floatField", + "type": "float" + }, + { + "name": "doubleField", + "type": "double" + }, + { + "name": "stringField", + "type": "string" + }, + { + "name": "booleanField", + "type": "boolean" + }, + { + "name": "nullableIntField", + "type": ["null", "int"] + }, + { + "name": "nullableLongField", + "type": ["null", "long"] + }, + { + "name": "nullableStringField", + "type": ["null", "string"] + }, + { + "name": "status", + "type": { + "type": "enum", + "name": "StatusEnum", + "symbols": ["ACTIVE", "INACTIVE", "PENDING"] + } + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/users_record.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/users_record.avsc new file mode 100644 index 00000000000000..bd46ae715a4810 --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/users_record.avsc @@ -0,0 +1,195 @@ +{ + "type": "record", + "name": "users_record", + "namespace": "hoodie.users", + "fields": [ + { + "name": "_hoodie_commit_time", + "type": [ + "null", + "string" + ], + "doc": "", + "default": null + }, + { + "name": "_hoodie_commit_seqno", + "type": [ + "null", + "string" + ], + "doc": "", + "default": null + }, + { + "name": "_hoodie_record_key", + "type": [ + "null", + "string" + ], + "doc": "", + "default": null + }, + { + "name": "_hoodie_partition_path", + "type": [ + "null", + "string" + ], + "doc": "", + "default": null + }, + { + "name": "_hoodie_file_name", + "type": [ + "null", + "string" + ], + "doc": "", + "default": null + }, + { + "name": "user_id", + "type": "string" + }, + { + "name": "name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "address", + "type": [ + "null", + { + "type": "record", + "name": "address", + "namespace": "hoodie.users.users_record", + "fields": [ + { + "name": "street", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "city", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "country", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "postal_code", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "created_at", + "type": [ + "null", + { + "type": "long", + "logicalType": "timestamp-micros" + } + ], + "default": null + } + ] + } + ], + "default": null + }, + { + "name": "contact", + "type": [ + "null", + { + "type": "record", + "name": "contact", + 
"namespace": "hoodie.users.users_record", + "fields": [ + { + "name": "email", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "phone", + "type": [ + "null", + "string" + ], + "default": null + } + ] + } + ], + "default": null + }, + { + "name": "created_at", + "type": [ + "null", + { + "type": "long", + "logicalType": "timestamp-micros" + } + ], + "default": null + }, + { + "name": "updated_at", + "type": [ + "null", + { + "type": "long", + "logicalType": "timestamp-micros" + } + ], + "default": null + }, + { + "name": "props", + "type": [ + "null", + { + "type": "map", + "values": [ + "null", + "int" + ] + } + ], + "default": null + }, + { + "name": "country", + "type": [ + "null", + "string" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java b/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java index 9f57d36f800de3..a3099b9ee21ea4 100644 --- a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java +++ b/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java @@ -16,7 +16,7 @@ import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.dataset.DatasetProperties; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.patch.GenericJsonPatch; @@ -56,7 +56,7 @@ public class AspectsBatchImplTest { private EntityRegistry testRegistry; - private AspectRetriever mockAspectRetriever; + private CachingAspectRetriever mockAspectRetriever; private RetrieverContext retrieverContext; @BeforeTest @@ -75,12 +75,12 @@ public void beforeTest() throws EntityRegistryException { @BeforeMethod public void setup() { - this.mockAspectRetriever = mock(AspectRetriever.class); + this.mockAspectRetriever = mock(CachingAspectRetriever.class); when(this.mockAspectRetriever.getEntityRegistry()).thenReturn(testRegistry); this.retrieverContext = RetrieverContext.builder() .searchRetriever(mock(SearchRetriever.class)) - .aspectRetriever(mockAspectRetriever) + .cachingAspectRetriever(mockAspectRetriever) .graphRetriever(mock(GraphRetriever.class)) .build(); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java index 99eadd223acd1a..82bc0ae1409c52 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java @@ -137,7 +137,7 @@ public static List getAdditionalChanges( getProposalFromAspectForDefault( entry.getKey(), entry.getValue(), entityKeyAspect, templateItem), templateItem.getAuditStamp(), - opContext.getAspectRetrieverOpt().get())) + opContext.getAspectRetriever())) .filter(Objects::nonNull); }) .collect(Collectors.toList()); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java index bba8324d0c5612..669ec751f87c69 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java @@ -35,7 +35,7 @@ public EntityRegistry getEntityRegistry() { @Override public Aspect getLatestAspectObject(@Nonnull Urn urn, @Nonnull String aspectName) { try { - return entityClient.getLatestAspectObject(systemOperationContext, urn, aspectName); + return entityClient.getLatestAspectObject(systemOperationContext, urn, aspectName, false); } catch (RemoteInvocationException | URISyntaxException e) { throw new RuntimeException(e); } @@ -49,7 +49,7 @@ public Map> getLatestAspectObjects( return Map.of(); } else { try { - return entityClient.getLatestAspects(systemOperationContext, urns, aspectNames); + return entityClient.getLatestAspects(systemOperationContext, urns, aspectNames, false); } catch (RemoteInvocationException | URISyntaxException e) { throw new RuntimeException(e); } @@ -70,7 +70,8 @@ public Map> getLatestSystemAspects( urnAspectNames.keySet(), urnAspectNames.values().stream() .flatMap(Collection::stream) - .collect(Collectors.toSet())); + .collect(Collectors.toSet()), + false); } catch (RemoteInvocationException | URISyntaxException e) { throw new RuntimeException(e); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index 29faa3955ea662..3d35f5956b0f4f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -106,11 +106,17 @@ public EntityResponse getV2( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nonnull final Urn urn, - @Nullable final Set aspectNames) + @Nullable final Set aspectNames, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException { final Set projectedAspects = aspectNames == null ? opContext.getEntityAspectNames(entityName) : aspectNames; - return entityService.getEntityV2(opContext, entityName, urn, projectedAspects); + return entityService.getEntityV2( + opContext, + entityName, + urn, + projectedAspects, + alwaysIncludeKeyAspect == null || alwaysIncludeKeyAspect); } @Override @@ -126,7 +132,8 @@ public Map batchGetV2( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nonnull Set urns, - @Nullable Set aspectNames) + @Nullable Set aspectNames, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException { final Set projectedAspects = aspectNames == null ? 
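JavaEntityClient#getV2 and batchGetV2 gain a @Nullable Boolean alwaysIncludeKeyAspect parameter, and the expression alwaysIncludeKeyAspect == null || alwaysIncludeKeyAspect resolves an unspecified value to true, which presumably preserves the previous behaviour for callers that pass null. A small sketch of that defaulting (class and method names are illustrative):

    public final class KeyAspectDefaulting {
      // null and Boolean.TRUE both resolve to "include the key aspect"; only an explicit false opts out.
      static boolean resolve(Boolean alwaysIncludeKeyAspect) {
        return alwaysIncludeKeyAspect == null || alwaysIncludeKeyAspect;
      }

      public static void main(String[] args) {
        System.out.println(resolve(null));  // true
        System.out.println(resolve(true));  // true
        System.out.println(resolve(false)); // false
      }
    }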
opContext.getEntityAspectNames(entityName) : aspectNames; @@ -139,7 +146,11 @@ public Map batchGetV2( try { responseMap.putAll( entityService.getEntitiesV2( - opContext, entityName, new HashSet<>(batch), projectedAspects)); + opContext, + entityName, + new HashSet<>(batch), + projectedAspects, + alwaysIncludeKeyAspect == null || alwaysIncludeKeyAspect)); } catch (URISyntaxException e) { throw new RuntimeException(e); } @@ -772,7 +783,7 @@ public List batchIngestProposals( .mcps( batch, auditStamp, - opContext.getRetrieverContext().get(), + opContext.getRetrieverContext(), opContext.getValidationContext().isAlternateValidation()) .build(); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java index eda9b3a880228f..1d2fd422d7f460 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java @@ -89,6 +89,6 @@ public Map batchGetV2NoCache( @Nonnull Set urns, @Nullable Set aspectNames) throws RemoteInvocationException, URISyntaxException { - return super.batchGetV2(opContext, entityName, urns, aspectNames); + return super.batchGetV2(opContext, entityName, urns, aspectNames, false); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java index 626a1f72f5fb73..50cf8af30d606a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java @@ -5,7 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.entity.Aspect; -import com.linkedin.metadata.aspect.CachingAspectRetriever; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.models.registry.EntityRegistry; import io.datahubproject.metadata.context.OperationContext; @@ -22,7 +22,7 @@ @Getter @Builder -public class EntityServiceAspectRetriever implements CachingAspectRetriever { +public class EntityServiceAspectRetriever implements AspectRetriever { @Setter private OperationContext systemOperationContext; private final EntityRegistry entityRegistry; @@ -46,7 +46,8 @@ public Map> getLatestAspectObjects( String entityName = urns.stream().findFirst().map(Urn::getEntityType).get(); try { return entityResponseToAspectMap( - entityService.getEntitiesV2(systemOperationContext, entityName, urns, aspectNames)); + entityService.getEntitiesV2( + systemOperationContext, entityName, urns, aspectNames, false)); } catch (URISyntaxException e) { throw new RuntimeException(e); } @@ -71,7 +72,8 @@ public Map> getLatestSystemAspects( urnAspectNames.keySet(), urnAspectNames.values().stream() .flatMap(Collection::stream) - .collect(Collectors.toSet())), + .collect(Collectors.toSet()), + false), entityRegistry); } catch (URISyntaxException e) { throw new RuntimeException(e); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 6de7784bfbc0ec..8ae09111204cab 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -261,8 +261,7 
@@ public Map> getLatestAspects( } List systemAspects = - EntityUtils.toSystemAspects( - opContext.getRetrieverContext().get(), batchGetResults.values()); + EntityUtils.toSystemAspects(opContext.getRetrieverContext(), batchGetResults.values()); systemAspects.stream() // for now, don't add the key aspect here we have already added it above @@ -290,8 +289,7 @@ public Map getLatestAspectsForUrn( Map batchGetResults = getLatestAspect(opContext, new HashSet<>(Arrays.asList(urn)), aspectNames, forUpdate); - return EntityUtils.toSystemAspects( - opContext.getRetrieverContext().get(), batchGetResults.values()) + return EntityUtils.toSystemAspects(opContext.getRetrieverContext(), batchGetResults.values()) .stream() .map( systemAspect -> Pair.of(systemAspect.getAspectName(), systemAspect.getRecordTemplate())) @@ -335,7 +333,7 @@ public Pair getAspectVersionPair( final Optional maybeAspect = Optional.ofNullable(aspectDao.getAspect(primaryKey)); return Pair.of( - EntityUtils.toSystemAspect(opContext.getRetrieverContext().get(), maybeAspect.orElse(null)) + EntityUtils.toSystemAspect(opContext.getRetrieverContext(), maybeAspect.orElse(null)) .map(SystemAspect::getRecordTemplate) .orElse(null), version); @@ -721,7 +719,7 @@ public ListResult listLatestAspects( } return new ListResult<>( - EntityUtils.toSystemAspects(opContext.getRetrieverContext().get(), entityAspects).stream() + EntityUtils.toSystemAspects(opContext.getRetrieverContext(), entityAspects).stream() .map(SystemAspect::getRecordTemplate) .collect(Collectors.toList()), aspectMetadataList.getMetadata(), @@ -758,12 +756,12 @@ public List ingestAspects( .recordTemplate(pair.getValue()) .systemMetadata(systemMetadata) .auditStamp(auditStamp) - .build(opContext.getAspectRetrieverOpt().get())) + .build(opContext.getAspectRetriever())) .collect(Collectors.toList()); return ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -815,13 +813,13 @@ private void processPostCommitMCLSideEffects( log.debug("Considering {} MCLs post commit side effects.", mcls.size()); List batch = mcls.stream() - .map(mcl -> MCLItemImpl.builder().build(mcl, opContext.getAspectRetrieverOpt().get())) + .map(mcl -> MCLItemImpl.builder().build(mcl, opContext.getAspectRetriever())) .collect(Collectors.toList()); Iterable> iterable = () -> Iterators.partition( - AspectsBatch.applyPostMCPSideEffects(batch, opContext.getRetrieverContext().get()) + AspectsBatch.applyPostMCPSideEffects(batch, opContext.getRetrieverContext()) .iterator(), MCP_SIDE_EFFECT_KAFKA_BATCH_SIZE); StreamSupport.stream(iterable.spliterator(), false) @@ -831,7 +829,7 @@ private void processPostCommitMCLSideEffects( ingestProposalAsync( AspectsBatchImpl.builder() .items(sideEffects) - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .build()) .count(); log.info("Generated {} MCP SideEffects for async processing", count); @@ -879,8 +877,7 @@ private List ingestAspectsToLocalDB( aspectDao.getLatestAspects(urnAspects, true); final Map> batchAspects = - EntityUtils.toSystemAspects( - opContext.getRetrieverContext().get(), databaseAspects); + EntityUtils.toSystemAspects(opContext.getRetrieverContext(), databaseAspects); // read #2 (potentially) final Map> nextVersions = @@ -903,7 +900,7 @@ private List ingestAspectsToLocalDB( Map> newLatestAspects = EntityUtils.toSystemAspects( - 
opContext.getRetrieverContext().get(), + opContext.getRetrieverContext(), aspectDao.getLatestAspects(updatedItems.getFirst(), true)); // merge updatedLatestAspects = AspectsBatch.merge(batchAspects, newLatestAspects); @@ -941,7 +938,7 @@ private List ingestAspectsToLocalDB( // do final pre-commit checks with previous aspect value ValidationExceptionCollection exceptions = - AspectsBatch.validatePreCommit(changeMCPs, opContext.getRetrieverContext().get()); + AspectsBatch.validatePreCommit(changeMCPs, opContext.getRetrieverContext()); if (exceptions.hasFatalExceptions()) { // IF this is a client request/API request we fail the `transaction batch` @@ -1143,8 +1140,8 @@ public RecordTemplate ingestAspectIfNotPresent( .recordTemplate(newValue) .systemMetadata(systemMetadata) .auditStamp(auditStamp) - .build(opContext.getAspectRetrieverOpt().get()), - opContext.getRetrieverContext().get()) + .build(opContext.getAspectRetriever()), + opContext.getRetrieverContext()) .build(); List ingested = ingestAspects(opContext, aspectsBatch, true, false); @@ -1169,7 +1166,7 @@ public IngestResult ingestProposal( return ingestProposal( opContext, AspectsBatchImpl.builder() - .mcps(List.of(proposal), auditStamp, opContext.getRetrieverContext().get()) + .mcps(List.of(proposal), auditStamp, opContext.getRetrieverContext()) .build(), async) .stream() @@ -1246,7 +1243,7 @@ private Stream ingestTimeseriesProposal( .recordTemplate( EntityApiUtils.buildKeyAspect( opContext.getEntityRegistry(), item.getUrn())) - .build(opContext.getAspectRetrieverOpt().get())) + .build(opContext.getAspectRetriever())) .collect(Collectors.toList()); ingestProposalSync( @@ -1469,7 +1466,7 @@ public List restoreIndices( List systemAspects = EntityUtils.toSystemAspectFromEbeanAspects( - opContext.getRetrieverContext().get(), batch.collect(Collectors.toList())); + opContext.getRetrieverContext(), batch.collect(Collectors.toList())); RestoreIndicesResult result = restoreIndices(opContext, systemAspects, logger); result.timeSqlQueryMs = timeSqlQueryMs; @@ -1513,7 +1510,7 @@ public List restoreIndices( long startTime = System.currentTimeMillis(); List systemAspects = EntityUtils.toSystemAspects( - opContext.getRetrieverContext().get(), + opContext.getRetrieverContext(), getLatestAspect(opContext, entityBatch.getValue(), aspectNames, false).values()); long timeSqlQueryMs = System.currentTimeMillis() - startTime; @@ -1649,12 +1646,12 @@ private RestoreIndicesResult restoreIndices( .auditStamp(auditStamp) .systemMetadata(latestSystemMetadata) .recordTemplate(EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), urn)) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); Stream defaultAspectsResult = ingestProposalSync( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(keyAspect) .build()); defaultAspectsCreated += defaultAspectsResult.count(); @@ -1966,7 +1963,7 @@ private void ingestSnapshotUnion( AspectsBatchImpl aspectsBatch = AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items( aspectRecordsToIngest.stream() .map( @@ -1977,7 +1974,7 @@ private void ingestSnapshotUnion( .recordTemplate(pair.getValue()) .auditStamp(auditStamp) .systemMetadata(systemMetadata) - .build(opContext.getAspectRetrieverOpt().get())) + .build(opContext.getAspectRetriever())) .collect(Collectors.toList())) .build(); @@ 
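The bulk of the EntityServiceImpl changes here are mechanical: OperationContext#getRetrieverContext and #getAspectRetriever are now called directly instead of unwrapping Optionals via getRetrieverContext().get() and getAspectRetrieverOpt().get(). Condensed into one hedged example of the new call shape (the helper class is illustrative; the builder calls mirror the hunks above):

    import com.linkedin.common.AuditStamp;
    import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl;
    import com.linkedin.mxe.MetadataChangeProposal;
    import io.datahubproject.metadata.context.OperationContext;
    import java.util.List;

    public final class BatchFromProposals {
      // before: .mcps(mcps, auditStamp, opContext.getRetrieverContext().get())
      // after:  .mcps(mcps, auditStamp, opContext.getRetrieverContext())
      static AspectsBatchImpl build(
          OperationContext opContext, List<MetadataChangeProposal> mcps, AuditStamp auditStamp) {
        return AspectsBatchImpl.builder()
            .mcps(mcps, auditStamp, opContext.getRetrieverContext())
            .build();
      }
    }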
-2128,7 +2125,7 @@ public RollbackRunResult deleteUrn(@Nonnull OperationContext opContext, Urn urn) } SystemMetadata latestKeySystemMetadata = - EntityUtils.toSystemAspect(opContext.getRetrieverContext().get(), latestKey) + EntityUtils.toSystemAspect(opContext.getRetrieverContext(), latestKey) .map(SystemAspect::getSystemMetadata) .get(); RollbackResult result = @@ -2253,11 +2250,11 @@ private RollbackResult deleteAspectWithoutMCL( .urn(entityUrn) .aspectName(aspectName) .auditStamp(auditStamp) - .build(opContext.getAspectRetrieverOpt().get()); + .build(opContext.getAspectRetriever()); // Delete validation hooks ValidationExceptionCollection exceptions = - AspectsBatch.validateProposed(List.of(deleteItem), opContext.getRetrieverContext().get()); + AspectsBatch.validateProposed(List.of(deleteItem), opContext.getRetrieverContext()); if (!exceptions.isEmpty()) { throw new ValidationException(collectMetrics(exceptions).toString()); } @@ -2271,7 +2268,7 @@ private RollbackResult deleteAspectWithoutMCL( final EntityAspect.EntitySystemAspect latest = (EntityAspect.EntitySystemAspect) EntityUtils.toSystemAspect( - opContext.getRetrieverContext().get(), + opContext.getRetrieverContext(), aspectDao.getLatestAspect(urn, aspectName, false)) .orElse(null); @@ -2299,7 +2296,7 @@ private RollbackResult deleteAspectWithoutMCL( EntityAspect.EntitySystemAspect candidateAspect = (EntityAspect.EntitySystemAspect) EntityUtils.toSystemAspect( - opContext.getRetrieverContext().get(), + opContext.getRetrieverContext(), aspectDao.getAspect(urn, aspectName, maxVersion)) .orElse(null); SystemMetadata previousSysMetadata = @@ -2325,13 +2322,9 @@ private RollbackResult deleteAspectWithoutMCL( .urn(UrnUtils.getUrn(toDelete.getUrn())) .aspectName(toDelete.getAspect()) .auditStamp(auditStamp) - .build( - opContext - .getRetrieverContext() - .get() - .getAspectRetriever())) + .build(opContext.getAspectRetriever())) .collect(Collectors.toList()), - opContext.getRetrieverContext().get()); + opContext.getRetrieverContext()); if (!preCommitExceptions.isEmpty()) { throw new ValidationException(collectMetrics(preCommitExceptions).toString()); } @@ -2509,7 +2502,7 @@ private Map getEnvelopedAspects( final Map dbEntries = aspectDao.batchGet(dbKeys, false); List envelopedAspects = - EntityUtils.toSystemAspects(opContext.getRetrieverContext().get(), dbEntries.values()); + EntityUtils.toSystemAspects(opContext.getRetrieverContext(), dbEntries.values()); return envelopedAspects.stream() .collect( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java index 3c4109970e9d0b..da48a2b76d6d56 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java @@ -72,7 +72,7 @@ public static void ingestChangeProposals( entityService.ingestProposal( opContext, AspectsBatchImpl.builder() - .mcps(changes, getAuditStamp(actor), opContext.getRetrieverContext().get()) + .mcps(changes, getAuditStamp(actor), opContext.getRetrieverContext()) .build(), async); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java index ccc1910ba5cdbd..c595e3e07b8342 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java @@ -64,7 +64,7 @@ protected AspectsBatch buildAspectsBatch( List mcps, @Nonnull AuditStamp auditStamp) { return AspectsBatchImpl.builder() - .mcps(mcps, auditStamp, opContext.getRetrieverContext().get()) + .mcps(mcps, auditStamp, opContext.getRetrieverContext()) .build(); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java index bd6cc67561b883..ea580a97c51886 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java @@ -93,8 +93,14 @@ public class EbeanAspectDao implements AspectDao, AspectMigrationsDao { */ private final LoadingCache locks; + private final String batchGetMethod; + public EbeanAspectDao(@Nonnull final Database server, EbeanConfiguration ebeanConfiguration) { _server = server; + this.batchGetMethod = + ebeanConfiguration.getBatchGetMethod() != null + ? ebeanConfiguration.getBatchGetMethod() + : "IN"; if (ebeanConfiguration.getLocking().isEnabled()) { this.locks = CacheBuilder.newBuilder() @@ -371,23 +377,37 @@ private List batchGet( final int totalPageCount = QueryUtils.getTotalPageCount(keys.size(), keysCount); final List finalResult = - batchGetUnion(new ArrayList<>(keys), keysCount, position, forUpdate); + batchGetSelectString(new ArrayList<>(keys), keysCount, position, forUpdate); while (QueryUtils.hasMore(position, keysCount, totalPageCount)) { position += keysCount; final List oneStatementResult = - batchGetUnion(new ArrayList<>(keys), keysCount, position, forUpdate); + batchGetSelectString(new ArrayList<>(keys), keysCount, position, forUpdate); finalResult.addAll(oneStatementResult); } return finalResult; } + @Nonnull + private List batchGetSelectString( + @Nonnull final List keys, + final int keysCount, + final int position, + boolean forUpdate) { + + if (batchGetMethod.equals("IN")) { + return batchGetIn(keys, keysCount, position, forUpdate); + } + + return batchGetUnion(keys, keysCount, position, forUpdate); + } + /** * Builds a single SELECT statement for batch get, which selects one entity, and then can be * UNION'd with other SELECT statements. 
*/ - private String batchGetSelect( + private String batchGetSelectString( final int selectId, @Nonnull final String urn, @Nonnull final String aspect, @@ -434,7 +454,7 @@ private List batchGetUnion( final Map params = new HashMap<>(); for (int index = position; index < end; index++) { sb.append( - batchGetSelect( + batchGetSelectString( index - position, keys.get(index).getUrn(), keys.get(index).getAspect(), @@ -467,6 +487,65 @@ private List batchGetUnion( return query.findList(); } + @Nonnull + private List batchGetIn( + @Nonnull final List keys, + final int keysCount, + final int position, + boolean forUpdate) { + validateConnection(); + + // Build a single SELECT with IN clause using composite key comparison + // Query will look like: + // SELECT * FROM metadata_aspect WHERE (urn, aspect, version) IN + // (('urn0', 'aspect0', 0), ('urn1', 'aspect1', 1)) + final StringBuilder sb = new StringBuilder(); + sb.append( + "SELECT urn, aspect, version, metadata, systemMetadata, createdOn, createdBy, createdFor "); + sb.append("FROM metadata_aspect_v2 WHERE (urn, aspect, version) IN ("); + + final int end = Math.min(keys.size(), position + keysCount); + final Map params = new HashMap<>(); + + for (int index = position; index < end; index++) { + int paramIndex = index - position; + String urnParam = "urn" + paramIndex; + String aspectParam = "aspect" + paramIndex; + String versionParam = "version" + paramIndex; + + params.put(urnParam, keys.get(index).getUrn()); + params.put(aspectParam, keys.get(index).getAspect()); + params.put(versionParam, keys.get(index).getVersion()); + + sb.append("(:" + urnParam + ", :" + aspectParam + ", :" + versionParam + ")"); + + if (index != end - 1) { + sb.append(","); + } + } + + sb.append(")"); + + if (forUpdate) { + sb.append(" FOR UPDATE"); + } + + final RawSql rawSql = + RawSqlBuilder.parse(sb.toString()) + .columnMapping(EbeanAspectV2.URN_COLUMN, "key.urn") + .columnMapping(EbeanAspectV2.ASPECT_COLUMN, "key.aspect") + .columnMapping(EbeanAspectV2.VERSION_COLUMN, "key.version") + .create(); + + final Query query = _server.find(EbeanAspectV2.class).setRawSql(rawSql); + + for (Map.Entry param : params.entrySet()) { + query.setParameter(param.getKey(), param.getValue()); + } + + return query.findList(); + } + @Override @Nonnull public ListResult listUrns( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java index 49fa555e006f61..74d0d8b0964de0 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java @@ -59,7 +59,7 @@ protected AspectsBatch buildAspectsBatch( List mcps, @Nonnull AuditStamp auditStamp) { return AspectsBatchImpl.builder() - .mcps(mcps, auditStamp, opContext.getRetrieverContext().get()) + .mcps(mcps, auditStamp, opContext.getRetrieverContext()) .build(); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java index 367705d369c7ce..6c5c6243d33620 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java @@ -143,7 +143,7 
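batchGetIn above is the counterpart to the existing UNION strategy: batchGetSelectString dispatches to it when the configured batchGetMethod is "IN" (the constructor falls back to "IN" when the setting is absent), and it binds one (urn, aspect, version) tuple per key as named parameters. A standalone sketch of the SQL it assembles for a two-key page (the helper class is illustrative, but the column list and tuple syntax follow the code above):

    import java.util.List;
    import java.util.StringJoiner;

    public final class CompositeKeyInSketch {
      record Key(String urn, String aspect, long version) {}

      // Assembles the same SELECT ... WHERE (urn, aspect, version) IN (...) shape as batchGetIn,
      // with one named-parameter tuple (:urnN, :aspectN, :versionN) per key.
      static String buildSql(List<Key> keys) {
        StringJoiner tuples = new StringJoiner(", ");
        for (int i = 0; i < keys.size(); i++) {
          tuples.add("(:urn" + i + ", :aspect" + i + ", :version" + i + ")");
        }
        return "SELECT urn, aspect, version, metadata, systemMetadata, createdOn, createdBy, createdFor"
            + " FROM metadata_aspect_v2 WHERE (urn, aspect, version) IN (" + tuples + ")";
      }

      public static void main(String[] args) {
        System.out.println(
            buildSql(
                List.of(
                    new Key("urn:li:corpuser:datahub", "corpUserKey", 0L),
                    new Key("urn:li:corpuser:datahub", "corpUserInfo", 0L))));
      }
    }

For any other configured value, batchGetSelectString falls back to the existing UNION-per-key strategy.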
@@ private static QueryBuilder expandTerms( if (!queryUrns.isEmpty()) { scrollGraph( - opContext.getRetrieverContext().get().getGraphRetriever(), + opContext.getRetrieverContext().getGraphRetriever(), queryUrns, relationshipTypes, relationshipDirection, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java index 4bb8e0630de480..b4ad847cb7afc2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java @@ -437,8 +437,6 @@ private void setStructuredPropertiesSearchValue( Map> definitions = opContext - .getRetrieverContext() - .get() .getAspectRetriever() .getLatestAspectObjects( propertyMap.keySet(), Set.of(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java index ad2825ead3d0da..4a692e95346222 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java @@ -112,7 +112,7 @@ private void fetchRelatedEntities( @Nullable String scrollId, int consumedEntityCount, int batchNumber) { - GraphRetriever graph = opContext.getRetrieverContext().get().getGraphRetriever(); + GraphRetriever graph = opContext.getRetrieverContext().getGraphRetriever(); final ArrayList> futureList = new ArrayList<>(); RelatedEntitiesScrollResult result = graph.scrollRelatedEntities( @@ -165,7 +165,7 @@ private Callable processBatch( return () -> { StopWatch stopWatch = new StopWatch(); stopWatch.start(); - AspectRetriever aspectRetriever = opContext.getAspectRetrieverOpt().get(); + AspectRetriever aspectRetriever = opContext.getAspectRetriever(); log.info("Batch {} for BA:{} started", batchNumber, entityKey); ExecutionResult executionResult = new ExecutionResult(); executionResult.setBatchNumber(batchNumber); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java index efe073fc00dfdc..4b09bc00efb61a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java @@ -94,8 +94,7 @@ public UpdateGraphIndicesService( public void handleChangeEvent( @Nonnull OperationContext opContext, @Nonnull final MetadataChangeLog event) { try { - MCLItemImpl mclItem = - MCLItemImpl.builder().build(event, opContext.getAspectRetrieverOpt().get()); + MCLItemImpl mclItem = MCLItemImpl.builder().build(event, opContext.getAspectRetriever()); if (UPDATE_CHANGE_TYPES.contains(event.getChangeType())) { handleUpdateChangeEvent(opContext, mclItem); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java index 187ef3e8c62290..c5fc9ebdac9fa6 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java @@ 
-121,11 +121,10 @@ public UpdateIndicesService( public void handleChangeEvent( @Nonnull OperationContext opContext, @Nonnull final MetadataChangeLog event) { try { - MCLItemImpl batch = - MCLItemImpl.builder().build(event, opContext.getAspectRetrieverOpt().get()); + MCLItemImpl batch = MCLItemImpl.builder().build(event, opContext.getAspectRetriever()); Stream sideEffects = - AspectsBatch.applyMCLSideEffects(List.of(batch), opContext.getRetrieverContext().get()); + AspectsBatch.applyMCLSideEffects(List.of(batch), opContext.getRetrieverContext()); for (MCLItem mclItem : Stream.concat(Stream.of(batch), sideEffects).collect(Collectors.toList())) { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java index 12b12cf105196e..fa6ab7932001b6 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java @@ -46,12 +46,12 @@ public static Map ingestCorpUserKeyAspects( .recordTemplate(aspect) .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); } entityService.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -83,12 +83,12 @@ public static Map ingestCorpUserInfoAspects( .recordTemplate(aspect) .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); } entityService.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -121,12 +121,12 @@ public static Map ingestChartInfoAspects( .recordTemplate(aspect) .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); } entityService.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/aspect/hooks/IgnoreUnknownMutatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/aspect/hooks/IgnoreUnknownMutatorTest.java index 11a3153abcaeed..19be1eb14667d8 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/aspect/hooks/IgnoreUnknownMutatorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/aspect/hooks/IgnoreUnknownMutatorTest.java @@ -16,7 +16,8 @@ import com.linkedin.data.template.StringMap; import com.linkedin.dataset.DatasetProperties; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.entity.SearchRetriever; @@ -28,7 +29,6 @@ import com.linkedin.mxe.SystemMetadata; import 
com.linkedin.test.metadata.aspect.TestEntityRegistry; import io.datahubproject.metadata.context.RetrieverContext; -import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.util.List; @@ -53,17 +53,17 @@ public class IgnoreUnknownMutatorTest { private static final Urn TEST_DATASET_URN = UrnUtils.getUrn( "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.customers,PROD)"); - private AspectRetriever mockAspectRetriever; + private CachingAspectRetriever mockAspectRetriever; private RetrieverContext retrieverContext; @BeforeMethod public void setup() { - mockAspectRetriever = mock(AspectRetriever.class); + mockAspectRetriever = mock(CachingAspectRetriever.class); retrieverContext = RetrieverContext.builder() .searchRetriever(mock(SearchRetriever.class)) - .aspectRetriever(mockAspectRetriever) - .graphRetriever(TestOperationContexts.emptyGraphRetriever) + .cachingAspectRetriever(mockAspectRetriever) + .graphRetriever(GraphRetriever.EMPTY) .build(); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java b/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java index 04aff4edf456d9..e7ed2671131592 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java @@ -56,8 +56,7 @@ public void testAdditionalChanges() { DefaultAspectsUtil.getAdditionalChanges( opContext, AspectsBatchImpl.builder() - .mcps( - List.of(proposal1), new AuditStamp(), opContext.getRetrieverContext().get()) + .mcps(List.of(proposal1), new AuditStamp(), opContext.getRetrieverContext()) .build() .getMCPItems(), entityServiceImpl, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java index 976b165fea53df..215e1e2431efa0 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java @@ -15,7 +15,7 @@ import com.linkedin.dataproduct.DataProductAssociationArray; import com.linkedin.dataproduct.DataProductProperties; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.aspect.batch.MCPItem; @@ -75,12 +75,12 @@ public class DataProductUnsetSideEffectTest { .build())) .build(); - private AspectRetriever mockAspectRetriever; + private CachingAspectRetriever mockAspectRetriever; private RetrieverContext retrieverContext; @BeforeMethod public void setup() { - mockAspectRetriever = mock(AspectRetriever.class); + mockAspectRetriever = mock(CachingAspectRetriever.class); when(mockAspectRetriever.getEntityRegistry()).thenReturn(TEST_REGISTRY); GraphRetriever graphRetriever = mock(GraphRetriever.class); RelatedEntities relatedEntities = @@ -139,7 +139,7 @@ public void setup() { retrieverContext = RetrieverContext.builder() .searchRetriever(mock(SearchRetriever.class)) - .aspectRetriever(mockAspectRetriever) + 
.cachingAspectRetriever(mockAspectRetriever) .graphRetriever(graphRetriever) .build(); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java index 0386031cbcad86..88f84ee94c8ee7 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java @@ -19,6 +19,7 @@ import com.linkedin.metadata.AspectGenerationUtils; import com.linkedin.metadata.Constants; import com.linkedin.metadata.EbeanTestUtils; +import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.config.EbeanConfiguration; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.ebean.EbeanAspectDao; @@ -98,12 +99,15 @@ public void setupTest() { .entityService(_entityServiceImpl) .entityRegistry(_testEntityRegistry) .build()) - .graphRetriever(TestOperationContexts.emptyGraphRetriever) - .searchRetriever(TestOperationContexts.emptySearchRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever( + () -> _testEntityRegistry)) + .graphRetriever(GraphRetriever.EMPTY) + .searchRetriever(SearchRetriever.EMPTY) .build(), null, opContext -> - ((EntityServiceAspectRetriever) opContext.getAspectRetrieverOpt().get()) + ((EntityServiceAspectRetriever) opContext.getAspectRetriever()) .setSystemOperationContext(opContext), null); } @@ -152,25 +156,25 @@ public void testIngestListLatestAspects() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)), + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)), ChangeItemImpl.builder() .urn(entityUrn2) .aspectName(aspectName) .recordTemplate(writeAspect2) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)), + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)), ChangeItemImpl.builder() .urn(entityUrn3) .aspectName(aspectName) .recordTemplate(writeAspect3) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null))); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null))); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -230,25 +234,25 @@ public void testIngestListUrns() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)), + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)), ChangeItemImpl.builder() .urn(entityUrn2) .aspectName(aspectName) .recordTemplate(writeAspect2) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)), + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)), ChangeItemImpl.builder() .urn(entityUrn3) .aspectName(aspectName) .recordTemplate(writeAspect3) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null))); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null))); _entityServiceImpl.ingestAspects( opContext, 
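Several test fixtures above are updated the same way: the AspectRetriever mock becomes a CachingAspectRetriever, and the RetrieverContext is assembled with cachingAspectRetriever(...) plus empty graph and search retrievers. A condensed, hedged version of that wiring (the factory class name is illustrative; the imports follow those visible in the hunks):

    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.when;

    import com.linkedin.metadata.aspect.CachingAspectRetriever;
    import com.linkedin.metadata.aspect.GraphRetriever;
    import com.linkedin.metadata.entity.SearchRetriever;
    import com.linkedin.metadata.models.registry.EntityRegistry;
    import io.datahubproject.metadata.context.RetrieverContext;

    public final class TestRetrieverContextFactory {
      static RetrieverContext build(EntityRegistry registry) {
        CachingAspectRetriever cachingAspectRetriever = mock(CachingAspectRetriever.class);
        when(cachingAspectRetriever.getEntityRegistry()).thenReturn(registry);
        return RetrieverContext.builder()
            .cachingAspectRetriever(cachingAspectRetriever)
            .graphRetriever(GraphRetriever.EMPTY)
            .searchRetriever(mock(SearchRetriever.class))
            .build();
      }
    }

EbeanEntityServiceTest uses the same shape, but with TestOperationContexts.emptyActiveUsersAspectRetriever(...) and SearchRetriever.EMPTY in place of the mocks.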
AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -310,11 +314,11 @@ public void testSystemMetadataDuplicateKey() throws Exception { .recordTemplate(new Status().setRemoved(true)) .systemMetadata(systemMetadata) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(item)) .build(), false, @@ -356,7 +360,7 @@ public void testSystemMetadataDuplicateKey() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items( List.of( ChangeItemImpl.builder() @@ -365,7 +369,7 @@ public void testSystemMetadataDuplicateKey() throws Exception { .recordTemplate(new Status().setRemoved(false)) .systemMetadata(systemMetadata) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)))) + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)))) .build(), false, true); @@ -600,7 +604,7 @@ public void run() { auditStamp.setTime(System.currentTimeMillis()); AspectsBatchImpl batch = AspectsBatchImpl.builder() - .mcps(mcps, auditStamp, operationContext.getRetrieverContext().get()) + .mcps(mcps, auditStamp, operationContext.getRetrieverContext()) .build(); entityService.ingestProposal(operationContext, batch, false); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index 2d59632e6f3c6d..c00632e5cf5424 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -945,32 +945,32 @@ public void testRollbackAspect() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn2) .aspectName(aspectName) .recordTemplate(writeAspect2) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn3) .aspectName(aspectName) .recordTemplate(writeAspect3) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1Overwrite) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1037,25 +1037,25 @@ public void testRollbackKey() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + 
.build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(keyAspectName) .recordTemplate(writeKey1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1Overwrite) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1130,39 +1130,39 @@ public void testRollbackUrn() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(keyAspectName) .recordTemplate(writeKey1) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn2) .aspectName(aspectName) .recordTemplate(writeAspect2) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn3) .aspectName(aspectName) .recordTemplate(writeAspect3) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1Overwrite) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1208,11 +1208,11 @@ public void testIngestGetLatestAspect() throws AssertionError { .recordTemplate(writeAspect1) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1264,11 +1264,11 @@ public void testIngestGetLatestAspect() throws AssertionError { .recordTemplate(writeAspect2) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata2) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1320,11 +1320,11 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { .recordTemplate(writeAspect1) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - 
.retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1347,11 +1347,11 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { .recordTemplate(writeAspect2) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1416,11 +1416,11 @@ public void testIngestSameAspect() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1472,11 +1472,11 @@ public void testIngestSameAspect() throws AssertionError { .recordTemplate(writeAspect2) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1534,46 +1534,46 @@ public void testRetention() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1a) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1b) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2a) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2b) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1610,18 +1610,18 @@ public void testRetention() 
throws AssertionError { .recordTemplate(writeAspect1c) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get()), + .build(opContext.getAspectRetriever()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2c) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getAspectRetrieverOpt().get())); + .build(opContext.getAspectRetriever())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(items) .build(), true, @@ -1982,8 +1982,7 @@ public void testStructuredPropertyIngestProposal() throws Exception { stream .map( entityAspect -> - EntityUtils.toSystemAspect( - opContext.getRetrieverContext().get(), entityAspect) + EntityUtils.toSystemAspect(opContext.getRetrieverContext(), entityAspect) .get() .getAspect(StructuredPropertyDefinition.class)) .collect(Collectors.toSet()); @@ -1995,7 +1994,10 @@ public void testStructuredPropertyIngestProposal() throws Exception { SystemEntityClient mockSystemEntityClient = Mockito.mock(SystemEntityClient.class); Mockito.when( mockSystemEntityClient.getLatestAspectObject( - any(OperationContext.class), eq(firstPropertyUrn), eq("propertyDefinition"))) + any(OperationContext.class), + eq(firstPropertyUrn), + eq("propertyDefinition"), + anyBoolean())) .thenReturn(new com.linkedin.entity.Aspect(structuredPropertyDefinition.data())); // Add a value for that property @@ -2062,8 +2064,7 @@ public void testStructuredPropertyIngestProposal() throws Exception { stream .map( entityAspect -> - EntityUtils.toSystemAspect( - opContext.getRetrieverContext().get(), entityAspect) + EntityUtils.toSystemAspect(opContext.getRetrieverContext(), entityAspect) .get() .getAspect(StructuredPropertyDefinition.class)) .collect(Collectors.toSet()); @@ -2074,7 +2075,10 @@ public void testStructuredPropertyIngestProposal() throws Exception { Mockito.when( mockSystemEntityClient.getLatestAspectObject( - any(OperationContext.class), eq(secondPropertyUrn), eq("propertyDefinition"))) + any(OperationContext.class), + eq(secondPropertyUrn), + eq("propertyDefinition"), + anyBoolean())) .thenReturn(new com.linkedin.entity.Aspect(secondDefinition.data())); // Get existing value for first structured property @@ -2209,7 +2213,7 @@ public void testBatchDuplicate() throws Exception { .recordTemplate(new Status().setRemoved(true)) .systemMetadata(systemMetadata.copy()) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); ChangeItemImpl item2 = ChangeItemImpl.builder() .urn(entityUrn) @@ -2217,11 +2221,11 @@ public void testBatchDuplicate() throws Exception { .recordTemplate(new Status().setRemoved(false)) .systemMetadata(systemMetadata.copy()) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(item1, item2)) .build(), false, @@ -2269,7 +2273,7 @@ public void testBatchPatchWithTrailingNoOp() throws Exception { .setTags(new TagAssociationArray(new 
TagAssociation().setTag(tag1)))) .systemMetadata(systemMetadata.copy()) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); PatchItemImpl patchAdd2 = PatchItemImpl.builder() @@ -2311,7 +2315,7 @@ public void testBatchPatchWithTrailingNoOp() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(initialAspectTag1)) .build(), false, @@ -2320,7 +2324,7 @@ public void testBatchPatchWithTrailingNoOp() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(patchAdd2, patchRemoveNonExistent)) .build(), false, @@ -2368,7 +2372,7 @@ public void testBatchPatchAdd() throws Exception { .setTags(new TagAssociationArray(new TagAssociation().setTag(tag1)))) .systemMetadata(systemMetadata.copy()) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); PatchItemImpl patchAdd3 = PatchItemImpl.builder() @@ -2428,7 +2432,7 @@ public void testBatchPatchAdd() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(initialAspectTag1)) .build(), false, @@ -2437,7 +2441,7 @@ public void testBatchPatchAdd() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(patchAdd3, patchAdd2, patchAdd1)) .build(), false, @@ -2491,7 +2495,7 @@ public void testBatchPatchAddDuplicate() throws Exception { .recordTemplate(new GlobalTags().setTags(new TagAssociationArray(initialTags))) .systemMetadata(systemMetadata.copy()) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); PatchItemImpl patchAdd2 = PatchItemImpl.builder() @@ -2516,7 +2520,7 @@ public void testBatchPatchAddDuplicate() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(initialAspectTag1)) .build(), false, @@ -2525,7 +2529,7 @@ public void testBatchPatchAddDuplicate() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(patchAdd2, patchAdd2)) // duplicate .build(), false, @@ -2581,7 +2585,7 @@ public void testPatchRemoveNonExistent() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(patchRemove)) .build(), false, @@ -2638,7 +2642,7 @@ public void testPatchAddNonExistent() throws Exception { _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) 
+ .retrieverContext(opContext.getRetrieverContext()) .items(List.of(patchAdd)) .build(), false, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/cassandra/CassandraEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/cassandra/CassandraEntityServiceTest.java index 550f55e6bfd0b9..b4fbfecc9d60d3 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/cassandra/CassandraEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/cassandra/CassandraEntityServiceTest.java @@ -10,11 +10,13 @@ import com.linkedin.metadata.AspectGenerationUtils; import com.linkedin.metadata.AspectIngestionUtils; import com.linkedin.metadata.CassandraTestUtils; +import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.EntityServiceAspectRetriever; import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.entity.EntityServiceTest; import com.linkedin.metadata.entity.ListResult; +import com.linkedin.metadata.entity.SearchRetriever; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.key.CorpUserKey; import com.linkedin.metadata.models.registry.EntityRegistryException; @@ -93,12 +95,15 @@ private void configureComponents() { .entityService(_entityServiceImpl) .entityRegistry(_testEntityRegistry) .build()) - .graphRetriever(TestOperationContexts.emptyGraphRetriever) - .searchRetriever(TestOperationContexts.emptySearchRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever( + () -> _testEntityRegistry)) + .graphRetriever(GraphRetriever.EMPTY) + .searchRetriever(SearchRetriever.EMPTY) .build(), null, opContext -> - ((EntityServiceAspectRetriever) opContext.getAspectRetrieverOpt().get()) + ((EntityServiceAspectRetriever) opContext.getAspectRetriever()) .setSystemOperationContext(opContext), null); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImplTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImplTest.java index 3f6b301e72aa5a..0a867ae3c8f2e0 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImplTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImplTest.java @@ -26,7 +26,7 @@ public void testBatchDuplicate() throws Exception { .recordTemplate(new Status().setRemoved(true)) .systemMetadata(systemMetadata.copy()) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); ChangeItemImpl item2 = ChangeItemImpl.builder() .urn(entityUrn) @@ -34,7 +34,7 @@ public void testBatchDuplicate() throws Exception { .recordTemplate(new Status().setRemoved(false)) .systemMetadata(systemMetadata.copy()) .auditStamp(TEST_AUDIT_STAMP) - .build(TestOperationContexts.emptyAspectRetriever(null)); + .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); assertFalse(item1.isDatabaseDuplicateOf(item2)); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/recommendation/RecommendationsServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/recommendation/RecommendationsServiceTest.java index ca42f0327c86db..8f68f119cb0b7d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/recommendation/RecommendationsServiceTest.java +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/recommendation/RecommendationsServiceTest.java @@ -11,6 +11,7 @@ import com.linkedin.metadata.recommendation.ranker.SimpleRecommendationRanker; import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; +import java.nio.file.AccessDeniedException; import java.util.List; import java.util.stream.Collectors; import org.testng.annotations.Test; @@ -74,7 +75,7 @@ private List getContentFromUrns(List urns) { } @Test - public void testService() throws URISyntaxException { + public void testService() throws URISyntaxException, AccessDeniedException { // Test non-eligible and empty RecommendationsService service = new RecommendationsService(ImmutableList.of(nonEligibleSource, emptySource), ranker); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/schemafields/sideeffects/SchemaFieldSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/schemafields/sideeffects/SchemaFieldSideEffectTest.java index 1661f5f02ee593..fa895cb4540117 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/schemafields/sideeffects/SchemaFieldSideEffectTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/schemafields/sideeffects/SchemaFieldSideEffectTest.java @@ -21,7 +21,8 @@ import com.linkedin.data.ByteString; import com.linkedin.entity.Aspect; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.batch.MCLItem; import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; @@ -46,7 +47,6 @@ import com.linkedin.test.metadata.aspect.TestEntityRegistry; import com.linkedin.test.metadata.aspect.batch.TestMCP; import io.datahubproject.metadata.context.RetrieverContext; -import io.datahubproject.test.metadata.context.TestOperationContexts; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -87,18 +87,18 @@ public class SchemaFieldSideEffectTest { .build())) .build(); - private AspectRetriever mockAspectRetriever; + private CachingAspectRetriever mockAspectRetriever; private RetrieverContext retrieverContext; @BeforeMethod public void setup() { - mockAspectRetriever = mock(AspectRetriever.class); + mockAspectRetriever = mock(CachingAspectRetriever.class); when(mockAspectRetriever.getEntityRegistry()).thenReturn(TEST_REGISTRY); retrieverContext = RetrieverContext.builder() .searchRetriever(mock(SearchRetriever.class)) - .aspectRetriever(mockAspectRetriever) - .graphRetriever(TestOperationContexts.emptyGraphRetriever) + .cachingAspectRetriever(mockAspectRetriever) + .graphRetriever(GraphRetriever.EMPTY) .build(); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java index fd768424e13c19..1825b65a18ab19 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java @@ -20,6 +20,7 @@ import com.linkedin.metadata.aspect.models.graph.RelatedEntities; import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; import 
com.linkedin.metadata.config.search.QueryFilterRewriterConfiguration; +import com.linkedin.metadata.entity.SearchRetriever; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Condition; @@ -71,8 +72,10 @@ public void init() { () -> io.datahubproject.metadata.context.RetrieverContext.builder() .aspectRetriever(mockAspectRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever(() -> entityRegistry)) .graphRetriever(mockGraphRetriever) - .searchRetriever(TestOperationContexts.emptySearchRetriever) + .searchRetriever(SearchRetriever.EMPTY) .build(), null, null, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java index 8741e24b1bca50..de375271ed6602 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java @@ -13,13 +13,14 @@ import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; -import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.models.graph.Edge; import com.linkedin.metadata.aspect.models.graph.RelatedEntities; import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; import com.linkedin.metadata.config.search.QueryFilterRewriterConfiguration; +import com.linkedin.metadata.entity.SearchRetriever; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Condition; @@ -54,7 +55,7 @@ public class DomainExpansionRewriterTest @BeforeMethod public void init() { EntityRegistry entityRegistry = new TestEntityRegistry(); - AspectRetriever mockAspectRetriever = mock(AspectRetriever.class); + CachingAspectRetriever mockAspectRetriever = mock(CachingAspectRetriever.class); when(mockAspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); mockGraphRetriever = spy(GraphRetriever.class); @@ -71,8 +72,10 @@ public void init() { () -> io.datahubproject.metadata.context.RetrieverContext.builder() .aspectRetriever(mockAspectRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever(() -> entityRegistry)) .graphRetriever(mockGraphRetriever) - .searchRetriever(TestOperationContexts.emptySearchRetriever) + .searchRetriever(SearchRetriever.EMPTY) .build(), null, null, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java index c68997e25bcff7..d6f5f9c3eedbe7 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java @@ -18,6 +18,7 @@ import com.linkedin.data.template.StringArray; import com.linkedin.entity.Aspect; import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import 
com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation; @@ -49,8 +50,8 @@ public class AggregationQueryBuilderTest { - private static AspectRetriever aspectRetriever; - private static AspectRetriever aspectRetrieverV1; + private static CachingAspectRetriever aspectRetriever; + private static CachingAspectRetriever aspectRetrieverV1; private static String DEFAULT_FILTER = "_index"; @BeforeClass @@ -61,7 +62,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { Urn.createFromString("urn:li:structuredProperty:under.scores.and.dots_make_a_mess"); // legacy - aspectRetriever = mock(AspectRetriever.class); + aspectRetriever = mock(CachingAspectRetriever.class); when(aspectRetriever.getEntityRegistry()) .thenReturn(TestOperationContexts.defaultEntityRegistry()); @@ -106,7 +107,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { new Aspect(structPropUnderscoresAndDotsDefinition.data())))); // V1 - aspectRetrieverV1 = mock(AspectRetriever.class); + aspectRetrieverV1 = mock(CachingAspectRetriever.class); when(aspectRetrieverV1.getEntityRegistry()) .thenReturn(TestOperationContexts.defaultEntityRegistry()); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index 393ca3ca5d4a64..e51511699e345a 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -662,6 +662,7 @@ public void testInvalidStructuredProperty() { TestOperationContexts.systemContextNoSearchAuthorization( RetrieverContext.builder() .aspectRetriever(aspectRetriever) + .cachingAspectRetriever(TestOperationContexts.emptyActiveUsersAspectRetriever(null)) .graphRetriever(mock(GraphRetriever.class)) .searchRetriever(mock(SearchRetriever.class)) .build()); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java index 2c5bcd1294fa15..65b73b7425b743 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java @@ -247,6 +247,9 @@ public void testSetSearchableRefValue() throws URISyntaxException, RemoteInvocat TestOperationContexts.systemContextNoSearchAuthorization( RetrieverContext.builder() .aspectRetriever(aspectRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever( + () -> TEST_ENTITY_REGISTRY)) .graphRetriever(mock(GraphRetriever.class)) .searchRetriever(mock(SearchRetriever.class)) .build()); @@ -301,6 +304,9 @@ public void testSetSearchableRefValue_RuntimeException() TestOperationContexts.systemContextNoSearchAuthorization( RetrieverContext.builder() .aspectRetriever(aspectRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever( + () -> TEST_ENTITY_REGISTRY)) .graphRetriever(mock(GraphRetriever.class)) .searchRetriever(mock(SearchRetriever.class)) .build()); @@ -337,6 +343,9 @@ public void testSetSearchableRefValue_RuntimeException_URNExist() 
TestOperationContexts.systemContextNoSearchAuthorization( RetrieverContext.builder() .aspectRetriever(aspectRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever( + () -> TEST_ENTITY_REGISTRY)) .graphRetriever(mock(GraphRetriever.class)) .searchRetriever(mock(SearchRetriever.class)) .build()); @@ -369,6 +378,9 @@ void testSetSearchableRefValue_WithInvalidURN() TestOperationContexts.systemContextNoSearchAuthorization( RetrieverContext.builder() .aspectRetriever(aspectRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever( + () -> TEST_ENTITY_REGISTRY)) .graphRetriever(mock(GraphRetriever.class)) .searchRetriever(mock(SearchRetriever.class)) .build()); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java index b1b716c5604816..9a0a82c7f9f49d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java @@ -18,7 +18,8 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.entity.Aspect; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.batch.PatchMCP; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; @@ -36,7 +37,6 @@ import com.linkedin.test.metadata.aspect.TestEntityRegistry; import com.linkedin.test.metadata.aspect.batch.TestMCL; import io.datahubproject.metadata.context.RetrieverContext; -import io.datahubproject.test.metadata.context.TestOperationContexts; import jakarta.json.Json; import jakarta.json.JsonPatch; import java.util.List; @@ -76,13 +76,13 @@ public class PropertyDefinitionDeleteSideEffectTest { private static final Urn TEST_DATASET_URN = UrnUtils.getUrn( "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.customers,PROD)"); - private AspectRetriever mockAspectRetriever; + private CachingAspectRetriever mockAspectRetriever; private SearchRetriever mockSearchRetriever; private RetrieverContext retrieverContext; @BeforeMethod public void setup() { - mockAspectRetriever = mock(AspectRetriever.class); + mockAspectRetriever = mock(CachingAspectRetriever.class); when(mockAspectRetriever.getEntityRegistry()).thenReturn(TEST_REGISTRY); when(mockAspectRetriever.getLatestAspectObject( eq(TEST_PROPERTY_URN), eq(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME))) @@ -101,8 +101,8 @@ public void setup() { retrieverContext = RetrieverContext.builder() .searchRetriever(mockSearchRetriever) - .aspectRetriever(mockAspectRetriever) - .graphRetriever(TestOperationContexts.emptyGraphRetriever) + .cachingAspectRetriever(mockAspectRetriever) + .graphRetriever(GraphRetriever.EMPTY) .build(); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/ShowPropertyAsBadgeValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/ShowPropertyAsBadgeValidatorTest.java index 2503faa00f6e71..6e8886f495c95a 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/ShowPropertyAsBadgeValidatorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/ShowPropertyAsBadgeValidatorTest.java @@ -58,7 +58,7 @@ public void setup() { mockGraphRetriever = Mockito.mock(GraphRetriever.class); retrieverContext = io.datahubproject.metadata.context.RetrieverContext.builder() - .aspectRetriever(mockAspectRetriever) + .cachingAspectRetriever(mockAspectRetriever) .searchRetriever(mockSearchRetriever) .graphRetriever(mockGraphRetriever) .build(); diff --git a/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java b/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java index 3acd2bf3413578..02cd28eb202e94 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java +++ b/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java @@ -171,10 +171,7 @@ public Stream> generateMCPs( DefaultAspectsUtil.getAdditionalChanges( opContext, AspectsBatchImpl.builder() - .mcps( - List.of(mcp), - auditStamp, - opContext.getRetrieverContext().get()) + .mcps(List.of(mcp), auditStamp, opContext.getRetrieverContext()) .build() .getMCPItems(), entityService, diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java index cf9d73dfa729be..f16c9dbd82e749 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java @@ -20,7 +20,6 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.OperationContextConfig; -import io.datahubproject.metadata.context.RetrieverContext; import io.datahubproject.metadata.context.ServicesRegistryContext; import io.datahubproject.metadata.context.ValidationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; @@ -95,7 +94,7 @@ public OperationContext operationContext( entityRegistry, mock(ServicesRegistryContext.class), indexConvention, - mock(RetrieverContext.class), + TestOperationContexts.emptyActiveUsersRetrieverContext(() -> entityRegistry), mock(ValidationContext.class)); } diff --git a/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java b/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java index 47740b02d6166c..65ee6b8591f489 100644 --- a/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java +++ b/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java @@ -93,8 +93,6 @@ public void testMCLOnBusinessAttributeUpdate() throws Exception { new RelatedEntity(BUSINESS_ATTRIBUTE_OF, SCHEMA_FIELD_URN.toString()))); when(opContext - .getRetrieverContext() - .get() .getAspectRetriever() .getLatestAspectObjects( eq(Set.of(SCHEMA_FIELD_URN)), eq(Set.of(BUSINESS_ATTRIBUTE_ASPECT)))) @@ -108,7 +106,7 @@ public void testMCLOnBusinessAttributeUpdate() throws Exception { // verify // page 1 - Mockito.verify(opContext.getRetrieverContext().get().getGraphRetriever(), Mockito.times(1)) + 
Mockito.verify(opContext.getRetrieverContext().getGraphRetriever(), Mockito.times(1)) .scrollRelatedEntities( isNull(), any(Filter.class), @@ -122,7 +120,7 @@ public void testMCLOnBusinessAttributeUpdate() throws Exception { isNull(), isNull()); // page 2 - Mockito.verify(opContext.getRetrieverContext().get().getGraphRetriever(), Mockito.times(1)) + Mockito.verify(opContext.getRetrieverContext().getGraphRetriever(), Mockito.times(1)) .scrollRelatedEntities( isNull(), any(Filter.class), @@ -136,7 +134,7 @@ public void testMCLOnBusinessAttributeUpdate() throws Exception { isNull(), isNull()); - Mockito.verifyNoMoreInteractions(opContext.getRetrieverContext().get().getGraphRetriever()); + Mockito.verifyNoMoreInteractions(opContext.getRetrieverContext().getGraphRetriever()); // 2 pages = 2 ingest proposals Mockito.verify(mockUpdateIndicesService, Mockito.times(2)) @@ -152,8 +150,8 @@ private void testMCLOnInvalidCategory() throws Exception { businessAttributeServiceHook.handleChangeEvent(opContext, platformEvent); // verify - Mockito.verifyNoInteractions(opContext.getRetrieverContext().get().getGraphRetriever()); - Mockito.verifyNoInteractions(opContext.getAspectRetrieverOpt().get()); + Mockito.verifyNoInteractions(opContext.getRetrieverContext().getGraphRetriever()); + Mockito.verifyNoInteractions(opContext.getAspectRetriever()); Mockito.verifyNoInteractions(mockUpdateIndicesService); } @@ -226,13 +224,15 @@ private OperationContext mockOperationContextWithGraph(List graph RetrieverContext mockRetrieverContext = mock(RetrieverContext.class); when(mockRetrieverContext.getAspectRetriever()).thenReturn(mock(AspectRetriever.class)); + when(mockRetrieverContext.getCachingAspectRetriever()) + .thenReturn(TestOperationContexts.emptyActiveUsersAspectRetriever(null)); when(mockRetrieverContext.getGraphRetriever()).thenReturn(graphRetriever); OperationContext opContext = TestOperationContexts.systemContextNoSearchAuthorization(mockRetrieverContext); // reset mock for test - reset(opContext.getAspectRetrieverOpt().get()); + reset(opContext.getAspectRetriever()); if (!graphEdges.isEmpty()) { diff --git a/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceType.pdl b/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceType.pdl index 2f36eda9141abb..1a1dbea4359fbd 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceType.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceType.pdl @@ -9,9 +9,13 @@ enum PlatformResourceType { /** * e.g. a Slack member resource, Looker user resource, etc. */ - USER_INFO, + USER_INFO, /** * e.g. a Slack channel */ CONVERSATION + /** + * e.g. 
Looker mapping of all user ids + */ + USER_ID_MAPPING } diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ActorContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ActorContext.java index e65bf22991736d..c08b7fad4dee32 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ActorContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ActorContext.java @@ -1,12 +1,23 @@ package io.datahubproject.metadata.context; +import static com.linkedin.metadata.Constants.CORP_USER_KEY_ASPECT_NAME; +import static com.linkedin.metadata.Constants.CORP_USER_STATUS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.CORP_USER_STATUS_SUSPENDED; +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.SYSTEM_ACTOR; + import com.datahub.authentication.Authentication; +import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; +import com.linkedin.entity.Aspect; +import com.linkedin.identity.CorpUserStatus; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.policy.DataHubPolicyInfo; import java.util.Collection; import java.util.Collections; +import java.util.Map; import java.util.Optional; import java.util.Set; import lombok.Builder; @@ -48,6 +59,43 @@ public Urn getActorUrn() { return UrnUtils.getUrn(authentication.getActor().toUrnStr()); } + /** + * Actor is considered active if the user is not hard-deleted, soft-deleted, and is not suspended + * + * @param aspectRetriever aspect retriever - ideally the SystemEntityClient backed one for caching + * @return active status + */ + public boolean isActive(AspectRetriever aspectRetriever) { + // system cannot be disabled + if (SYSTEM_ACTOR.equals(authentication.getActor().toUrnStr())) { + return true; + } + + Urn selfUrn = UrnUtils.getUrn(authentication.getActor().toUrnStr()); + Map> urnAspectMap = + aspectRetriever.getLatestAspectObjects( + Set.of(selfUrn), + Set.of(STATUS_ASPECT_NAME, CORP_USER_STATUS_ASPECT_NAME, CORP_USER_KEY_ASPECT_NAME)); + + Map aspectMap = urnAspectMap.getOrDefault(selfUrn, Map.of()); + + if (!aspectMap.containsKey(CORP_USER_KEY_ASPECT_NAME)) { + // user is hard deleted + return false; + } + + Status status = + Optional.ofNullable(aspectMap.get(STATUS_ASPECT_NAME)) + .map(a -> new Status(a.data())) + .orElse(new Status().setRemoved(false)); + CorpUserStatus corpUserStatus = + Optional.ofNullable(aspectMap.get(CORP_USER_STATUS_ASPECT_NAME)) + .map(a -> new CorpUserStatus(a.data())) + .orElse(new CorpUserStatus().setStatus("")); + + return !status.isRemoved() && !CORP_USER_STATUS_SUSPENDED.equals(corpUserStatus.getStatus()); + } + /** * The current implementation creates a cache entry unique for the set of policies. 
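A minimal unit-test sketch of the isActive(...) contract introduced above — not part of this diff. It assumes Mockito and TestNG static imports, the same static Constants imports as ActorContext, that ActorContext's Lombok builder exposes an authentication(...) setter, and that CorpUserKey exposes setUsername(...); the actor urn:li:corpuser:jdoe is hypothetical. The idea: a suspended corp user still has a corpUserKey aspect (so is not hard-deleted), yet isActive(...) should return false.

@Test
public void testSuspendedActorIsNotActive() {
  AspectRetriever retriever = mock(AspectRetriever.class);
  Urn actorUrn = UrnUtils.getUrn("urn:li:corpuser:jdoe");

  // Key aspect present (not hard-deleted), but corpUserStatus is SUSPENDED.
  when(retriever.getLatestAspectObjects(anySet(), anySet()))
      .thenReturn(
          Map.of(
              actorUrn,
              Map.of(
                  CORP_USER_KEY_ASPECT_NAME,
                  new Aspect(new CorpUserKey().setUsername("jdoe").data()),
                  CORP_USER_STATUS_ASPECT_NAME,
                  new Aspect(
                      new CorpUserStatus().setStatus(CORP_USER_STATUS_SUSPENDED).data()))));

  // Assumes the Lombok builder exposes authentication(...).
  ActorContext actorContext =
      ActorContext.builder()
          .authentication(new Authentication(new Actor(ActorType.USER, "jdoe"), ""))
          .build();

  // No status aspect defaults to "not removed", so the suspension alone flips the result.
  assertFalse(actorContext.isActive(retriever));
}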
* diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java index 9a058c526647c2..9158129235b39e 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java @@ -16,6 +16,8 @@ import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.utils.AuditStampUtils; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; +import io.datahubproject.metadata.exception.ActorAccessException; +import io.datahubproject.metadata.exception.OperationContextException; import java.util.Collection; import java.util.Objects; import java.util.Optional; @@ -63,6 +65,24 @@ public static OperationContext asSession( @Nonnull Authorizer authorizer, @Nonnull Authentication sessionAuthentication, boolean allowSystemAuthentication) { + return OperationContext.asSession( + systemOperationContext, + requestContext, + authorizer, + sessionAuthentication, + allowSystemAuthentication, + false); + } + + @Nonnull + public static OperationContext asSession( + OperationContext systemOperationContext, + @Nonnull RequestContext requestContext, + @Nonnull Authorizer authorizer, + @Nonnull Authentication sessionAuthentication, + boolean allowSystemAuthentication, + boolean skipCache) + throws ActorAccessException { return systemOperationContext.toBuilder() .operationContextConfig( // update allowed system authentication @@ -72,7 +92,7 @@ public static OperationContext asSession( .authorizationContext(AuthorizationContext.builder().authorizer(authorizer).build()) .requestContext(requestContext) .validationContext(systemOperationContext.getValidationContext()) - .build(sessionAuthentication); + .build(sessionAuthentication, skipCache); } /** @@ -85,10 +105,14 @@ public static OperationContext asSession( public static OperationContext withSearchFlags( OperationContext opContext, Function flagDefaults) { - return opContext.toBuilder() - // update search flags for the request's session - .searchContext(opContext.getSearchContext().withFlagDefaults(flagDefaults)) - .build(opContext.getSessionActorContext()); + try { + return opContext.toBuilder() + // update search flags for the request's session + .searchContext(opContext.getSearchContext().withFlagDefaults(flagDefaults)) + .build(opContext.getSessionActorContext(), false); + } catch (OperationContextException e) { + throw new RuntimeException(e); + } } /** @@ -101,10 +125,14 @@ public static OperationContext withSearchFlags( public static OperationContext withLineageFlags( OperationContext opContext, Function flagDefaults) { - return opContext.toBuilder() - // update lineage flags for the request's session - .searchContext(opContext.getSearchContext().withLineageFlagDefaults(flagDefaults)) - .build(opContext.getSessionActorContext()); + try { + return opContext.toBuilder() + // update lineage flags for the request's session + .searchContext(opContext.getSearchContext().withLineageFlagDefaults(flagDefaults)) + .build(opContext.getSessionActorContext(), false); + } catch (OperationContextException e) { + throw new RuntimeException(e); + } } /** @@ -155,18 +183,22 @@ public static OperationContext asSystem( ? 
SearchContext.EMPTY : SearchContext.builder().indexConvention(indexConvention).build(); - return OperationContext.builder() - .operationContextConfig(systemConfig) - .systemActorContext(systemActorContext) - .searchContext(systemSearchContext) - .entityRegistryContext(EntityRegistryContext.builder().build(entityRegistry)) - .servicesRegistryContext(servicesRegistryContext) - // Authorizer.EMPTY doesn't actually apply to system auth - .authorizationContext(AuthorizationContext.builder().authorizer(Authorizer.EMPTY).build()) - .retrieverContext(retrieverContext) - .objectMapperContext(objectMapperContext) - .validationContext(validationContext) - .build(systemAuthentication); + try { + return OperationContext.builder() + .operationContextConfig(systemConfig) + .systemActorContext(systemActorContext) + .searchContext(systemSearchContext) + .entityRegistryContext(EntityRegistryContext.builder().build(entityRegistry)) + .servicesRegistryContext(servicesRegistryContext) + // Authorizer.EMPTY doesn't actually apply to system auth + .authorizationContext(AuthorizationContext.builder().authorizer(Authorizer.EMPTY).build()) + .retrieverContext(retrieverContext) + .objectMapperContext(objectMapperContext) + .validationContext(validationContext) + .build(systemAuthentication, false); + } catch (OperationContextException e) { + throw new RuntimeException(e); + } } @Nonnull private final OperationContextConfig operationContextConfig; @@ -177,7 +209,7 @@ public static OperationContext asSystem( @Nonnull private final EntityRegistryContext entityRegistryContext; @Nullable private final ServicesRegistryContext servicesRegistryContext; @Nullable private final RequestContext requestContext; - @Nullable private final RetrieverContext retrieverContext; + @Nonnull private final RetrieverContext retrieverContext; @Nonnull private final ObjectMapperContext objectMapperContext; @Nonnull private final ValidationContext validationContext; @@ -194,13 +226,15 @@ public OperationContext withLineageFlags( public OperationContext asSession( @Nonnull RequestContext requestContext, @Nonnull Authorizer authorizer, - @Nonnull Authentication sessionAuthentication) { + @Nonnull Authentication sessionAuthentication) + throws ActorAccessException { return OperationContext.asSession( this, requestContext, authorizer, sessionAuthentication, - getOperationContextConfig().isAllowSystemAuthentication()); + getOperationContextConfig().isAllowSystemAuthentication(), + false); } @Nonnull @@ -284,17 +318,9 @@ public AuditStamp getAuditStamp() { return getAuditStamp(null); } - public Optional getRetrieverContext() { - return Optional.ofNullable(retrieverContext); - } - - @Nullable + @Nonnull public AspectRetriever getAspectRetriever() { - return getAspectRetrieverOpt().orElse(null); - } - - public Optional getAspectRetrieverOpt() { - return getRetrieverContext().map(RetrieverContext::getAspectRetriever); + return retrieverContext.getAspectRetriever(); } /** @@ -336,10 +362,7 @@ public String getGlobalContextId() { ? EmptyContext.EMPTY : getServicesRegistryContext()) .add(getRequestContext() == null ? EmptyContext.EMPTY : getRequestContext()) - .add( - getRetrieverContext().isPresent() - ? getRetrieverContext().get() - : EmptyContext.EMPTY) + .add(getRetrieverContext()) .add(getObjectMapperContext()) .build() .stream() @@ -364,10 +387,7 @@ public String getSearchContextId() { getServicesRegistryContext() == null ? EmptyContext.EMPTY : getServicesRegistryContext()) - .add( - getRetrieverContext().isPresent() - ? 
getRetrieverContext().get() - : EmptyContext.EMPTY) + .add(getRetrieverContext()) .build() .stream() .map(ContextInterface::getCacheKeyComponent) @@ -438,6 +458,12 @@ public static class OperationContextBuilder { @Nonnull public OperationContext build(@Nonnull Authentication sessionAuthentication) { + return build(sessionAuthentication, false); + } + + @Nonnull + public OperationContext build( + @Nonnull Authentication sessionAuthentication, boolean skipCache) { final Urn actorUrn = UrnUtils.getUrn(sessionAuthentication.getActor().toUrnStr()); final ActorContext sessionActor = ActorContext.builder() @@ -451,11 +477,20 @@ public OperationContext build(@Nonnull Authentication sessionAuthentication) { .policyInfoSet(this.authorizationContext.getAuthorizer().getActorPolicies(actorUrn)) .groupMembership(this.authorizationContext.getAuthorizer().getActorGroups(actorUrn)) .build(); - return build(sessionActor); + return build(sessionActor, skipCache); } @Nonnull - public OperationContext build(@Nonnull ActorContext sessionActor) { + public OperationContext build(@Nonnull ActorContext sessionActor, boolean skipCache) { + AspectRetriever retriever = + skipCache + ? this.retrieverContext.getAspectRetriever() + : this.retrieverContext.getCachingAspectRetriever(); + + if (!sessionActor.isActive(retriever)) { + throw new ActorAccessException("Actor is not active"); + } + return new OperationContext( this.operationContextConfig, sessionActor, diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java index 9337fbfe3bb003..9afc4138810bb2 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java @@ -1,8 +1,10 @@ package io.datahubproject.metadata.context; import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.entity.SearchRetriever; +import java.util.Objects; import java.util.Optional; import javax.annotation.Nonnull; import lombok.Builder; @@ -15,10 +17,37 @@ public class RetrieverContext @Nonnull private final GraphRetriever graphRetriever; @Nonnull private final AspectRetriever aspectRetriever; + @Nonnull private final CachingAspectRetriever cachingAspectRetriever; @Nonnull private final SearchRetriever searchRetriever; @Override public Optional getCacheKeyComponent() { return Optional.empty(); } + + public static class RetrieverContextBuilder { + public RetrieverContext build() { + if (this.aspectRetriever == null && this.cachingAspectRetriever != null) { + this.aspectRetriever = this.cachingAspectRetriever; + } + + if (this.cachingAspectRetriever == null + && this.aspectRetriever instanceof CachingAspectRetriever) { + this.cachingAspectRetriever = (CachingAspectRetriever) this.aspectRetriever; + } + + return new RetrieverContext( + this.graphRetriever, + Objects.requireNonNull(this.aspectRetriever), + Objects.requireNonNull(this.cachingAspectRetriever), + this.searchRetriever); + } + } + + public static final RetrieverContext EMPTY = + RetrieverContext.builder() + .graphRetriever(GraphRetriever.EMPTY) + .searchRetriever(SearchRetriever.EMPTY) + .cachingAspectRetriever(CachingAspectRetriever.EMPTY) + .build(); } diff --git 
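Because asSession(...) and OperationContextBuilder.build(...) can now reject inactive actors, callers have to account for ActorAccessException. A hedged caller sketch, not taken from this diff: systemOperationContext, requestContext, authorizer, and authentication stand in for whatever the surrounding service already holds, and the log field assumes Lombok @Slf4j.

final OperationContext sessionContext;
try {
  // Default path (skipCache = false) resolves the actor through the CachingAspectRetriever;
  // passing true forces a fresh lookup via the plain AspectRetriever instead.
  sessionContext =
      OperationContext.asSession(
          systemOperationContext,
          requestContext,
          authorizer,
          authentication,
          /* allowSystemAuthentication */ true,
          /* skipCache */ false);
} catch (ActorAccessException e) {
  // The actor is hard-deleted, soft-deleted, or suspended; surface this as an auth
  // failure (e.g. 401/403) in the transport layer rather than executing the request.
  log.warn("Rejected session for inactive actor {}", authentication.getActor().toUrnStr());
  throw e;
}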
a/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/ActorAccessException.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/ActorAccessException.java new file mode 100644 index 00000000000000..bca2594b96430e --- /dev/null +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/ActorAccessException.java @@ -0,0 +1,7 @@ +package io.datahubproject.metadata.exception; + +public class ActorAccessException extends OperationContextException { + public ActorAccessException(String string) { + super(string); + } +} diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/OperationContextException.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/OperationContextException.java new file mode 100644 index 00000000000000..1aac8dc3e60ec9 --- /dev/null +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/OperationContextException.java @@ -0,0 +1,9 @@ +package io.datahubproject.metadata.exception; + +public class OperationContextException extends RuntimeException { + public OperationContextException(String message) { + super(message); + } + + public OperationContextException() {} +} diff --git a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java index 42de6b7398c616..4abfbb196f067c 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java @@ -8,21 +8,17 @@ import com.linkedin.common.urn.Urn; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.entity.Aspect; +import com.linkedin.identity.CorpUserInfo; +import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; -import com.linkedin.metadata.aspect.SystemAspect; -import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; import com.linkedin.metadata.entity.SearchRetriever; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistryException; import com.linkedin.metadata.models.registry.MergedEntityRegistry; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; -import com.linkedin.metadata.query.filter.Filter; -import com.linkedin.metadata.query.filter.RelationshipFilter; -import com.linkedin.metadata.query.filter.SortCriterion; -import com.linkedin.metadata.search.ScrollResult; -import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.snapshot.Snapshot; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; @@ -32,15 +28,14 @@ import io.datahubproject.metadata.context.RetrieverContext; import io.datahubproject.metadata.context.ServicesRegistryContext; import io.datahubproject.metadata.context.ValidationContext; -import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.function.Consumer; import java.util.function.Supplier; 
+import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; -import lombok.Builder; /** * Useful for testing. If the defaults are not sufficient, try using the .toBuilder() and replacing @@ -81,26 +76,53 @@ public static EntityRegistry defaultEntityRegistry() { return defaultEntityRegistryInstance; } - public static AspectRetriever emptyAspectRetriever( + public static RetrieverContext emptyActiveUsersRetrieverContext( @Nullable Supplier entityRegistrySupplier) { - return new EmptyAspectRetriever( - () -> - Optional.ofNullable(entityRegistrySupplier) - .map(Supplier::get) - .orElse(defaultEntityRegistry())); - } - public static GraphRetriever emptyGraphRetriever = new EmptyGraphRetriever(); - public static SearchRetriever emptySearchRetriever = new EmptySearchRetriever(); + return RetrieverContext.builder() + .cachingAspectRetriever(emptyActiveUsersAspectRetriever(entityRegistrySupplier)) + .graphRetriever(GraphRetriever.EMPTY) + .searchRetriever(SearchRetriever.EMPTY) + .build(); + } - public static RetrieverContext emptyRetrieverContext( + public static CachingAspectRetriever emptyActiveUsersAspectRetriever( @Nullable Supplier entityRegistrySupplier) { - return RetrieverContext.builder() - .aspectRetriever(emptyAspectRetriever(entityRegistrySupplier)) - .graphRetriever(emptyGraphRetriever) - .searchRetriever(emptySearchRetriever) - .build(); + return new CachingAspectRetriever.EmptyAspectRetriever() { + + @Nonnull + @Override + public Map> getLatestAspectObjects( + Set urns, Set aspectNames) { + if (urns.stream().allMatch(urn -> urn.toString().startsWith("urn:li:corpuser:")) + && aspectNames.contains(Constants.CORP_USER_KEY_ASPECT_NAME)) { + return urns.stream() + .map( + urn -> + Map.entry( + urn, + Map.of( + Constants.CORP_USER_KEY_ASPECT_NAME, + new Aspect( + new CorpUserInfo() + .setActive(true) + .setEmail(urn.getId()) + .setDisplayName(urn.getId()) + .data())))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + return super.getLatestAspectObjects(urns, aspectNames); + } + + @Nonnull + @Override + public EntityRegistry getEntityRegistry() { + return Optional.ofNullable(entityRegistrySupplier) + .map(Supplier::get) + .orElse(defaultEntityRegistry()); + } + }; } public static OperationContext systemContextNoSearchAuthorization( @@ -140,8 +162,10 @@ public static OperationContext systemContextNoSearchAuthorization( RetrieverContext retrieverContext = RetrieverContext.builder() .aspectRetriever(aspectRetriever) - .graphRetriever(emptyGraphRetriever) - .searchRetriever(emptySearchRetriever) + .cachingAspectRetriever( + emptyActiveUsersAspectRetriever(() -> aspectRetriever.getEntityRegistry())) + .graphRetriever(GraphRetriever.EMPTY) + .searchRetriever(SearchRetriever.EMPTY) .build(); return systemContextNoSearchAuthorization( () -> retrieverContext.getAspectRetriever().getEntityRegistry(), @@ -208,7 +232,7 @@ public static OperationContext systemContext( RetrieverContext retrieverContext = Optional.ofNullable(retrieverContextSupplier) .map(Supplier::get) - .orElse(emptyRetrieverContext(entityRegistrySupplier)); + .orElse(emptyActiveUsersRetrieverContext(entityRegistrySupplier)); EntityRegistry entityRegistry = Optional.ofNullable(entityRegistrySupplier) @@ -298,66 +322,5 @@ public static OperationContext userContextNoSearchAuthorization( .asSession(requestContext, Authorizer.EMPTY, TEST_USER_AUTH); } - @Builder - public static class EmptyAspectRetriever implements AspectRetriever { - private final Supplier 
entityRegistrySupplier; - - @Nonnull - @Override - public Map> getLatestAspectObjects( - Set urns, Set aspectNames) { - return Map.of(); - } - - @Nonnull - @Override - public Map> getLatestSystemAspects( - Map> urnAspectNames) { - return Map.of(); - } - - @Nonnull - @Override - public EntityRegistry getEntityRegistry() { - return entityRegistrySupplier.get(); - } - } - - public static class EmptyGraphRetriever implements GraphRetriever { - - @Nonnull - @Override - public RelatedEntitiesScrollResult scrollRelatedEntities( - @Nullable List sourceTypes, - @Nonnull Filter sourceEntityFilter, - @Nullable List destinationTypes, - @Nonnull Filter destinationEntityFilter, - @Nonnull List relationshipTypes, - @Nonnull RelationshipFilter relationshipFilter, - @Nonnull List sortCriterion, - @Nullable String scrollId, - int count, - @Nullable Long startTimeMillis, - @Nullable Long endTimeMillis) { - return new RelatedEntitiesScrollResult(0, 0, null, List.of()); - } - } - - public static class EmptySearchRetriever implements SearchRetriever { - - @Override - public ScrollResult scroll( - @Nonnull List entities, - @Nullable Filter filters, - @Nullable String scrollId, - int count) { - ScrollResult empty = new ScrollResult(); - empty.setEntities(new SearchEntityArray()); - empty.setNumEntities(0); - empty.setPageSize(0); - return empty; - } - } - private TestOperationContexts() {} } diff --git a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java index 3e092e20127ee5..f77b244d8f2d86 100644 --- a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java +++ b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java @@ -8,6 +8,7 @@ import com.datahub.authentication.Authentication; import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.metadata.models.registry.EntityRegistry; +import io.datahubproject.test.metadata.context.TestOperationContexts; import org.testng.annotations.Test; public class OperationContextTest { @@ -25,7 +26,7 @@ public void testSystemPrivilegeEscalation() { mock(EntityRegistry.class), mock(ServicesRegistryContext.class), null, - mock(RetrieverContext.class), + TestOperationContexts.emptyActiveUsersRetrieverContext(null), mock(ValidationContext.class)); OperationContext opContext = diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java index 6724f35d840adb..a9871f1ed99482 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java @@ -145,7 +145,7 @@ public String generateAccessToken( _entityService.ingestProposal( systemOperationContext, AspectsBatchImpl.builder() - .mcps(List.of(proposal), auditStamp, systemOperationContext.getRetrieverContext().get()) + .mcps(List.of(proposal), auditStamp, systemOperationContext.getRetrieverContext()) .build(), false); diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/EbeanConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/EbeanConfiguration.java index 47b406e695a3fb..6eb31e14a2d3b0 100644 --- 
a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/EbeanConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/EbeanConfiguration.java @@ -23,6 +23,7 @@ public class EbeanConfiguration { private boolean autoCreateDdl; private boolean postgresUseIamAuth; private LockingConfiguration locking; + private String batchGetMethod; public static final EbeanConfiguration testDefault = EbeanConfiguration.builder().locking(LockingConfiguration.testDefault).build(); diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 9348416606d0a9..b997bc108e4ba1 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -164,6 +164,7 @@ ebean: waitTimeoutMillis: ${EBEAN_WAIT_TIMEOUT_MILLIS:1000} autoCreateDdl: ${EBEAN_AUTOCREATE:false} postgresUseIamAuth: ${EBEAN_POSTGRES_USE_AWS_IAM_AUTH:false} + batchGetMethod: ${EBEAN_BATCH_GET_METHOD:IN} # Alternative UNION locking: enabled: ${EBEAN_LOCKING_ENABLED:false} durationSeconds: ${EBEAN_LOCKING_DURATION_SECONDS:60} @@ -522,12 +523,12 @@ cache: entityAspectTTLSeconds: # cache user aspects for 20s corpuser: - corpUserKey: 20 + corpUserKey: 300 # 5 min corpUserInfo: 20 corpUserEditableInfo: 20 - corpUserStatus: 20 + corpUserStatus: 300 # 5 min globalTags: 20 - status: 20 + status: 300 # 5 min corpUserCredentials: 20 corpUserSettings: 20 roleMembership: 20 @@ -561,7 +562,7 @@ springdoc.api-docs.groups.enabled: true forms: hook: - enabled: { $FORMS_HOOK_ENABLED:true } + enabled: ${FORMS_HOOK_ENABLED:true} consumerGroupSuffix: ${FORMS_HOOK_CONSUMER_GROUP_SUFFIX:} businessAttribute: diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java index f5235dc3682fce..3e2823591e168c 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java @@ -45,7 +45,8 @@ protected OperationContext javaSystemOperationContext( @Nonnull final SearchService searchService, @Qualifier("baseElasticSearchComponents") BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components, - @Nonnull final ConfigurationProvider configurationProvider) { + @Nonnull final ConfigurationProvider configurationProvider, + @Qualifier("systemEntityClient") @Nonnull final SystemEntityClient systemEntityClient) { EntityServiceAspectRetriever entityServiceAspectRetriever = EntityServiceAspectRetriever.builder() @@ -53,6 +54,9 @@ protected OperationContext javaSystemOperationContext( .entityService(entityService) .build(); + EntityClientAspectRetriever entityClientAspectRetriever = + EntityClientAspectRetriever.builder().entityClient(systemEntityClient).build(); + SystemGraphRetriever systemGraphRetriever = SystemGraphRetriever.builder().graphService(graphService).build(); @@ -68,6 +72,7 @@ protected OperationContext javaSystemOperationContext( components.getIndexConvention(), RetrieverContext.builder() .aspectRetriever(entityServiceAspectRetriever) + .cachingAspectRetriever(entityClientAspectRetriever) .graphRetriever(systemGraphRetriever) .searchRetriever(searchServiceSearchRetriever) .build(), @@ -76,6 
+81,7 @@ protected OperationContext javaSystemOperationContext( configurationProvider.getFeatureFlags().isAlternateMCPValidation()) .build()); + entityClientAspectRetriever.setSystemOperationContext(systemOperationContext); entityServiceAspectRetriever.setSystemOperationContext(systemOperationContext); systemGraphRetriever.setSystemOperationContext(systemOperationContext); searchServiceSearchRetriever.setSystemOperationContext(systemOperationContext); @@ -104,7 +110,7 @@ protected OperationContext restliSystemOperationContext( BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components, @Nonnull final ConfigurationProvider configurationProvider) { - EntityClientAspectRetriever entityServiceAspectRetriever = + EntityClientAspectRetriever entityClientAspectRetriever = EntityClientAspectRetriever.builder().entityClient(systemEntityClient).build(); SystemGraphRetriever systemGraphRetriever = @@ -121,7 +127,7 @@ protected OperationContext restliSystemOperationContext( ServicesRegistryContext.builder().restrictedService(restrictedService).build(), components.getIndexConvention(), RetrieverContext.builder() - .aspectRetriever(entityServiceAspectRetriever) + .cachingAspectRetriever(entityClientAspectRetriever) .graphRetriever(systemGraphRetriever) .searchRetriever(searchServiceSearchRetriever) .build(), @@ -130,7 +136,7 @@ protected OperationContext restliSystemOperationContext( configurationProvider.getFeatureFlags().isAlternateMCPValidation()) .build()); - entityServiceAspectRetriever.setSystemOperationContext(systemOperationContext); + entityClientAspectRetriever.setSystemOperationContext(systemOperationContext); systemGraphRetriever.setSystemOperationContext(systemOperationContext); searchServiceSearchRetriever.setSystemOperationContext(systemOperationContext); diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java index 22ce06a5984ea6..c04dd25ccd4ac9 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java @@ -84,14 +84,14 @@ public void execute(@Nonnull OperationContext systemOperationContext) throws Exc .aspectName(DATA_PLATFORM_INSTANCE_ASPECT_NAME) .recordTemplate(dataPlatformInstance.get()) .auditStamp(aspectAuditStamp) - .build(systemOperationContext.getAspectRetrieverOpt().get())); + .build(systemOperationContext.getAspectRetriever())); } } _entityService.ingestAspects( systemOperationContext, AspectsBatchImpl.builder() - .retrieverContext(systemOperationContext.getRetrieverContext().get()) + .retrieverContext(systemOperationContext.getRetrieverContext()) .items(items) .build(), true, diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java index eb6bfe17ac198e..dac2879487469c 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java @@ -225,7 +225,7 @@ private void ingestPolicy( new AuditStamp() .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) .setTime(System.currentTimeMillis()), - 
systemOperationContext.getRetrieverContext().get()) + systemOperationContext.getRetrieverContext()) .build(), false); } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java similarity index 81% rename from metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java rename to metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java index ba0a426fa20e89..c756827cad56ba 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java @@ -1,9 +1,11 @@ -package io.datahubproject.openapi; +package io.datahubproject.openapi.config; import com.linkedin.metadata.dao.throttle.APIThrottleException; +import io.datahubproject.metadata.exception.ActorAccessException; import io.datahubproject.openapi.exception.InvalidUrnException; import io.datahubproject.openapi.exception.UnauthorizedException; import java.util.Map; +import javax.annotation.PostConstruct; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.ConversionNotSupportedException; import org.springframework.core.Ordered; @@ -19,6 +21,11 @@ @ControllerAdvice public class GlobalControllerExceptionHandler extends DefaultHandlerExceptionResolver { + @PostConstruct + public void init() { + log.info("GlobalControllerExceptionHandler initialized"); + } + public GlobalControllerExceptionHandler() { setOrder(Ordered.HIGHEST_PRECEDENCE); setWarnLogCategory(getClass().getName()); @@ -52,4 +59,9 @@ public static ResponseEntity> handleUnauthorizedException( UnauthorizedException e) { return new ResponseEntity<>(Map.of("error", e.getMessage()), HttpStatus.FORBIDDEN); } + + @ExceptionHandler(ActorAccessException.class) + public static ResponseEntity> actorAccessException(ActorAccessException e) { + return new ResponseEntity<>(Map.of("error", e.getMessage()), HttpStatus.FORBIDDEN); + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java index 579a62c084999a..592d7bba4211fe 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java @@ -637,7 +637,7 @@ public ResponseEntity createAspect( AspectSpec aspectSpec = lookupAspectSpec(entitySpec, aspectName).get(); ChangeMCP upsert = toUpsertItem( - opContext.getRetrieverContext().get().getAspectRetriever(), + opContext.getRetrieverContext().getAspectRetriever(), urn, aspectSpec, createIfEntityNotExists, @@ -649,7 +649,7 @@ public ResponseEntity createAspect( entityService.ingestProposal( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(upsert)) .build(), async); @@ -725,7 +725,7 @@ public ResponseEntity patchAspect( .build(); ChangeMCP upsert = toUpsertItem( - opContext.getRetrieverContext().get().getAspectRetriever(), + 
opContext.getRetrieverContext().getAspectRetriever(), validatedUrn(entityUrn), aspectSpec, currentValue, @@ -736,7 +736,7 @@ public ResponseEntity patchAspect( entityService.ingestAspects( opContext, AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .items(List.of(upsert)) .build(), true, diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/test/IdController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/test/IdController.java new file mode 100644 index 00000000000000..99d3879ab9a320 --- /dev/null +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/test/IdController.java @@ -0,0 +1,54 @@ +package io.datahubproject.openapi.operations.test; + +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationContext; +import com.datahub.authorization.AuthorizerChain; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; +import jakarta.servlet.http.HttpServletRequest; +import java.util.List; +import java.util.Map; +import lombok.extern.slf4j.Slf4j; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +@RestController +@RequestMapping("/operations/identity") +@Slf4j +@Tag(name = "Identity", description = "An API for checking identity") +public class IdController { + private final AuthorizerChain authorizerChain; + private final OperationContext systemOperationContext; + + public IdController(OperationContext systemOperationContext, AuthorizerChain authorizerChain) { + this.systemOperationContext = systemOperationContext; + this.authorizerChain = authorizerChain; + } + + @Tag(name = "User") + @GetMapping(path = "/user/urn", produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "User id") + public ResponseEntity> getUserId( + HttpServletRequest request, + @RequestParam(value = "skipCache", required = false, defaultValue = "false") + Boolean skipCache) { + Authentication authentication = AuthenticationContext.getAuthentication(); + String actorUrnStr = authentication.getActor().toUrnStr(); + + OperationContext.asSession( + systemOperationContext, + RequestContext.builder().buildOpenapi(actorUrnStr, request, "getUserIdentity", List.of()), + authorizerChain, + authentication, + true, + skipCache); + + return ResponseEntity.ok(Map.of("urn", actorUrnStr)); + } +} diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java index c38f2db0eefbb3..ca425810c87a09 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java @@ -491,7 +491,7 @@ public static List> ingestBatchProposal( try { AspectsBatch batch = AspectsBatchImpl.builder() - .mcps(serviceProposals, auditStamp, opContext.getRetrieverContext().get()) + 
.mcps(serviceProposals, auditStamp, opContext.getRetrieverContext()) .build(); Map> resultMap = diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index 56a7955b9fe871..b1c5709ef01470 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -203,7 +203,7 @@ protected AspectsBatch toMCPBatch( objectMapper.writeValueAsString(aspect.getValue().get("systemMetadata")))); } - items.add(builder.build(opContext.getAspectRetrieverOpt().get())); + items.add(builder.build(opContext.getAspectRetriever())); } } } @@ -211,7 +211,7 @@ protected AspectsBatch toMCPBatch( return AspectsBatchImpl.builder() .items(items) - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .build(); } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index ce7fd73f99b9e5..af13cd3aab0510 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -554,14 +554,14 @@ protected AspectsBatch toMCPBatch( GenericRecordUtils.JSON, aspectSpec)); - items.add(builder.build(opContext.getRetrieverContext().get().getAspectRetriever())); + items.add(builder.build(opContext.getRetrieverContext().getAspectRetriever())); } } } } return AspectsBatchImpl.builder() .items(items) - .retrieverContext(opContext.getRetrieverContext().get()) + .retrieverContext(opContext.getRetrieverContext()) .build(); } diff --git a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entitiesV2.restspec.json b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entitiesV2.restspec.json index 33cfba0f27802c..27731af9ffaa71 100644 --- a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entitiesV2.restspec.json +++ b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entitiesV2.restspec.json @@ -19,6 +19,10 @@ "name" : "aspects", "type" : "{ \"type\" : \"array\", \"items\" : \"string\" }", "optional" : true + }, { + "name" : "alwaysIncludeKeyAspect", + "type" : "boolean", + "optional" : true } ] }, { "method" : "batch_get", @@ -27,6 +31,10 @@ "name" : "aspects", "type" : "{ \"type\" : \"array\", \"items\" : \"string\" }", "optional" : true + }, { + "name" : "alwaysIncludeKeyAspect", + "type" : "boolean", + "optional" : true } ] } ], "entity" : { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesV2.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesV2.snapshot.json index 9bf7f97b34be18..9c5f41281fcfbe 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesV2.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesV2.snapshot.json @@ -182,6 +182,10 @@ "name" : "aspects", "type" : "{ \"type\" : \"array\", \"items\" : \"string\" }", "optional" : true + }, { + "name" : "alwaysIncludeKeyAspect", + "type" : "boolean", + 
"optional" : true } ] }, { "method" : "batch_get", @@ -190,6 +194,10 @@ "name" : "aspects", "type" : "{ \"type\" : \"array\", \"items\" : \"string\" }", "optional" : true + }, { + "name" : "alwaysIncludeKeyAspect", + "type" : "boolean", + "optional" : true } ] } ], "entity" : { diff --git a/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java b/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java index cf6e571cb8cbeb..b85f22e781d0b0 100644 --- a/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java +++ b/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java @@ -45,12 +45,34 @@ // Consider renaming this to datahub client. public interface EntityClient { + /** + * This version follows the legacy behavior of returning key aspects regardless of whether they + * exist + * + * @param opContext operation context + * @param entityName entity type + * @param urn urn id for the entity + * @param aspectNames set of aspects + * @return requested entity/aspects + */ + @Deprecated @Nullable - EntityResponse getV2( + default EntityResponse getV2( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nonnull final Urn urn, @Nullable final Set aspectNames) + throws RemoteInvocationException, URISyntaxException { + return getV2(opContext, entityName, urn, aspectNames, true); + } + + @Nullable + EntityResponse getV2( + @Nonnull OperationContext opContext, + @Nonnull String entityName, + @Nonnull final Urn urn, + @Nullable final Set aspectNames, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException; @Nonnull @@ -58,12 +80,34 @@ EntityResponse getV2( Entity get(@Nonnull OperationContext opContext, @Nonnull final Urn urn) throws RemoteInvocationException; + /** + * This version follows the legacy behavior of returning key aspects regardless of whether they + * exist + * + * @param opContext operation context + * @param entityName entity type + * @param urns urn ids for the entities + * @param aspectNames set of aspects + * @return requested entity/aspects + */ + @Deprecated @Nonnull - Map batchGetV2( + default Map batchGetV2( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nonnull final Set urns, @Nullable final Set aspectNames) + throws RemoteInvocationException, URISyntaxException { + return batchGetV2(opContext, entityName, urns, aspectNames, true); + } + + @Nonnull + Map batchGetV2( + @Nonnull OperationContext opContext, + @Nonnull String entityName, + @Nonnull final Set urns, + @Nullable final Set aspectNames, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException; @Nonnull @@ -589,27 +633,38 @@ void rollbackIngestion( @Nullable default Aspect getLatestAspectObject( - @Nonnull OperationContext opContext, @Nonnull Urn urn, @Nonnull String aspectName) + @Nonnull OperationContext opContext, + @Nonnull Urn urn, + @Nonnull String aspectName, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException { - return getLatestAspects(opContext, Set.of(urn), Set.of(aspectName)) + return getLatestAspects(opContext, Set.of(urn), Set.of(aspectName), alwaysIncludeKeyAspect) .getOrDefault(urn, Map.of()) .get(aspectName); } @Nonnull default Map> getLatestAspects( - @Nonnull OperationContext opContext, @Nonnull Set urns, @Nonnull Set aspectNames) + @Nonnull OperationContext opContext, + @Nonnull Set urns, + @Nonnull 
Set aspectNames, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException { String entityName = urns.stream().findFirst().map(Urn::getEntityType).get(); - return entityResponseToAspectMap(batchGetV2(opContext, entityName, urns, aspectNames)); + return entityResponseToAspectMap( + batchGetV2(opContext, entityName, urns, aspectNames, alwaysIncludeKeyAspect)); } @Nonnull default Map> getLatestSystemAspect( - @Nonnull OperationContext opContext, @Nonnull Set urns, @Nonnull Set aspectNames) + @Nonnull OperationContext opContext, + @Nonnull Set urns, + @Nonnull Set aspectNames, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException { String entityName = urns.stream().findFirst().map(Urn::getEntityType).get(); return entityResponseToSystemAspectMap( - batchGetV2(opContext, entityName, urns, aspectNames), opContext.getEntityRegistry()); + batchGetV2(opContext, entityName, urns, aspectNames, alwaysIncludeKeyAspect), + opContext.getEntityRegistry()); } } diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java index 516902601f08a1..8d4c5e9228a71c 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java @@ -156,10 +156,15 @@ public EntityResponse getV2( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nonnull final Urn urn, - @Nullable final Set aspectNames) + @Nullable final Set aspectNames, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException { final EntitiesV2GetRequestBuilder requestBuilder = - ENTITIES_V2_REQUEST_BUILDERS.get().aspectsParam(aspectNames).id(urn.toString()); + ENTITIES_V2_REQUEST_BUILDERS + .get() + .aspectsParam(aspectNames) + .id(urn.toString()) + .alwaysIncludeKeyAspectParam(alwaysIncludeKeyAspect); return sendClientRequest(requestBuilder, opContext.getSessionAuthentication()).getEntity(); } @@ -241,7 +246,8 @@ public Map batchGetV2( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nonnull final Set urns, - @Nullable final Set aspectNames) + @Nullable final Set aspectNames, + @Nullable Boolean alwaysIncludeKeyAspect) throws RemoteInvocationException, URISyntaxException { Map responseMap = new HashMap<>(); @@ -260,6 +266,7 @@ public Map batchGetV2( ENTITIES_V2_REQUEST_BUILDERS .batchGet() .aspectsParam(aspectNames) + .alwaysIncludeKeyAspectParam(alwaysIncludeKeyAspect) .ids( batch.stream() .map(Urn::toString) diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java index 2637e2d067c6d5..aa17f1951bc912 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java @@ -59,6 +59,6 @@ public Map batchGetV2NoCache( @Nonnull Set urns, @Nullable Set aspectNames) throws RemoteInvocationException, URISyntaxException { - return super.batchGetV2(opContext, entityName, urns, aspectNames); + return super.batchGetV2(opContext, entityName, urns, aspectNames, false); } } diff --git 
a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index 6033ead36f10ec..30b187da00e91a 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -309,7 +309,7 @@ private Task ingestProposals( log.debug("Proposals: {}", metadataChangeProposals); try { final AspectsBatch batch = AspectsBatchImpl.builder() - .mcps(metadataChangeProposals, auditStamp, opContext.getRetrieverContext().get(), + .mcps(metadataChangeProposals, auditStamp, opContext.getRetrieverContext(), opContext.getValidationContext().isAlternateValidation()) .build(); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java index 20209ddf44d643..896d81d3cbecc3 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java @@ -64,7 +64,8 @@ public class EntityV2Resource extends CollectionResourceTaskTemplate get( - @Nonnull String urnStr, @QueryParam(PARAM_ASPECTS) @Optional @Nullable String[] aspectNames) + @Nonnull String urnStr, @QueryParam(PARAM_ASPECTS) @Optional @Nullable String[] aspectNames, + @QueryParam(PARAM_ALWAYS_INCLUDE_KEY_ASPECT) @Optional @Nullable Boolean alwaysIncludeKeyAspect) throws URISyntaxException { log.debug("GET V2 {}", urnStr); final Urn urn = Urn.createFromString(urnStr); @@ -90,7 +91,7 @@ public Task get( ? opContext.getEntityAspectNames(entityName) : new HashSet<>(Arrays.asList(aspectNames)); try { - return _entityService.getEntityV2(opContext, entityName, urn, projectedAspects); + return _entityService.getEntityV2(opContext, entityName, urn, projectedAspects, alwaysIncludeKeyAspect == null || alwaysIncludeKeyAspect); } catch (Exception e) { throw new RuntimeException( String.format( @@ -106,7 +107,8 @@ public Task get( @WithSpan public Task> batchGet( @Nonnull Set urnStrs, - @QueryParam(PARAM_ASPECTS) @Optional @Nullable String[] aspectNames) + @QueryParam(PARAM_ASPECTS) @Optional @Nullable String[] aspectNames, + @QueryParam(PARAM_ALWAYS_INCLUDE_KEY_ASPECT) @Optional @Nullable Boolean alwaysIncludeKeyAspect) throws URISyntaxException { log.debug("BATCH GET V2 {}", urnStrs.toString()); final Set urns = new HashSet<>(); @@ -138,7 +140,7 @@ public Task> batchGet( ? 
opContext.getEntityAspectNames(entityName) : new HashSet<>(Arrays.asList(aspectNames)); try { - return _entityService.getEntitiesV2(opContext, entityName, urns, projectedAspects); + return _entityService.getEntitiesV2(opContext, entityName, urns, projectedAspects, alwaysIncludeKeyAspect == null || alwaysIncludeKeyAspect); } catch (Exception e) { throw new RuntimeException( String.format( diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliConstants.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliConstants.java index ef79a404c2145e..11df52ad66709e 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliConstants.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliConstants.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.resources.restli; +import javax.annotation.Nullable; + public final class RestliConstants { private RestliConstants() {} @@ -21,6 +23,7 @@ private RestliConstants() {} public static final String PARAM_INPUT = "input"; public static final String PARAM_MAX_HOPS = "maxHops"; public static final String PARAM_ASPECTS = "aspects"; + public static final String PARAM_ALWAYS_INCLUDE_KEY_ASPECT = "alwaysIncludeKeyAspect"; public static final String PARAM_FILTER = "filter"; public static final String PARAM_GROUP = "group"; public static final String PARAM_SORT = "sort"; diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java index 185874fac1382d..a2092405da3ff6 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java @@ -8,6 +8,7 @@ import com.linkedin.parseq.Task; import com.linkedin.restli.common.HttpStatus; import com.linkedin.restli.server.RestLiServiceException; +import io.datahubproject.metadata.exception.ActorAccessException; import java.util.Optional; import java.util.function.Supplier; import javax.annotation.Nonnull; @@ -38,6 +39,8 @@ public static Task toTask(@Nonnull Supplier supplier) { if (throwable instanceof IllegalArgumentException || throwable.getCause() instanceof IllegalArgumentException) { finalException = badRequestException(throwable.getMessage()); + } else if (throwable.getCause() instanceof ActorAccessException) { + finalException = forbidden(throwable.getCause().getMessage()); } else if (throwable instanceof APIThrottleException) { finalException = apiThrottled(throwable.getMessage()); } else if (throwable instanceof RestLiServiceException) { @@ -109,4 +112,9 @@ public static RestLiServiceException invalidArgumentsException(@Nullable String public static RestLiServiceException apiThrottled(@Nullable String message) { return new RestLiServiceException(HttpStatus.S_429_TOO_MANY_REQUESTS, message); } + + @Nonnull + public static RestLiServiceException forbidden(@Nullable String message) { + return new RestLiServiceException(HttpStatus.S_403_FORBIDDEN, message); + } } diff --git a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java 
b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java index a39401c170a114..037b5b81fd4df0 100644 --- a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java +++ b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java @@ -100,7 +100,7 @@ public void testAsyncDefaultAspects() throws URISyntaxException { .recordTemplate(mcp.getAspect()) .auditStamp(new AuditStamp()) .metadataChangeProposal(mcp) - .build(opContext.getAspectRetrieverOpt().get()); + .build(opContext.getAspectRetriever()); when(aspectDao.runInTransactionWithRetry(any(), any(), anyInt())) .thenReturn( List.of(List.of( diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java index d701c8fc8be035..80a11ab98bbf4a 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java @@ -219,6 +219,7 @@ public class PoliciesConfig { MANAGE_BUSINESS_ATTRIBUTE_PRIVILEGE, MANAGE_CONNECTIONS_PRIVILEGE, MANAGE_STRUCTURED_PROPERTIES_PRIVILEGE, + VIEW_STRUCTURED_PROPERTIES_PAGE_PRIVILEGE, MANAGE_DOCUMENTATION_FORMS_PRIVILEGE, MANAGE_FEATURES_PRIVILEGE, MANAGE_SYSTEM_OPERATIONS_PRIVILEGE); diff --git a/smoke-test/tests/tokens/revokable_access_token_test.py b/smoke-test/tests/tokens/revokable_access_token_test.py index af29437c051e19..006daae39333ed 100644 --- a/smoke-test/tests/tokens/revokable_access_token_test.py +++ b/smoke-test/tests/tokens/revokable_access_token_test.py @@ -9,6 +9,8 @@ wait_for_writes_to_sync, ) +from .token_utils import listUsers, removeUser + pytestmark = pytest.mark.no_cypress_suite1 # Disable telemetry @@ -490,45 +492,3 @@ def getAccessTokenMetadata(session, token): response.raise_for_status() return response.json() - - -def removeUser(session, urn): - # Remove user - json = { - "query": """mutation removeUser($urn: String!) { - removeUser(urn: $urn) - }""", - "variables": {"urn": urn}, - } - - response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) - - response.raise_for_status() - return response.json() - - -def listUsers(session): - input = { - "start": "0", - "count": "20", - } - - # list users - json = { - "query": """query listUsers($input: ListUsersInput!) 
{ - listUsers(input: $input) { - start - count - total - users { - username - } - } - }""", - "variables": {"input": input}, - } - - response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) - - response.raise_for_status() - return response.json() diff --git a/smoke-test/tests/tokens/session_access_token_test.py b/smoke-test/tests/tokens/session_access_token_test.py new file mode 100644 index 00000000000000..a16abc44453036 --- /dev/null +++ b/smoke-test/tests/tokens/session_access_token_test.py @@ -0,0 +1,173 @@ +import os +import time + +import pytest +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.schema_classes import AuditStampClass, CorpUserStatusClass +from requests.exceptions import HTTPError + +from tests.utils import ( + get_admin_credentials, + get_frontend_url, + login_as, + wait_for_writes_to_sync, +) + +from .token_utils import getUserId, listUsers, removeUser + +pytestmark = pytest.mark.no_cypress_suite1 + +# Disable telemetry +os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" + +(admin_user, admin_pass) = get_admin_credentials() +user_urn = "urn:li:corpuser:sessionUser" + + +@pytest.fixture(scope="class") +def custom_user_session(): + """Fixture to execute setup before and tear down after all tests are run""" + admin_session = login_as(admin_user, admin_pass) + + res_data = removeUser(admin_session, user_urn) + assert res_data + assert "error" not in res_data + + # Test getting the invite token + get_invite_token_json = { + "query": """query getInviteToken($input: GetInviteTokenInput!) { + getInviteToken(input: $input){ + inviteToken + } + }""", + "variables": {"input": {}}, + } + + get_invite_token_response = admin_session.post( + f"{get_frontend_url()}/api/v2/graphql", json=get_invite_token_json + ) + get_invite_token_response.raise_for_status() + get_invite_token_res_data = get_invite_token_response.json() + + assert get_invite_token_res_data + assert get_invite_token_res_data["data"] + invite_token = get_invite_token_res_data["data"]["getInviteToken"]["inviteToken"] + assert invite_token is not None + assert "error" not in invite_token + + # Pass the invite token when creating the user + sign_up_json = { + "fullName": "Test Session User", + "email": "sessionUser", + "password": "sessionUser", + "title": "Data Engineer", + "inviteToken": invite_token, + } + + sign_up_response = admin_session.post( + f"{get_frontend_url()}/signUp", json=sign_up_json + ) + sign_up_response.raise_for_status() + assert sign_up_response + assert "error" not in sign_up_response + # Sleep for eventual consistency + wait_for_writes_to_sync() + + # signUp will override the session cookie to the new user to be signed up. + admin_session.cookies.clear() + admin_session = login_as(admin_user, admin_pass) + + # Make sure the created user is there. + res_data = listUsers(admin_session) + assert res_data["data"] + assert res_data["data"]["listUsers"] + assert {"username": "sessionUser"} in res_data["data"]["listUsers"]["users"] + + yield login_as(sign_up_json["email"], sign_up_json["password"]) + + # Delete created user + res_data = removeUser(admin_session, user_urn) + assert res_data + assert res_data["data"] + assert res_data["data"]["removeUser"] is True + # Sleep for eventual consistency + wait_for_writes_to_sync() + + # Make sure the created user is not there. 
+ res_data = listUsers(admin_session) + assert res_data["data"] + assert res_data["data"]["listUsers"] + assert {"username": "sessionUser"} not in res_data["data"]["listUsers"]["users"] + + +@pytest.mark.dependency() +def test_soft_delete(graph_client, custom_user_session): + # assert initial access + assert getUserId(custom_user_session) == {"urn": user_urn} + + graph_client.soft_delete_entity(urn=user_urn) + wait_for_writes_to_sync() + + with pytest.raises(HTTPError) as req_info: + getUserId(custom_user_session) + assert "403 Client Error: Forbidden" in str(req_info.value) + + # undo soft delete + graph_client.set_soft_delete_status(urn=user_urn, delete=False) + wait_for_writes_to_sync() + + +@pytest.mark.dependency(depends=["test_soft_delete"]) +def test_suspend(graph_client, custom_user_session): + # assert initial access + assert getUserId(custom_user_session) == {"urn": user_urn} + + graph_client.emit( + MetadataChangeProposalWrapper( + entityType="corpuser", + entityUrn=user_urn, + changeType="UPSERT", + aspectName="corpUserStatus", + aspect=CorpUserStatusClass( + status="SUSPENDED", + lastModified=AuditStampClass( + time=int(time.time() * 1000.0), actor="urn:li:corpuser:unknown" + ), + ), + ) + ) + wait_for_writes_to_sync() + + with pytest.raises(HTTPError) as req_info: + getUserId(custom_user_session) + assert "403 Client Error: Forbidden" in str(req_info.value) + + # undo suspend + graph_client.emit( + MetadataChangeProposalWrapper( + entityType="corpuser", + entityUrn=user_urn, + changeType="UPSERT", + aspectName="corpUserStatus", + aspect=CorpUserStatusClass( + status="ACTIVE", + lastModified=AuditStampClass( + time=int(time.time() * 1000.0), actor="urn:li:corpuser:unknown" + ), + ), + ) + ) + wait_for_writes_to_sync() + + +@pytest.mark.dependency(depends=["test_suspend"]) +def test_hard_delete(graph_client, custom_user_session): + # assert initial access + assert getUserId(custom_user_session) == {"urn": user_urn} + + graph_client.hard_delete_entity(urn=user_urn) + wait_for_writes_to_sync() + + with pytest.raises(HTTPError) as req_info: + getUserId(custom_user_session) + assert "403 Client Error: Forbidden" in str(req_info.value) diff --git a/smoke-test/tests/tokens/token_utils.py b/smoke-test/tests/tokens/token_utils.py new file mode 100644 index 00000000000000..10558e7085de72 --- /dev/null +++ b/smoke-test/tests/tokens/token_utils.py @@ -0,0 +1,53 @@ +from tests.utils import get_frontend_url + + +def getUserId(session): + response = session.get( + f"{get_frontend_url()}/openapi/operations/identity/user/urn", + params={"skipCache": "true"}, + ) + + response.raise_for_status() + return response.json() + + +def removeUser(session, urn): + # Remove user + json = { + "query": """mutation removeUser($urn: String!) { + removeUser(urn: $urn) + }""", + "variables": {"urn": urn}, + } + + response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + + response.raise_for_status() + return response.json() + + +def listUsers(session): + input = { + "start": "0", + "count": "20", + } + + # list users + json = { + "query": """query listUsers($input: ListUsersInput!) { + listUsers(input: $input) { + start + count + total + users { + username + } + } + }""", + "variables": {"input": input}, + } + + response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + + response.raise_for_status() + return response.json()
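
The RetrieverContext changes above add a required CachingAspectRetriever alongside the plain AspectRetriever, and the customized builder mirrors whichever retriever is supplied into the other slot when possible. A minimal sketch of that behavior, assuming the metadata-operation-context module is on the classpath; the RetrieverWiringExample class name is illustrative and not part of the patch.

import com.linkedin.metadata.aspect.CachingAspectRetriever;
import com.linkedin.metadata.aspect.GraphRetriever;
import com.linkedin.metadata.entity.SearchRetriever;
import io.datahubproject.metadata.context.RetrieverContext;

public class RetrieverWiringExample {
  public static void main(String[] args) {
    // Only a CachingAspectRetriever is supplied; the builder also installs it
    // as the plain AspectRetriever, so both getters return the same instance.
    RetrieverContext ctx =
        RetrieverContext.builder()
            .graphRetriever(GraphRetriever.EMPTY)
            .searchRetriever(SearchRetriever.EMPTY)
            .cachingAspectRetriever(CachingAspectRetriever.EMPTY)
            .build();

    System.out.println(ctx.getAspectRetriever() == ctx.getCachingAspectRetriever()); // true

    // RetrieverContext.EMPTY is shorthand for exactly this wiring.
    System.out.println(RetrieverContext.EMPTY.getCachingAspectRetriever() == CachingAspectRetriever.EMPTY); // true
  }
}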
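
OperationContextBuilder.build now takes a skipCache flag and rejects inactive actors with ActorAccessException, which both the OpenAPI exception handler and RestliUtils map to HTTP 403; the new /openapi/operations/identity/user/urn endpoint exercises that path end to end. A hedged sketch of calling it with the plain JDK HTTP client; the DATAHUB_GMS_URL and DATAHUB_TOKEN environment variables are assumptions for the example, not something this patch defines.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class IdentityProbe {
  public static void main(String[] args) throws Exception {
    String base = System.getenv().getOrDefault("DATAHUB_GMS_URL", "http://localhost:8080");
    String token = System.getenv("DATAHUB_TOKEN"); // assumed personal access token

    HttpRequest request =
        HttpRequest.newBuilder()
            // skipCache=true makes the session build use the non-caching
            // AspectRetriever for the actor-active check instead of the
            // CachingAspectRetriever (see OperationContextBuilder above).
            .uri(URI.create(base + "/openapi/operations/identity/user/urn?skipCache=true"))
            .header("Authorization", "Bearer " + token)
            .GET()
            .build();

    HttpResponse<String> response =
        HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());

    // Expect 200 with {"urn": "..."} for an active user; 403 once the user is
    // soft deleted or suspended, matching the new session smoke tests.
    System.out.println(response.statusCode() + " " + response.body());
  }
}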
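
EntityClient.getV2 and batchGetV2 gain an alwaysIncludeKeyAspect parameter, while the old four-argument signatures remain as deprecated defaults that keep the legacy behavior of returning key aspects whether or not they exist. A small sketch of opting out of that behavior; the StatusLookup helper and the choice of the status aspect are illustrative assumptions.

import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.entity.EntityResponse;
import com.linkedin.entity.client.EntityClient;
import io.datahubproject.metadata.context.OperationContext;
import java.util.Set;

public class StatusLookup {
  /** Result may be null, per the @Nullable contract on getV2. */
  public static EntityResponse fetchStatusOnly(
      EntityClient entityClient, OperationContext opContext, String urnStr) throws Exception {
    Urn urn = UrnUtils.getUrn(urnStr);
    // alwaysIncludeKeyAspect=false: no key aspect is returned for entities with
    // no stored aspects, unlike the deprecated overload, which defaults to true.
    return entityClient.getV2(opContext, urn.getEntityType(), urn, Set.of("status"), false);
  }
}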