drm/amd/display: DML21 Reintegration

Reintegrate DML21 for various fixes to the mcache_row_bytes calculation.

Reviewed-by: Alvin Lee <alvin.lee2@amd.com>
Signed-off-by: Austin Zheng <Austin.Zheng@amd.com>
Signed-off-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2025-09-09 16:41:16 +00:00 · 2025-01-21 17:10:27 -05:00 · 2025-01-21 17:10:27 -05:00 · 1b30456150
commit 1b30456150
parent e8bffa52e0
16 changed files with 83 additions and 12853 deletions
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
@ -14,11 +14,6 @@

 struct dml2_instance;

-enum dml2_status {
-	dml2_success = 0,
-	dml2_error_generic = 1
-};
-
 enum dml2_project_id {
 	dml2_project_invalid = 0,
 	dml2_project_dcn4x_stage1 = 1,
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c
@ -44,7 +44,7 @@ struct dml2_core_ip_params core_dcn4_ip_caps_base = {
 	.dppclk_delay_scl_lb_only = 16,
 	.dppclk_delay_cnvc_formatter = 28,
 	.dppclk_delay_cnvc_cursor = 6,
-	.cursor_buffer_size = 24,
+	.cursor_buffer_size = 42,
 	.cursor_chunk_size = 2,
 	.dispclk_delay_subtotal = 125,
 	.max_inter_dcn_tile_repeaters = 8,
@ -327,11 +327,11 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in
 		dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index);

 		memcpy(&programming->plane_programming[plane_index].mcache_allocation,
-				&display_cfg->stage2.mcache_allocations[plane_index],
-				sizeof(struct dml2_mcache_surface_allocation));
+			&display_cfg->stage2.mcache_allocations[plane_index],
+			sizeof(struct dml2_mcache_surface_allocation));
 		total_main_mcaches_required += programming->plane_programming[plane_index].mcache_allocation.num_mcaches_plane0 +
-				programming->plane_programming[plane_index].mcache_allocation.num_mcaches_plane1 -
-				(programming->plane_programming[plane_index].mcache_allocation.last_slice_sharing.plane0_plane1 ? 1 : 0);
+			programming->plane_programming[plane_index].mcache_allocation.num_mcaches_plane1 -
+			(programming->plane_programming[plane_index].mcache_allocation.last_slice_sharing.plane0_plane1 ? 1 : 0);

 		for (pipe_offset = 0; pipe_offset < programming->plane_programming[plane_index].num_dpps_required; pipe_offset++) {
 			// Assign storage for this pipe's register values
@ -374,17 +374,17 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in

 		/* generate mcache allocation, phantoms use identical mcache configuration, but in the MALL set and unique mcache ID's beginning after all main ID's */
 		memcpy(&programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation,
-				&programming->plane_programming[main_plane_index].mcache_allocation,
-				sizeof(struct dml2_mcache_surface_allocation));
+			&programming->plane_programming[main_plane_index].mcache_allocation,
+			sizeof(struct dml2_mcache_surface_allocation));
 		for (mcache_index = 0; mcache_index < programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.num_mcaches_plane0; mcache_index++) {
 			programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane0[mcache_index] += total_main_mcaches_required;
 			programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_mall_plane0[mcache_index] =
-					programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane0[mcache_index];
+				programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane0[mcache_index];
 		}
 		for (mcache_index = 0; mcache_index < programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.num_mcaches_plane1; mcache_index++) {
 			programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane1[mcache_index] += total_main_mcaches_required;
 			programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_mall_plane1[mcache_index] =
-					programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane1[mcache_index];
+				programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane1[mcache_index];
 		}

 		for (pipe_offset = 0; pipe_offset < programming->plane_programming[main_plane_index].num_dpps_required; pipe_offset++) {
@ -597,8 +597,8 @@ bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out
 				dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &in_out->programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index);

 				memcpy(&in_out->programming->plane_programming[plane_index].mcache_allocation,
-						&in_out->display_cfg->stage2.mcache_allocations[plane_index],
-						sizeof(struct dml2_mcache_surface_allocation));
+					&in_out->display_cfg->stage2.mcache_allocations[plane_index],
+					sizeof(struct dml2_mcache_surface_allocation));

 				for (pipe_offset = 0; pipe_offset < in_out->programming->plane_programming[plane_index].num_dpps_required; pipe_offset++) {
 					in_out->programming->plane_programming[plane_index].plane_descriptor = &in_out->programming->display_config.plane_descriptors[plane_index];
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h
@ -9,7 +9,4 @@ bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out);
 bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out);
 bool core_dcn4_populate_informative(struct dml2_core_populate_informative_in_out *in_out);
 bool core_dcn4_calculate_mcache_allocation(struct dml2_calculate_mcache_allocation_in_out *in_out);
-
-bool core_dcn4_unit_test(void);
-
 #endif
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
@ -2352,6 +2352,7 @@ static void calculate_mcache_row_bytes(
 	if (p->full_vp_height == 0 && p->full_vp_width == 0) {
 		*p->num_mcaches = 0;
 		*p->mcache_row_bytes = 0;
+		*p->mcache_row_bytes_per_channel = 0;
 	} else {
 		blk_bytes = dml_get_tile_block_size_bytes(p->tiling_mode);

@ -2420,15 +2421,18 @@ static void calculate_mcache_row_bytes(

 		// If this mcache_row_bytes for the full viewport of the surface is less than or equal to mcache_bytes,
 		// then one mcache can be used for this request stream. If not, it is useful to know the width of the viewport that can be supported in the mcache_bytes.
-		if (p->gpuvm_enable || !p->surf_vert) {
-			*p->mcache_row_bytes = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub;
+		if (p->gpuvm_enable || p->surf_vert) {
+			*p->mcache_row_bytes_per_channel = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub;
+			*p->mcache_row_bytes = *p->mcache_row_bytes_per_channel * p->num_chans;
 		} else { // horizontal and gpuvm disable
 			*p->mcache_row_bytes = *p->meta_row_width_ub * p->blk_height * p->bytes_per_pixel / 256;
-			*p->mcache_row_bytes = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->num_chans, p->mcache_line_size_bytes);
+			if (p->mcache_line_size_bytes != 0)
+				*p->mcache_row_bytes_per_channel = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->num_chans, p->mcache_line_size_bytes);
 		}

 		*p->dcc_dram_bw_pref_overhead_factor = 1 + math_max2(1.0 / 256.0, *p->mcache_row_bytes / p->full_swath_bytes); // dcc_dr_oh_pref
-		*p->num_mcaches = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->mcache_size_bytes, 1);
+		if (p->mcache_size_bytes != 0)
+			*p->num_mcaches = (unsigned int)math_ceil2((double)*p->mcache_row_bytes_per_channel / p->mcache_size_bytes, 1);

 		mvmpg_per_mcache = p->mcache_size_bytes / meta_per_mvmpg_per_channel_ub;
 		*p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1);
@ -2449,6 +2453,7 @@ static void calculate_mcache_row_bytes(

 #ifdef __DML_VBA_DEBUG__
 	dml2_printf("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes);
+	dml2_printf("DML::%s: mcache_row_bytes_per_channel = %u\n", __func__, *p->mcache_row_bytes_per_channel);
 	dml2_printf("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches);
 #endif
 	DML2_ASSERT(*p->num_mcaches > 0);
@ -2465,11 +2470,13 @@ static void calculate_mcache_setting(

 	*p->num_mcaches_l = 0;
 	*p->mcache_row_bytes_l = 0;
+	*p->mcache_row_bytes_per_channel_l = 0;
 	*p->dcc_dram_bw_nom_overhead_factor_l = 1.0;
 	*p->dcc_dram_bw_pref_overhead_factor_l = 1.0;

 	*p->num_mcaches_c = 0;
 	*p->mcache_row_bytes_c = 0;
+	*p->mcache_row_bytes_per_channel_c = 0;
 	*p->dcc_dram_bw_nom_overhead_factor_c = 1.0;
 	*p->dcc_dram_bw_pref_overhead_factor_c = 1.0;

@ -2505,6 +2512,7 @@ static void calculate_mcache_setting(
 	// output
 	l->l_p.num_mcaches = p->num_mcaches_l;
 	l->l_p.mcache_row_bytes = p->mcache_row_bytes_l;
+	l->l_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_l;
 	l->l_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_l;
 	l->l_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_l;
 	l->l_p.mvmpg_width = &l->mvmpg_width_l;
@ -2514,7 +2522,7 @@ static void calculate_mcache_setting(
 	l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l;

 	calculate_mcache_row_bytes(scratch, &l->l_p);
-	dml2_assert(*p->num_mcaches_l > 0);
+	DML2_ASSERT(*p->num_mcaches_l > 0);

 	if (l->is_dual_plane) {
 		l->c_p.num_chans = p->num_chans;
@ -2540,6 +2548,7 @@ static void calculate_mcache_setting(
 		// output
 		l->c_p.num_mcaches = p->num_mcaches_c;
 		l->c_p.mcache_row_bytes = p->mcache_row_bytes_c;
+		l->c_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_c;
 		l->c_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_c;
 		l->c_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_c;
 		l->c_p.mvmpg_width = &l->mvmpg_width_c;
@ -2549,12 +2558,12 @@ static void calculate_mcache_setting(
 		l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c;

 		calculate_mcache_row_bytes(scratch, &l->c_p);
-		dml2_assert(*p->num_mcaches_c > 0);
+		DML2_ASSERT(*p->num_mcaches_c > 0);
 	}

 	// Sharing for iMALL access
-	l->mcache_remainder_l = *p->mcache_row_bytes_l % p->mcache_size_bytes;
-	l->mcache_remainder_c = *p->mcache_row_bytes_c % p->mcache_size_bytes;
+	l->mcache_remainder_l = *p->mcache_row_bytes_per_channel_l % p->mcache_size_bytes;
+	l->mcache_remainder_c = *p->mcache_row_bytes_per_channel_c % p->mcache_size_bytes;
 	l->mvmpg_access_width_l = p->surf_vert ? l->mvmpg_height_l : l->mvmpg_width_l;
 	l->mvmpg_access_width_c = p->surf_vert ? l->mvmpg_height_c : l->mvmpg_width_c;

@ -2577,11 +2586,14 @@ static void calculate_mcache_setting(
 	if (l->is_dual_plane) {
 		l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c;

-		if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) {
-			l->lc_comb_last_mcache_size = (unsigned int)((l->mcache_remainder_l * (*p->mall_comb_mcache_l ? 2 : 1) * l->luma_time_factor) +
-				(l->mcache_remainder_c * (*p->mall_comb_mcache_c ? 2 : 1)));
+		/* if either remainder is 0, then mcache sharing is not needed or not possible due to full utilization */
+		if (l->mcache_remainder_l && l->mcache_remainder_c) {
+			if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) {
+				l->lc_comb_last_mcache_size = (unsigned int)((l->mcache_remainder_l * (*p->mall_comb_mcache_l ? 2 : 1) * l->luma_time_factor) +
+					(l->mcache_remainder_c * (*p->mall_comb_mcache_c ? 2 : 1)));
+			}
+			*p->lc_comb_mcache = (l->lc_comb_last_mcache_size <= p->mcache_size_bytes) && (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c);
 		}
-		*p->lc_comb_mcache = (l->lc_comb_last_mcache_size <= p->mcache_size_bytes) && (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c);
 	}

 #ifdef __DML_VBA_DEBUG__
@ -2637,9 +2649,6 @@ static void calculate_mcache_setting(
 	// Luma/Chroma combine in the last mcache
 	// In the case of Luma/Chroma combine-mCache (with lc_comb_mcache==1), all mCaches except the last segment are filled as much as possible, when stay aligned to mvmpg boundary
 	if (*p->lc_comb_mcache && l->is_dual_plane) {
-		/* if luma and chroma planes share an mcache, increase total chroma mcache count */
-		*p->num_mcaches_c = *p->num_mcaches_c + 1;
-
 		for (n = 0; n < *p->num_mcaches_l - 1; n++)
 			p->mcache_offsets_l[n] = (n + 1) * l->mvmpg_per_mcache_lb_l * l->mvmpg_access_width_l;
 		p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
@ -3400,7 +3409,7 @@ static void calculate_cursor_req_attributes(
 	} else {
 		if (cursor_width > 0) {
 			dml2_printf("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp);
-			dml2_assert(0);
+			DML2_ASSERT(0);
 		}
 	}

@ -3443,7 +3452,7 @@ static void calculate_cursor_urgent_burst_factor(
 		CursorBufferSizeInTime = LinesInCursorBuffer * LineTime;
 		if (CursorBufferSizeInTime - UrgentLatency <= 0) {
 			*NotEnoughUrgentLatencyHiding = 1;
-			*UrgentBurstFactorCursor = 0;
+			*UrgentBurstFactorCursor = 1;
 		} else {
 			*NotEnoughUrgentLatencyHiding = 0;
 			*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
@ -3506,7 +3515,7 @@ static void CalculateUrgentBurstFactor(
 	DETBufferSizeInTimeLuma = math_floor2(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
 	if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
 		*NotEnoughUrgentLatencyHiding = 1;
-		*UrgentBurstFactorLuma = 0;
+		*UrgentBurstFactorLuma = 1;
 	} else {
 		*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
 	}
@ -3517,7 +3526,7 @@ static void CalculateUrgentBurstFactor(
 		DETBufferSizeInTimeChroma = math_floor2(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
 		if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
 			*NotEnoughUrgentLatencyHiding = 1;
-			*UrgentBurstFactorChroma = 0;
+			*UrgentBurstFactorChroma = 1;
 		} else {
 			*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
 		}
@ -5391,7 +5400,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
 	}

 	/* oto prefetch bw should be always be less than total vactive bw */
-	DML2_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface);
+	//DML2_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface);

 	s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor;

@ -5801,7 +5810,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
 		dml2_printf("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes);
 		dml2_printf("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw);
 #endif
-		dml2_assert(*p->dst_y_prefetch < 64);
+		DML2_ASSERT(*p->dst_y_prefetch < 64);

 		unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime);
 		if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) {
@ -5994,7 +6003,7 @@ static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned
 		}
 	}
 	if (max_idx <= 0) {
-		dml2_assert(max_idx >= 0);
+		DML2_ASSERT(max_idx >= 0);
 		max_idx = this_plane_idx;
 	}

@ -6341,7 +6350,7 @@ static void calculate_peak_bandwidth_required(
 			dml2_printf("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]);
 			dml2_printf("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]);
 #endif
-			dml2_assert(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]);
+			DML2_ASSERT(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]);
 		}
 	}
 }
@ -6473,7 +6482,7 @@ static void calculate_immediate_flip_bandwidth_support(
 		dml2_printf("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]);
 		dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
 #endif
-		dml2_assert(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]);
+		DML2_ASSERT(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]);
 	}

 	*frac_urg_bandwidth_flip = (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram) ? frac_urg_bw_flip_sdp : frac_urg_bw_flip_dram;
@ -6587,7 +6596,7 @@ static void CalculateFlipSchedule(
 #ifdef __DML_VBA_DEBUG__
 		dml2_printf("DML::%s: min_row_time = %f\n", __func__, l->min_row_time);
 #endif
-		dml2_assert(l->min_row_time > 0);
+		DML2_ASSERT(l->min_row_time > 0);

 		if (use_lb_flip_bw) {
 			// For mode check, calculation the flip bw requirement with worst case flip time
@ -7163,7 +7172,8 @@ static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, c
 		}
 	}

-	dml2_assert(clk_entry_found);
+	if (!clk_entry_found)
+		DML2_ASSERT(clk_entry_found);
 #if defined(__DML_VBA_DEBUG__)
 	dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz);
 	dml2_printf("DML::%s: index = %d\n", __func__, i);
@ -8772,11 +8782,13 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out

 			calculate_mcache_setting_params->num_mcaches_l = &mode_lib->ms.num_mcaches_l[k];
 			calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->ms.mcache_row_bytes_l[k];
+			calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->ms.mcache_row_bytes_per_channel_l[k];
 			calculate_mcache_setting_params->mcache_offsets_l = mode_lib->ms.mcache_offsets_l[k];
 			calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->ms.mcache_shift_granularity_l[k];

 			calculate_mcache_setting_params->num_mcaches_c = &mode_lib->ms.num_mcaches_c[k];
 			calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->ms.mcache_row_bytes_c[k];
+			calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->ms.mcache_row_bytes_per_channel_c[k];
 			calculate_mcache_setting_params->mcache_offsets_c = mode_lib->ms.mcache_offsets_c[k];
 			calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->ms.mcache_shift_granularity_c[k];

@ -10430,13 +10442,13 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex

 	for (k = 0; k < s->num_active_planes; ++k) {
 		unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
-		dml2_assert(cfg_support_info->stream_support_info[stream_index].odms_used <= 4);
-		dml2_assert(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 ||
+		DML2_ASSERT(cfg_support_info->stream_support_info[stream_index].odms_used <= 4);
+		DML2_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 ||
 					cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2 ||
 					cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);

 		if (cfg_support_info->stream_support_info[stream_index].odms_used > 1)
-			dml2_assert(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
+			DML2_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);

 		switch (cfg_support_info->stream_support_info[stream_index].odms_used) {
 		case (4):
@ -10462,7 +10474,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
 	for (k = 0; k < s->num_active_planes; ++k) {
 		mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used;
 		mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0;
-		dml2_assert(mode_lib->mp.Dppclk[k] > 0);
+		DML2_ASSERT(mode_lib->mp.Dppclk[k] > 0);
 	}

 	for (k = 0; k < s->num_active_planes; ++k) {
@ -10474,14 +10486,14 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
 	mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0;
 	mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0;

-	dml2_assert(mode_lib->mp.Dcfclk > 0);
-	dml2_assert(mode_lib->mp.FabricClock > 0);
-	dml2_assert(mode_lib->mp.dram_bw_mbps > 0);
-	dml2_assert(mode_lib->mp.uclk_freq_mhz > 0);
-	dml2_assert(mode_lib->mp.GlobalDPPCLK > 0);
-	dml2_assert(mode_lib->mp.Dispclk > 0);
-	dml2_assert(mode_lib->mp.DCFCLKDeepSleep > 0);
-	dml2_assert(s->SOCCLK > 0);
+	DML2_ASSERT(mode_lib->mp.Dcfclk > 0);
+	DML2_ASSERT(mode_lib->mp.FabricClock > 0);
+	DML2_ASSERT(mode_lib->mp.dram_bw_mbps > 0);
+	DML2_ASSERT(mode_lib->mp.uclk_freq_mhz > 0);
+	DML2_ASSERT(mode_lib->mp.GlobalDPPCLK > 0);
+	DML2_ASSERT(mode_lib->mp.Dispclk > 0);
+	DML2_ASSERT(mode_lib->mp.DCFCLKDeepSleep > 0);
+	DML2_ASSERT(s->SOCCLK > 0);

 #ifdef __DML_VBA_DEBUG__
 	dml2_printf("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes);
@ -10869,11 +10881,13 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex

 			calculate_mcache_setting_params->num_mcaches_l = &mode_lib->mp.num_mcaches_l[k];
 			calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->mp.mcache_row_bytes_l[k];
+			calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->mp.mcache_row_bytes_per_channel_l[k];
 			calculate_mcache_setting_params->mcache_offsets_l = mode_lib->mp.mcache_offsets_l[k];
 			calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->mp.mcache_shift_granularity_l[k];

 			calculate_mcache_setting_params->num_mcaches_c = &mode_lib->mp.num_mcaches_c[k];
 			calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->mp.mcache_row_bytes_c[k];
+			calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->mp.mcache_row_bytes_per_channel_c[k];
 			calculate_mcache_setting_params->mcache_offsets_c = mode_lib->mp.mcache_offsets_c[k];
 			calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->mp.mcache_shift_granularity_c[k];

@ -11585,7 +11599,6 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
 			calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c;
 			calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
 			calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
-			calculate_peak_bandwidth_params->prefetch_bandwidth_oto = s->dummy_single_array[k];
 			calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
 			calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c;
 			calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw;
@ -11593,6 +11606,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
 			calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw;
 			calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw;
 			calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw;
+			calculate_peak_bandwidth_params->prefetch_bandwidth_oto = s->dummy_single_array[0];
 			calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw;
 			calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma;
 			calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma;
@ -12398,7 +12412,7 @@ static void rq_dlg_get_dlg_reg(
 	dml2_printf("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx);

 	l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
-	dml2_assert(l->plane_idx < DML2_MAX_PLANES);
+	DML2_ASSERT(l->plane_idx < DML2_MAX_PLANES);

 	l->source_format = dml2_444_8;
 	l->odm_mode = dml2_odm_mode_bypass;
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h
@ -484,7 +484,7 @@ struct dml2_core_internal_mode_support {
 	double WriteBandwidth[DML2_MAX_PLANES][DML2_MAX_WRITEBACK];
 	double RequiredPrefetchPixelDataBWLuma[DML2_MAX_PLANES];
 	double RequiredPrefetchPixelDataBWChroma[DML2_MAX_PLANES];
-	/* oto bw should also be considered when calculating urgent bw to avoid situations oto/equ mismatches between ms and mp */
+	/* oto bw should also be considered when calculating peak urgent bw to avoid oto/equ mismatches between ms and mp */
 	double RequiredPrefetchBWOTO[DML2_MAX_PLANES];
 	double cursor_bw[DML2_MAX_PLANES];
 	double prefetch_cursor_bw[DML2_MAX_PLANES];
@ -524,11 +524,13 @@ struct dml2_core_internal_mode_support {

 	unsigned int num_mcaches_l[DML2_MAX_PLANES];
 	unsigned int mcache_row_bytes_l[DML2_MAX_PLANES];
+	unsigned int mcache_row_bytes_per_channel_l[DML2_MAX_PLANES];
 	unsigned int mcache_offsets_l[DML2_MAX_PLANES][DML2_MAX_MCACHES + 1];
 	unsigned int mcache_shift_granularity_l[DML2_MAX_PLANES];

 	unsigned int num_mcaches_c[DML2_MAX_PLANES];
 	unsigned int mcache_row_bytes_c[DML2_MAX_PLANES];
+	unsigned int mcache_row_bytes_per_channel_c[DML2_MAX_PLANES];
 	unsigned int mcache_offsets_c[DML2_MAX_PLANES][DML2_MAX_MCACHES + 1];
 	unsigned int mcache_shift_granularity_c[DML2_MAX_PLANES];

@ -841,11 +843,13 @@ struct dml2_core_internal_mode_program {

 	unsigned int num_mcaches_l[DML2_MAX_PLANES];
 	unsigned int mcache_row_bytes_l[DML2_MAX_PLANES];
+	unsigned int mcache_row_bytes_per_channel_l[DML2_MAX_PLANES];
 	unsigned int mcache_offsets_l[DML2_MAX_PLANES][DML2_MAX_MCACHES + 1];
 	unsigned int mcache_shift_granularity_l[DML2_MAX_PLANES];

 	unsigned int num_mcaches_c[DML2_MAX_PLANES];
 	unsigned int mcache_row_bytes_c[DML2_MAX_PLANES];
+	unsigned int mcache_row_bytes_per_channel_c[DML2_MAX_PLANES];
 	unsigned int mcache_offsets_c[DML2_MAX_PLANES][DML2_MAX_MCACHES + 1];
 	unsigned int mcache_shift_granularity_c[DML2_MAX_PLANES];

@ -1887,6 +1891,7 @@ struct dml2_core_calcs_calculate_mcache_row_bytes_params {
 	// output
 	unsigned int *num_mcaches;
 	unsigned int *mcache_row_bytes;
+	unsigned int *mcache_row_bytes_per_channel;
 	unsigned int *meta_row_width_ub;
 	double *dcc_dram_bw_nom_overhead_factor;
 	double *dcc_dram_bw_pref_overhead_factor;
@ -1966,6 +1971,7 @@ struct dml2_core_calcs_calculate_mcache_setting_params {
 	// output
 	unsigned int *num_mcaches_l;
 	unsigned int *mcache_row_bytes_l;
+	unsigned int *mcache_row_bytes_per_channel_l;
 	unsigned int *mcache_offsets_l;
 	unsigned int *mcache_shift_granularity_l;
 	double *dcc_dram_bw_nom_overhead_factor_l;
@ -1973,6 +1979,7 @@ struct dml2_core_calcs_calculate_mcache_setting_params {

 	unsigned int *num_mcaches_c;
 	unsigned int *mcache_row_bytes_c;
+	unsigned int *mcache_row_bytes_per_channel_c;
 	unsigned int *mcache_offsets_c;
 	unsigned int *mcache_shift_granularity_c;
 	double *dcc_dram_bw_nom_overhead_factor_c;
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c
@ -544,7 +544,8 @@ unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_fr
 		}
 	}

-	dml2_assert(clk_entry_found);
+	if (!clk_entry_found)
+		DML2_ASSERT(clk_entry_found);
 #if defined(__DML_VBA_DEBUG__)
 	dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz);
 	dml2_printf("DML::%s: index = %d\n", __func__, i);
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
@ -212,7 +212,7 @@ static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double ma

 	clock_khz *= 1.0 + margin;

-	divider = (unsigned int)((int)DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz));
+	divider = (unsigned int)(DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz));

 	/* we want to floor here to get higher clock than required rather than lower */
 	if (divider < DFS_DIVIDER_RANGE_2_START) {
@ -307,8 +307,8 @@ static bool map_soc_min_clocks_to_dpm_fine_grained(struct dml2_display_cfg_progr
 	/* these clocks are optional, so they can fail to map, in which case map all to 0 */
 	if (result) {
 		if (!round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz, &state_table->dcfclk) ||
-				!round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz, &state_table->fclk) ||
-				!round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz, &state_table->uclk)) {
+			!round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz, &state_table->fclk) ||
+			!round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz, &state_table->uclk)) {
 			display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = 0;
 			display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = 0;
 			display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = 0;
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h
@ -11,6 +11,4 @@ bool dpmm_dcn3_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_i
 bool dpmm_dcn4_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out);
 bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out);

-bool dpmm_dcn4_unit_test(void);
-
 #endif
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c
@ -15,7 +15,7 @@ bool dml2_mcg_create(enum dml2_project_id project_id, struct dml2_mcg_instance *
 {
 	bool result = false;

-	if (!out)
+	if (out == 0)
 		return false;

 	memset(out, 0, sizeof(struct dml2_mcg_instance));
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
@ -1094,8 +1094,8 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo,

 		if (plane_descriptor->surface.dcc.enable) {
 			mcaches_per_plane += display_config->stage2.mcache_allocations[i].num_mcaches_plane0 +
-					display_config->stage2.mcache_allocations[i].num_mcaches_plane1 -
-					(display_config->stage2.mcache_allocations[i].last_slice_sharing.plane0_plane1 ? 1 : 0);
+				display_config->stage2.mcache_allocations[i].num_mcaches_plane1 -
+				(display_config->stage2.mcache_allocations[i].last_slice_sharing.plane0_plane1 ? 1 : 0);
 		}

 		if (is_bit_set_in_bitfield(mask, (unsigned char)plane_descriptor->stream_index)) {
@ -1113,7 +1113,6 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo,
 				mcaches_per_plane *= 2;
 			}
 		}
-
 		total_mcaches_required += mcaches_per_plane;
 	}

--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c
@ -47,4 +47,3 @@ bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *

 	return in_out->dml2_instance->funcs.build_mcache_programming(in_out);
 }
-
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c
@ -1,354 +0,0 @@
-// SPDX-License-Identifier: MIT
-//
-// Copyright 2024 Advanced Micro Devices, Inc.
-
-#include "dml2_internal_shared_types.h"
-#include "dml_top.h"
-#include "dml2_mcg_factory.h"
-#include "dml2_core_factory.h"
-#include "dml2_dpmm_factory.h"
-#include "dml2_pmo_factory.h"
-#include "dml_top_mcache.h"
-#include "dml2_top_optimization.h"
-#include "dml2_external_lib_deps.h"
-
-unsigned int dml2_get_instance_size_bytes(void)
-{
-	return sizeof(struct dml2_instance);
-}
-
-bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out)
-{
-	struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance;
-	struct dml2_initialize_instance_locals *l = &dml->scratch.initialize_instance_locals;
-	struct dml2_core_initialize_in_out core_init_params = { 0 };
-	struct dml2_mcg_build_min_clock_table_params_in_out mcg_build_min_clk_params = { 0 };
-	struct dml2_pmo_initialize_in_out pmo_init_params = { 0 };
-	bool result = false;
-
-	memset(l, 0, sizeof(struct dml2_initialize_instance_locals));
-	memset(dml, 0, sizeof(struct dml2_instance));
-
-	memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities));
-	memcpy(&dml->soc_bbox, &in_out->soc_bb, sizeof(struct dml2_soc_bb));
-
-	dml->project_id = in_out->options.project_id;
-	dml->pmo_options = in_out->options.pmo_options;
-
-	// Initialize All Components
-	result = dml2_mcg_create(in_out->options.project_id, &dml->mcg_instance);
-
-	if (result)
-		result = dml2_dpmm_create(in_out->options.project_id, &dml->dpmm_instance);
-
-	if (result)
-		result = dml2_core_create(in_out->options.project_id, &dml->core_instance);
-
-	if (result) {
-		mcg_build_min_clk_params.soc_bb = &in_out->soc_bb;
-		mcg_build_min_clk_params.min_clk_table = &dml->min_clk_table;
-		result = dml->mcg_instance.build_min_clock_table(&mcg_build_min_clk_params);
-	}
-
-	if (result) {
-		core_init_params.project_id = in_out->options.project_id;
-		core_init_params.instance = &dml->core_instance;
-		core_init_params.minimum_clock_table = &dml->min_clk_table;
-		core_init_params.explicit_ip_bb = in_out->overrides.explicit_ip_bb;
-		core_init_params.explicit_ip_bb_size = in_out->overrides.explicit_ip_bb_size;
-		core_init_params.ip_caps = &in_out->ip_caps;
-		core_init_params.soc_bb = &in_out->soc_bb;
-		result = dml->core_instance.initialize(&core_init_params);
-
-		if (core_init_params.explicit_ip_bb && core_init_params.explicit_ip_bb_size > 0) {
-			memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities));
-		}
-	}
-
-	if (result)
-		result = dml2_pmo_create(in_out->options.project_id, &dml->pmo_instance);
-
-	if (result) {
-		pmo_init_params.instance = &dml->pmo_instance;
-		pmo_init_params.soc_bb = &dml->soc_bbox;
-		pmo_init_params.ip_caps = &dml->ip_caps;
-		pmo_init_params.mcg_clock_table_size = dml->min_clk_table.dram_bw_table.num_entries;
-		pmo_init_params.options = &dml->pmo_options;
-		dml->pmo_instance.initialize(&pmo_init_params);
-	}
-
-	return result;
-}
-
-static void setup_unoptimized_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config)
-{
-	memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg));
-	out->stage1.min_clk_index_for_latency = dml->min_clk_table.dram_bw_table.num_entries - 1; //dml->min_clk_table.clean_me_up.soc_bb.num_states - 1;
-}
-
-static void setup_speculative_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config)
-{
-	memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg));
-	out->stage1.min_clk_index_for_latency = 0;
-}
-
-bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out)
-{
-	struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance;
-	struct dml2_check_mode_supported_locals *l = &dml->scratch.check_mode_supported_locals;
-	struct dml2_display_cfg_programming *dpmm_programming = &dml->dpmm_instance.dpmm_scratch.programming;
-
-	bool result = false;
-	bool mcache_success = false;
-
-	memset(dpmm_programming, 0, sizeof(struct dml2_display_cfg_programming));
-
-	setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config);
-
-	l->mode_support_params.instance = &dml->core_instance;
-	l->mode_support_params.display_cfg = &l->base_display_config_with_meta;
-	l->mode_support_params.min_clk_table = &dml->min_clk_table;
-	l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency;
-
-	result = dml->core_instance.mode_support(&l->mode_support_params);
-	l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result;
-
-	if (result) {
-		struct optimization_phase_params mcache_phase =	{
-		.dml = dml,
-		.display_config = &l->base_display_config_with_meta,
-		.test_function = dml2_top_optimization_test_function_mcache,
-		.optimize_function = dml2_top_optimization_optimize_function_mcache,
-		.optimized_display_config = &l->optimized_display_config_with_meta,
-		.all_or_nothing = false,
-		};
-		mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &mcache_phase);
-	}
-
-	/*
-	 * Call DPMM to map all requirements to minimum clock state
-	 */
-	if (result) {
-		l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table;
-		l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta;
-		l->dppm_map_mode_params.programming = dpmm_programming;
-		l->dppm_map_mode_params.soc_bb = &dml->soc_bbox;
-		l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip;
-		result = dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params);
-	}
-
-	in_out->is_supported = mcache_success;
-	result = result && in_out->is_supported;
-
-	return result;
-}
-
-bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out)
-{
-	struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance;
-	struct dml2_build_mode_programming_locals *l = &dml->scratch.build_mode_programming_locals;
-
-	bool result = false;
-	bool mcache_success = false;
-	bool uclk_pstate_success = false;
-	bool vmin_success = false;
-	bool stutter_success = false;
-	unsigned int i;
-
-	memset(l, 0, sizeof(struct dml2_build_mode_programming_locals));
-	memset(in_out->programming, 0, sizeof(struct dml2_display_cfg_programming));
-
-	memcpy(&in_out->programming->display_config, in_out->display_config, sizeof(struct dml2_display_cfg));
-
-	setup_speculative_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config);
-
-	l->mode_support_params.instance = &dml->core_instance;
-	l->mode_support_params.display_cfg = &l->base_display_config_with_meta;
-	l->mode_support_params.min_clk_table = &dml->min_clk_table;
-	l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency;
-
-	result = dml->core_instance.mode_support(&l->mode_support_params);
-	l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result;
-
-	if (!result) {
-		setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config);
-
-		l->mode_support_params.instance = &dml->core_instance;
-		l->mode_support_params.display_cfg = &l->base_display_config_with_meta;
-		l->mode_support_params.min_clk_table = &dml->min_clk_table;
-		l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency;
-
-		result = dml->core_instance.mode_support(&l->mode_support_params);
-		l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result;
-
-		if (!result) {
-			l->informative_params.instance = &dml->core_instance;
-			l->informative_params.programming = in_out->programming;
-			l->informative_params.mode_is_supported = false;
-			dml->core_instance.populate_informative(&l->informative_params);
-
-			return false;
-		}
-
-		/*
-		* Phase 1: Determine minimum clocks to satisfy latency requirements for this mode
-		*/
-		memset(&l->min_clock_for_latency_phase, 0, sizeof(struct optimization_phase_params));
-		l->min_clock_for_latency_phase.dml = dml;
-		l->min_clock_for_latency_phase.display_config = &l->base_display_config_with_meta;
-		l->min_clock_for_latency_phase.init_function = dml2_top_optimization_init_function_min_clk_for_latency;
-		l->min_clock_for_latency_phase.test_function = dml2_top_optimization_test_function_min_clk_for_latency;
-		l->min_clock_for_latency_phase.optimize_function = dml2_top_optimization_optimize_function_min_clk_for_latency;
-		l->min_clock_for_latency_phase.optimized_display_config = &l->optimized_display_config_with_meta;
-		l->min_clock_for_latency_phase.all_or_nothing = false;
-
-		dml2_top_optimization_perform_optimization_phase_1(&l->optimization_phase_locals, &l->min_clock_for_latency_phase);
-
-		memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
-	}
-
-	/*
-	* Phase 2: Satisfy DCC mcache requirements
-	*/
-	memset(&l->mcache_phase, 0, sizeof(struct optimization_phase_params));
-	l->mcache_phase.dml = dml;
-	l->mcache_phase.display_config = &l->base_display_config_with_meta;
-	l->mcache_phase.test_function = dml2_top_optimization_test_function_mcache;
-	l->mcache_phase.optimize_function = dml2_top_optimization_optimize_function_mcache;
-	l->mcache_phase.optimized_display_config = &l->optimized_display_config_with_meta;
-	l->mcache_phase.all_or_nothing = true;
-
-	mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->mcache_phase);
-
-	if (!mcache_success) {
-		l->informative_params.instance = &dml->core_instance;
-		l->informative_params.programming = in_out->programming;
-		l->informative_params.mode_is_supported = false;
-
-		dml->core_instance.populate_informative(&l->informative_params);
-
-		in_out->programming->informative.failed_mcache_validation = true;
-		return false;
-	}
-
-	memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
-
-	/*
-	* Phase 3: Optimize for Pstate
-	*/
-	memset(&l->uclk_pstate_phase, 0, sizeof(struct optimization_phase_params));
-	l->uclk_pstate_phase.dml = dml;
-	l->uclk_pstate_phase.display_config = &l->base_display_config_with_meta;
-	l->uclk_pstate_phase.init_function = dml2_top_optimization_init_function_uclk_pstate;
-	l->uclk_pstate_phase.test_function = dml2_top_optimization_test_function_uclk_pstate;
-	l->uclk_pstate_phase.optimize_function = dml2_top_optimization_optimize_function_uclk_pstate;
-	l->uclk_pstate_phase.optimized_display_config = &l->optimized_display_config_with_meta;
-	l->uclk_pstate_phase.all_or_nothing = true;
-
-	uclk_pstate_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->uclk_pstate_phase);
-
-	if (uclk_pstate_success) {
-		memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
-		l->base_display_config_with_meta.stage3.success = true;
-	}
-
-	/*
-	* Phase 4: Optimize for Vmin
-	*/
-	memset(&l->vmin_phase, 0, sizeof(struct optimization_phase_params));
-	l->vmin_phase.dml = dml;
-	l->vmin_phase.display_config = &l->base_display_config_with_meta;
-	l->vmin_phase.init_function = dml2_top_optimization_init_function_vmin;
-	l->vmin_phase.test_function = dml2_top_optimization_test_function_vmin;
-	l->vmin_phase.optimize_function = dml2_top_optimization_optimize_function_vmin;
-	l->vmin_phase.optimized_display_config = &l->optimized_display_config_with_meta;
-	l->vmin_phase.all_or_nothing = false;
-
-	vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase);
-
-	if (l->optimized_display_config_with_meta.stage4.performed) {
-		/*
-		 * when performed is true, optimization has applied to
-		 * optimized_display_config_with_meta and it has passed mode
-		 * support. However it may or may not pass the test function to
-		 * reach actual Vmin. As long as voltage is optimized even if it
-		 * doesn't reach Vmin level, there is still power benefit so in
-		 * this case we will still copy this optimization into base
-		 * display config.
-		 */
-		memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
-		l->base_display_config_with_meta.stage4.success = vmin_success;
-	}
-
-	/*
-	* Phase 5: Optimize for Stutter
-	*/
-	memset(&l->stutter_phase, 0, sizeof(struct optimization_phase_params));
-	l->stutter_phase.dml = dml;
-	l->stutter_phase.display_config = &l->base_display_config_with_meta;
-	l->stutter_phase.init_function = dml2_top_optimization_init_function_stutter;
-	l->stutter_phase.test_function = dml2_top_optimization_test_function_stutter;
-	l->stutter_phase.optimize_function = dml2_top_optimization_optimize_function_stutter;
-	l->stutter_phase.optimized_display_config = &l->optimized_display_config_with_meta;
-	l->stutter_phase.all_or_nothing = true;
-
-	stutter_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->stutter_phase);
-
-	if (stutter_success) {
-		memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
-		l->base_display_config_with_meta.stage5.success = true;
-	}
-
-	/*
-	* Populate mcache programming
-	*/
-	for (i = 0; i < in_out->display_config->num_planes; i++) {
-		in_out->programming->plane_programming[i].mcache_allocation = l->base_display_config_with_meta.stage2.mcache_allocations[i];
-	}
-
-	/*
-	* Call DPMM to map all requirements to minimum clock state
-	*/
-	if (result) {
-		l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table;
-		l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta;
-		l->dppm_map_mode_params.programming = in_out->programming;
-		l->dppm_map_mode_params.soc_bb = &dml->soc_bbox;
-		l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip;
-		result = dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params);
-		if (!result)
-			in_out->programming->informative.failed_dpmm = true;
-	}
-
-	if (result) {
-		l->mode_programming_params.instance = &dml->core_instance;
-		l->mode_programming_params.display_cfg = &l->base_display_config_with_meta;
-		l->mode_programming_params.cfg_support_info = &l->base_display_config_with_meta.mode_support_result.cfg_support_info;
-		l->mode_programming_params.programming = in_out->programming;
-
-		result = dml->core_instance.mode_programming(&l->mode_programming_params);
-		if (!result)
-			in_out->programming->informative.failed_mode_programming = true;
-	}
-
-	if (result) {
-		l->dppm_map_watermarks_params.core = &dml->core_instance;
-		l->dppm_map_watermarks_params.display_cfg = &l->base_display_config_with_meta;
-		l->dppm_map_watermarks_params.programming = in_out->programming;
-		result = dml->dpmm_instance.map_watermarks(&l->dppm_map_watermarks_params);
-	}
-
-	l->informative_params.instance = &dml->core_instance;
-	l->informative_params.programming = in_out->programming;
-	l->informative_params.mode_is_supported = result;
-
-	dml->core_instance.populate_informative(&l->informative_params);
-
-	return result;
-}
-
-bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out)
-{
-	return dml2_top_mcache_build_mcache_programming(in_out);
-}
-
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c
@ -29,8 +29,3 @@ int dml2_printf(const char *format, ...)
 	return 0;
 #endif
 }
-
-void dml2_assert(int condition)
-{
-	//ASSERT(condition);
-}
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h
@ -5,11 +5,10 @@
 #ifndef __DML2_DEBUG_H__
 #define __DML2_DEBUG_H__

-#ifdef _DEBUG
-#define DML2_ASSERT(condition) dml2_assert(condition)
-#else
+#ifndef DML2_ASSERT
 #define DML2_ASSERT(condition) ((void)0)
 #endif
+
 /*
 * DML_LOG_FATAL - fatal errors for unrecoverable DML states until a restart.
 * DML_LOG_ERROR - unexpected but recoverable failures inside DML
@ -56,6 +55,5 @@

 int dml2_log_internal(const char *format, ...);
 int dml2_printf(const char *format, ...);
-void dml2_assert(int condition);

 #endif
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h
@ -64,7 +64,6 @@ struct dml2_mcg_build_min_clock_table_params_in_out {
 };
 struct dml2_mcg_instance {
 	bool (*build_min_clock_table)(struct dml2_mcg_build_min_clock_table_params_in_out *in_out);
-	bool (*unit_test)(void);
 };

 /*
@ -110,7 +109,6 @@ struct dml2_dpmm_scratch {
 struct dml2_dpmm_instance {
 	bool (*map_mode_to_soc_dpm)(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out);
 	bool (*map_watermarks)(struct dml2_dpmm_map_watermarks_params_in_out *in_out);
-	bool (*unit_test)(void);

 	struct dml2_dpmm_scratch dpmm_scratch;
 };
@ -473,7 +471,6 @@ struct dml2_core_instance {
 	bool (*mode_programming)(struct dml2_core_mode_programming_in_out *in_out);
 	bool (*populate_informative)(struct dml2_core_populate_informative_in_out *in_out);
 	bool (*calculate_mcache_allocation)(struct dml2_calculate_mcache_allocation_in_out *in_out);
-	bool (*unit_test)(void);

 	struct {
 		struct dml2_core_internal_display_mode_lib mode_lib;
@ -721,8 +718,6 @@ struct dml2_pmo_instance {
 	bool (*test_for_stutter)(struct dml2_pmo_test_for_stutter_in_out *in_out);
 	bool (*optimize_for_stutter)(struct dml2_pmo_optimize_for_stutter_in_out *in_out);

-	bool (*unit_test)(void);
-
 	struct dml2_pmo_init_data init_data;
 	struct dml2_pmo_scratch scratch;
 };
@ -947,7 +942,6 @@ struct dml2_top_funcs {
 	bool (*check_mode_supported)(struct dml2_check_mode_supported_in_out *in_out);
 	bool (*build_mode_programming)(struct dml2_build_mode_programming_in_out *in_out);
 	bool (*build_mcache_programming)(struct dml2_build_mcache_programming_in_out *in_out);
-	bool (*unit_test)(void);
 };

 struct dml2_instance {